├── dx ├── .gitignore ├── testdata │ ├── ingest │ │ ├── 0 │ │ └── 1 │ ├── spec │ │ └── spec.toml │ └── query │ │ └── 0 ├── query_test.go ├── compare_test.go ├── cmd_test.go ├── ingest.go ├── README.md ├── main.go └── compare.go ├── imagine ├── .gitignore ├── samples │ ├── long-tail-import.toml │ ├── small-mutex-import.toml │ ├── small-mutex.toml │ ├── parallel-import.toml │ ├── low-cardinality.toml │ ├── games.toml │ ├── long-tail.toml │ ├── age.toml │ ├── parallel.toml │ ├── sports.toml │ ├── time.toml │ ├── README.md │ ├── bsi.toml │ └── students.toml ├── sample_fast.toml ├── sample.toml ├── enums_stamptype.go ├── enums_densitytype.go ├── enums_cachetype.go ├── enums_fieldtype.go ├── enums_valueorder.go ├── enums_timequantum.go ├── enums_verifytype.go ├── enums_dimensionorder.go ├── sample.md ├── generators_test.go └── README.md ├── version.go ├── .gitignore ├── cmd ├── imagine │ └── main.go ├── dx │ └── main.go └── pi │ ├── query.go │ ├── slicewidth.go │ ├── basic_query.go │ ├── diagonal.go │ ├── range_query.go │ ├── random_query.go │ ├── replay.go │ ├── tps.go │ ├── import.go │ ├── import_range.go │ ├── random_set.go │ ├── bench.go │ ├── zipf.go │ └── main.go ├── apophenia ├── weighted_test.go ├── int128_test.go ├── zipf_test.go ├── weighted.go ├── permute_test.go ├── README.md ├── zipf.go ├── apophenia.go ├── int128.go └── permute.go ├── bench ├── query.go ├── doc.go ├── slicewidth.go ├── diagonal.go ├── range_query.go ├── random_query.go ├── bench_test.go ├── basic_query.go ├── random_set.go ├── import.go ├── import_range.go ├── zipf.go ├── tps.go └── bench.go ├── README.md ├── LICENSE ├── go.mod └── Makefile /dx/.gitignore: -------------------------------------------------------------------------------- 1 | dx 2 | -------------------------------------------------------------------------------- /imagine/.gitignore: -------------------------------------------------------------------------------- 1 | /imagine 2 | -------------------------------------------------------------------------------- /dx/testdata/ingest/0: -------------------------------------------------------------------------------- 1 | {"type":"ingest","time":"6.918462ms","threadcount":1} -------------------------------------------------------------------------------- /dx/testdata/ingest/1: -------------------------------------------------------------------------------- 1 | {"type":"ingest","time":"6.481175ms","threadcount":1} -------------------------------------------------------------------------------- /version.go: -------------------------------------------------------------------------------- 1 | package tools 2 | 3 | var Version = "v0.0.0" 4 | var BuildTime = "not set" 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vendor/ 2 | /.DS_Store 3 | 4 | /influx/etc/.DS_Store 5 | 6 | /influx/.DS_Store 7 | 8 | /influx/data/.DS_Store 9 | -------------------------------------------------------------------------------- /imagine/samples/long-tail-import.toml: -------------------------------------------------------------------------------- 1 | version = "1.0" 2 | [[workloads]] 3 | name = "initial import" 4 | tasks = [ 5 | { index = "users", field = "long-tail" } 6 | ] 7 | -------------------------------------------------------------------------------- /cmd/imagine/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | 
"github.com/pilosa/tools/imagine" 5 | ) 6 | 7 | func main() { 8 | imagine.NewConfig().Execute() 9 | } 10 | -------------------------------------------------------------------------------- /imagine/samples/small-mutex-import.toml: -------------------------------------------------------------------------------- 1 | version = "1.0" 2 | [[workloads]] 3 | name = "initial import" 4 | tasks = [ 5 | { index = "users", field = "small-mutex", seed = 1 } 6 | ] 7 | -------------------------------------------------------------------------------- /imagine/samples/small-mutex.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.users] 4 | columns = 1000000 5 | fields = [ 6 | { name = "small-mutex", type = "mutex", max = 3, density = 0.99, }, 7 | ] 8 | -------------------------------------------------------------------------------- /imagine/samples/parallel-import.toml: -------------------------------------------------------------------------------- 1 | version = "1.0" 2 | [[workloads]] 3 | name = "initial import" 4 | tasks = [ 5 | { index = "users", field = "long-tail" }, 6 | { index = "users", field = "small-mutex", seed = 1 }, 7 | ] 8 | -------------------------------------------------------------------------------- /imagine/samples/low-cardinality.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.users] 4 | columns = 100000 5 | fields = [ 6 | { name = "flags", type = "set", max = 200, density = 0.99, valueRule = "zipf", zipfV = 1.0, zipfS = 4.0, }, 7 | ] 8 | -------------------------------------------------------------------------------- /cmd/dx/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/pilosa/tools/dx" 8 | ) 9 | 10 | func main() { 11 | if err := dx.NewRootCmd().Execute(); err != nil { 12 | fmt.Printf("%+v", err) 13 | os.Exit(1) 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /dx/testdata/spec/spec.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.index] 4 | columns = 20 5 | fields = [ 6 | { name = "field", type = "set", min = 0, max = 5, chance = 0.1, density = 1.0, }, 7 | ] 8 | [[workloads]] 9 | name = "sample" 10 | threadCount = 1 11 | tasks = [ 12 | { index = "index", field = "field", columnOrder = "stride", stride = 3 }, 13 | ] -------------------------------------------------------------------------------- /imagine/samples/games.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.players] 4 | columns = 5000000000 5 | fields = [ 6 | { name = "timey", type = "set", max = 10, density = 0.03, }, 7 | ] 8 | [[workloads]] 9 | name = "ingest" 10 | threadCount = 1 11 | tasks = [ 12 | { index = "players", field = "timey", columnOrder = "permute", columns = 500 }, 13 | ] 14 | -------------------------------------------------------------------------------- /imagine/samples/long-tail.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.users] 4 | columns = 10000 5 | fields = [ 6 | { name = "long-tail", type = "set", max = 1000, density = 0.00001, valueRule = "zipf", 
zipfV = 9999.0, zipfS = 1.001, }, 7 | { name = "long-tail", type = "set", chance = 0.01, density = 0.001, valueRule = "zipf", zipfV = 9999.0, zipfS = 1.001, }, 8 | ] 9 | -------------------------------------------------------------------------------- /imagine/sample_fast.toml: -------------------------------------------------------------------------------- 1 | version = "1.0" 2 | [indexes.users] 3 | columns = 1000000000 4 | fields = [ 5 | {name = "numbers", type = "set", min=0, max=10000, zipfA=1.0, fastSparse = true, density = 0.1 }, 6 | ] 7 | [[workloads]] 8 | name = "sample" 9 | threadCount = 1 10 | tasks = [ 11 | { index = "users", field = "numbers", columnOrder = "stride", stride = 3, dimensionOrder="row" }, 12 | ] 13 | -------------------------------------------------------------------------------- /imagine/samples/age.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.students_ts] 4 | columns = 10000000 5 | fields = [ 6 | { name = "age", type = "int", max = 100, min=14, density = 0.99, valueRule = "zipf", zipfV = 3.0, zipfS = 1.1 }, 7 | ] 8 | [[workloads]] 9 | name = "ingest" 10 | threadCount = 6 11 | batchSize = 1048576 12 | useRoaring = true 13 | tasks = [{ index = "students_ts", field = "age", seed = 1 }] 14 | -------------------------------------------------------------------------------- /imagine/samples/parallel.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.users] 4 | columns = 1000000 5 | fields = [ 6 | { name = "long-tail", type = "set", max = 100, density = 0.001, valueRule = "zipf", zipfV = 9999.0, zipfS = 1.001, }, 7 | { name = "long-tail", type = "set", chance = 0.01, density = 0.1, valueRule = "zipf", zipfV = 9999.0, zipfS = 1.001, }, 8 | { name = "small-mutex", type = "mutex", max = 3, density = 0.99, }, 9 | ] 10 | -------------------------------------------------------------------------------- /imagine/samples/sports.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.users] 4 | columns = 100000000 5 | fields = [ 6 | { name = "timey", type = "time", max = 100, density = 0.10, valueRule = "zipf", quantum = "YMDH" }, 7 | ] 8 | [[workloads]] 9 | name = "ingest" 10 | threadCount = 4 11 | batchSize = 1048576 12 | useRoaring = true 13 | tasks = [ 14 | { index = "users", field = "timey", stamp = "increasing", stampRange = "7560h", columns = 100000000 }, 15 | ] 16 | -------------------------------------------------------------------------------- /imagine/samples/time.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.testidx] 4 | columns = 10000000 5 | fields = [ 6 | { name = "timestamp", type = "time", max=1, density = 1.0, valueRule="zipf", quantum="YMDH" }, 7 | ] 8 | [[workloads]] 9 | name = "ingest" 10 | threadCount = 6 11 | batchSize = 1048576 12 | useRoaring = true 13 | tasks = [ 14 | { index = "testidx", field = "timestamp", stamp = "increasing", stampStart = "2019-01-01T00:00:00Z", stampRange = "240h" }, 15 | ] 16 | -------------------------------------------------------------------------------- /imagine/sample.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.users] 4 | columns = 100 5 | 
fields = [ 6 | # age = { type = "int", min = 10, max = 120 } 7 | # { name = "income", type = "int", min = 0, max = 640000, density = 1.0 }, 8 | { name = "numbers", type = "set", max = 2, density = 0.01, }, 9 | { name = "numbers", type = "set", chance = 0.05, density = 1.0, }, 10 | ] 11 | [[workloads]] 12 | name = "sample" 13 | threadCount = 1 14 | tasks = [ 15 | # { index = "users", field = "income", }, 16 | { index = "users", field = "numbers", columnOrder = "stride", stride = 3 }, 17 | ] 18 | # tasks = [ 19 | # { index = "users", field = "income", columnOrder = "permute" }, 20 | # ] 21 | -------------------------------------------------------------------------------- /apophenia/weighted_test.go: -------------------------------------------------------------------------------- 1 | package apophenia 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func Benchmark_WeightedDistribution(b *testing.B) { 9 | src := NewSequence(0) 10 | w, err := NewWeighted(src) 11 | if err != nil { 12 | b.Fatalf("couldn't make weighted: %v", err) 13 | } 14 | scales := []uint64{3, 6, 12, 18, 24, 63} 15 | for _, scale := range scales { 16 | off := OffsetFor(SequenceWeighted, 0, 0, 0) 17 | scaled := uint64(1 << scale) 18 | b.Run(fmt.Sprintf("Scale%d", scale), func(b *testing.B) { 19 | for i := 0; i < b.N; i++ { 20 | w.Bits(off, 1, scaled) 21 | w.Bits(off, scaled/2, scaled) 22 | w.Bits(off, scaled-1, scaled) 23 | } 24 | }) 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /imagine/samples/README.md: -------------------------------------------------------------------------------- 1 | I'd like to see something like each of the following 6 cases with a single in 2 | column order import followed by random updates. 3 | 4 | - high cardinality (10 million?), long tail of values with very few bits set. 5 | Some columns have only a few (or zero) bits set, some have thousands. 6 | in column order import followed by random updates. 7 | 8 | - medium/low cardinality set field (hundreds). zipfian distribution among 9 | values. 95% of columns have 1 value. 4% have more, 1% have none. (roughish numbers) 10 | 11 | - low cardinality mutex field (3), even distribution, 99% of columns have values. 12 | 13 | - 16 bit int field 99% of columns have a value. zipfian distribution within the 16 bit range 14 | - 32 bit int field 99% of columns have a value. zipfian distribution within the 32 bit range 15 | - 64 bit int field 99% of columns have a value. simulated high precision 16 | timestamp - each update increases slightly from the previous number. 17 | -------------------------------------------------------------------------------- /bench/query.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "os" 7 | "time" 8 | 9 | "github.com/pilosa/go-pilosa" 10 | ) 11 | 12 | type QueryBenchmark struct { 13 | Name string `json:"name"` 14 | Query string `json:"query"` 15 | Index string `json:"index"` 16 | Iterations int `json:"iterations"` 17 | 18 | Logger *log.Logger `json:"-"` 19 | } 20 | 21 | func NewQueryBenchmark() *QueryBenchmark { 22 | return &QueryBenchmark{ 23 | Name: "query", 24 | Logger: log.New(os.Stderr, "", log.LstdFlags), 25 | } 26 | } 27 | 28 | // Run runs the benchmark. 
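// It initializes the schema for b.Index, then issues b.Query against the index
// b.Iterations times, timing each call and recording each duration and
// response in the returned Result.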
29 | func (b *QueryBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 30 | result := NewResult() 31 | result.AgentNum = agentNum 32 | result.Configuration = b 33 | 34 | // Initialize schema. 35 | index, _, err := ensureSchema(client, b.Index, "") 36 | if err != nil { 37 | return result, err 38 | } 39 | 40 | for n := 0; n < b.Iterations; n++ { 41 | start := time.Now() 42 | resp, err := client.Query(index.RawQuery(b.Query)) 43 | result.Add(time.Since(start), resp) 44 | if err != nil { 45 | return result, err 46 | } 47 | } 48 | return result, nil 49 | } 50 | -------------------------------------------------------------------------------- /dx/query_test.go: -------------------------------------------------------------------------------- 1 | package dx 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestGenerateRandomRows(t *testing.T) { 8 | tests := []struct{ min, max, numRows int64 }{ 9 | {min: 4, max: 4, numRows: 3}, 10 | {min: 3, max: 9, numRows: 2}, 11 | {min: 5, max: 6, numRows: 4}, 12 | } 13 | for _, f := range tests { 14 | rows, err := generateRandomRows(f.min, f.max, f.numRows) 15 | if err != nil { 16 | t.Fatalf("generating rows for min: %v, max: %v, err: %v", f.min, f.max, err) 17 | } 18 | if int64(len(rows)) != f.numRows { 19 | t.Fatalf("expected %v rows, got %v", f.numRows, rows) 20 | } 21 | for _, rowNum := range rows { 22 | if !(f.min <= rowNum && rowNum <= f.max) { 23 | t.Fatalf("row num %v is not in range [%v, %v]", rowNum, f.min, f.max) 24 | } 25 | } 26 | } 27 | } 28 | 29 | func TestIndexSpec_RandomIndexField(t *testing.T) { 30 | fs := newFieldSpec() 31 | fs["field0"] = pair{min: 12, max: 13} 32 | is := newIndexSpec() 33 | is["index0"] = fs 34 | 35 | indexName, fieldName, err := is.randomIndexField() 36 | if err != nil { 37 | t.Fatalf("unexpected error: %v", err) 38 | } 39 | if indexName != "index0" { 40 | t.Fatalf("expected index name: %v, got %v", "index0", indexName) 41 | } 42 | if fieldName != "field0" { 43 | t.Fatalf("expected field name: %v, got %v", "field0", fieldName) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /imagine/samples/bsi.toml: -------------------------------------------------------------------------------- 1 | # int ingest/update for 16/32/63-bit values. 
2 | # value generation misbehaves if the actual range (max - min) exceeds 2^63
3 | densityscale = 2097152
4 | version = "1.0"
5 | [indexes.inttest]
6 | columns = 1000000
7 | fields = [
8 | { name = "int16", type = "int", max = 65535, density = 0.99, valueRule = "zipf", zipfV = 3.0, zipfS = 1.1 },
9 | { name = "int32", type = "int", max = 4294967295, density = 0.99, valueRule = "zipf", zipfV = 3.0, zipfS = 1.1 },
10 | { name = "int63", type = "int", max = 9223372036854775807, density = 0.99, valueRule = "zipf", zipfV = 3.0, zipfS = 1.1 },
11 | ]
12 | [[workloads]]
13 | name = "Int initial import"
14 | tasks = [{ index = "inttest", field = "int16", seed = 1 }]
15 | 
16 | # TODO: move to another workload
17 | #tasks = [{ index = "inttest", field = "int32", seed = 2 }]
18 | 
19 | # TODO: move to another workload
20 | #tasks = [{ index = "inttest", field = "int63", seed = 3 }]
21 | 
22 | [[workloads]]
23 | name = "Int updates"
24 | tasks = [{ index = "inttest", field = "int16", seed = 4, columns = 1000, columnOrder = "permute" }]
25 | 
26 | 
27 | # TODO: move to another workload
28 | #tasks = [{ index = "inttest", field = "int32", seed = 5, columns = 1000, columnOrder = "permute" }]
29 | 
30 | # TODO: move to another workload
31 | #tasks = [{ index = "inttest", field = "int63", seed = 6, columns = 1000, columnOrder = "permute" }]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Pilosa Tools
2 | ===========================================
3 | 
4 | This repo contains the `pi` tool which can:
5 | 
6 | - run a variety of predefined benchmarks (that can be configured in various ways)
7 | - run combinations of predefined benchmarks
8 | - run benchmarks from multiple "agents" simultaneously
9 | - store, locally, the results of complex combinations of benchmarks running on multiple agents.
10 | 
11 | The `pi` tool contains several subcommands which are described in more detail below. To get help for pi, or any subcommand of pi (or any subcommand of any subcommand of pi, etc.), just append `--help` at the command line: e.g. `pi --help` or `pi bench --help` or `pi bench import --help`.
12 | 
13 | 
14 | ## bench
15 | 
16 | The bench command has a set of subcommands, one for each available benchmark. All of them take a `--hosts` argument which specifies the Pilosa cluster, and a `--agent-num` argument. The agent num argument is mostly used by `pi spawn`, and we discuss it in more detail in that section.
17 | 
18 | Example:
19 | 
20 | ```
21 | pi bench import --hosts=one.example.com:10101,two.example.com:10101,three.example.com:10101 --iterations=100000 --max-column-id=10000 --max-row-id=1000
22 | ```
23 | 
24 | The above would import 100,000 random bits into the three-node Pilosa cluster specified. All bits would have column ID between 0 and 10,000, and row ID between 0 and 1000.
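
Each benchmark documents how (or whether) it uses `--agent-num` to keep concurrent agents from doing overlapping work. For example, `diagonal-set-bits` offsets its minimum row and column IDs by the number of iterations, so agents set disjoint runs of bits along the diagonal, while `random-query` mixes the agent number into its random seed. A sketch of launching two non-overlapping agents by hand (normally `pi spawn` manages agent numbers):

```
pi bench diagonal-set-bits --hosts=one.example.com:10101 --iterations=100 --agent-num=0
pi bench diagonal-set-bits --hosts=one.example.com:10101 --iterations=100 --agent-num=1
```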
25 | 26 | -------------------------------------------------------------------------------- /cmd/pi/query.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // NewQueryCommand subcommands 12 | func NewQueryCommand() *cobra.Command { 13 | b := bench.NewQueryBenchmark() 14 | cmd := &cobra.Command{ 15 | Use: "query", 16 | Short: "Runs the given PQL query against pilosa and records the results along with the duration.", 17 | Long: `Runs the given PQL query against pilosa and records the results along with the duration. 18 | Agent num has no effect`, 19 | RunE: func(cmd *cobra.Command, args []string) error { 20 | flags := cmd.Flags() 21 | b.Logger = NewLoggerFromFlags(flags) 22 | client, err := NewClientFromFlags(flags) 23 | if err != nil { 24 | return err 25 | } 26 | agentNum, err := flags.GetInt("agent-num") 27 | if err != nil { 28 | return err 29 | } 30 | result, err := b.Run(context.Background(), client, agentNum) 31 | if err != nil { 32 | result.Error = err.Error() 33 | } 34 | return PrintResults(cmd, result, os.Stdout) 35 | }, 36 | } 37 | 38 | flags := cmd.Flags() 39 | flags.IntVar(&b.Iterations, "iterations", 1, "Number of times to repeat the query.") 40 | flags.StringVar(&b.Query, "query", "Count(Row(fbench=1))", "PQL query to perform.") 41 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index to use.") 42 | 43 | return cmd 44 | } 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Pilosa Corp. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
12 | -------------------------------------------------------------------------------- /cmd/pi/slicewidth.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewSliceWidthCommand() *cobra.Command { 12 | b := bench.NewSliceWidthBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "slice-width", 15 | Short: "Imports a given density of data uniformly over a configurable number of slices.", 16 | Long: `Imports a given density of data uniformly over a configurable number of slices based on bit density and slice count`, 17 | RunE: func(cmd *cobra.Command, args []string) error { 18 | flags := cmd.Flags() 19 | b.Logger = NewLoggerFromFlags(flags) 20 | client, err := NewClientFromFlags(flags) 21 | if err != nil { 22 | return err 23 | } 24 | agentNum, err := flags.GetInt("agent-num") 25 | if err != nil { 26 | return err 27 | } 28 | result, err := b.Run(context.Background(), client, agentNum) 29 | if err != nil { 30 | result.Error = err.Error() 31 | } 32 | return PrintResults(cmd, result, os.Stdout) 33 | }, 34 | } 35 | 36 | flags := cmd.Flags() 37 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index to use.") 38 | flags.StringVar(&b.Field, "field", defaultField, "Field to import into.") 39 | flags.Float64Var(&b.BitDensity, "bit-density", 0.1, "data density.") 40 | flags.Int64Var(&b.SliceWidth, "slice-width", 1048576, "slice width, default to 2^20") 41 | flags.Int64Var(&b.SliceCount, "slice-count", 1, "slice count") 42 | 43 | return cmd 44 | } 45 | -------------------------------------------------------------------------------- /cmd/pi/basic_query.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewBasicQueryCommand() *cobra.Command { 12 | b := bench.NewBasicQueryBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "basic-query", 15 | Short: "Runs the given PQL query against pilosa multiple times with different arguments.", 16 | Long: `Runs the given PQL query against pilosa multiple times with different arguments. 17 | 18 | Agent num has no effect.`, 19 | RunE: func(cmd *cobra.Command, args []string) error { 20 | flags := cmd.Flags() 21 | b.Logger = NewLoggerFromFlags(flags) 22 | client, err := NewClientFromFlags(flags) 23 | if err != nil { 24 | return err 25 | } 26 | agentNum, err := flags.GetInt("agent-num") 27 | if err != nil { 28 | return err 29 | } 30 | result, err := b.Run(context.Background(), client, agentNum) 31 | if err != nil { 32 | result.Error = err.Error() 33 | } 34 | return PrintResults(cmd, result, os.Stdout) 35 | }, 36 | } 37 | 38 | flags := cmd.Flags() 39 | flags.IntVar(&b.Iterations, "iterations", 1, "Number of queries to make.") 40 | flags.IntVar(&b.NumArgs, "num-args", 2, "Number of rows to put in each query (i.e. 
number of rows to intersect)")
41 | 	flags.StringVar(&b.Query, "query", "Intersect", "query to perform (Intersect, Union, Difference, Xor)")
42 | 	flags.StringVar(&b.Field, "field", defaultField, "Field to query.")
43 | 	flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index to use.")
44 | 
45 | 	return cmd
46 | }
47 | 
--------------------------------------------------------------------------------
/bench/doc.go:
--------------------------------------------------------------------------------
1 | // Package bench contains benchmarks and common utilities useful to benchmarks.
2 | //
3 | // In order to write new benchmarks, one must satisfy the Benchmark interface in
4 | // bench.go. In order to use the benchmark from pi, it needs a new file under
5 | // tools/cmd which defines a cobra.Cmd - look at an existing benchmark's file to
6 | // see what needs to be done.
7 | //
8 | // When writing a new benchmark, there are a few things to keep in mind other
9 | // than just implementing the interface:
10 | //
11 | // 1. The benchmark should modify its own configuration in its Init method based
12 | // on the agentNum it is given. How it modifies is specific to the benchmark,
13 | // but the idea is that it should make sense to call the benchmark with the same
14 | // configuration, but multiple different agent numbers, and it should do useful
15 | // work each time (i.e. not just setting the same bits, or running the same
16 | // queries).
17 | //
18 | // 2. The Init method should do everything that needs to be done to get the
19 | // benchmark to a runnable state - all code in Run should be the stuff that we
20 | // actually want to time.
21 | //
22 | // 3. The Run method does not need to report the total runtime - that is collected
23 | // by calling code.
24 | //
25 | // Files:
26 | //
27 | // 1. client.go contains pilosa client code which is shared by many benchmarks.
28 | //
29 | // 2. errgroup.go contains the ErrGroup implementation copied from golang.org/x/
30 | // so as not to pull in a bunch of useless deps.
31 | //
32 | // 3. stats.go contains useful code for gathering stats about a series of timed
33 | // operations.
34 | package bench
--------------------------------------------------------------------------------
/bench/slicewidth.go:
--------------------------------------------------------------------------------
1 | package bench
2 | 
3 | import (
4 | 	"context"
5 | 	"log"
6 | 	"os"
7 | 
8 | 	"github.com/pilosa/go-pilosa"
9 | )
10 | 
11 | // SliceWidthBenchmark imports data at a given bit density across a configurable
12 | // number of slices, to compare the effect of slice width on query time.
13 | type SliceWidthBenchmark struct {
14 | 	Name string `json:"name"`
15 | 	Index string `json:"index"`
16 | 	Field string `json:"field"`
17 | 	BitDensity float64 `json:"bit-density"`
18 | 	SliceWidth int64 `json:"slice-width"`
19 | 	SliceCount int64 `json:"slice-count"`
20 | 
21 | 	Logger *log.Logger `json:"-"`
22 | }
23 | 
24 | // NewSliceWidthBenchmark creates a slice width benchmark.
25 | func NewSliceWidthBenchmark() *SliceWidthBenchmark {
26 | 	return &SliceWidthBenchmark{
27 | 		Name: "slice-width",
28 | 		Logger: log.New(os.Stderr, "", log.LstdFlags),
29 | 	}
30 | }
31 | 
32 | // Run runs the benchmark to import data.
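// It does so by deriving an ImportBenchmark from the slice parameters: the
// column space is SliceWidth*SliceCount, and the number of bits set is
// BitDensity*numColumns*numRows (numRows is fixed at 1000), distributed
// uniformly over that space.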
33 | func (b *SliceWidthBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 34 | numColumns := b.SliceWidth * b.SliceCount 35 | numRows := int64(1000) 36 | 37 | importBenchmark := NewImportBenchmark() 38 | importBenchmark.MaxRowID = numRows 39 | importBenchmark.MinColumnID = 0 40 | importBenchmark.MaxColumnID = numColumns 41 | importBenchmark.Iterations = int64(float64(numColumns)*b.BitDensity) * numRows 42 | importBenchmark.Index = b.Index 43 | importBenchmark.Field = b.Field 44 | importBenchmark.Distribution = "uniform" 45 | importBenchmark.BufferSize = 1000000 46 | 47 | result, err := importBenchmark.Run(ctx, client, agentNum) 48 | result.Configuration = b 49 | return result, err 50 | } 51 | -------------------------------------------------------------------------------- /bench/diagonal.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "os" 7 | "time" 8 | 9 | "github.com/pilosa/go-pilosa" 10 | ) 11 | 12 | // DiagonalSetBitsBenchmark sets bits with increasing column id and row id. 13 | type DiagonalSetBitsBenchmark struct { 14 | Name string `json:"name"` 15 | MinRowID int `json:"min-row-id"` 16 | MinColumnID int `json:"min-column-id"` 17 | Iterations int `json:"iterations"` 18 | Index string `json:"index"` 19 | Field string `json:"field"` 20 | 21 | Logger *log.Logger `json:"-"` 22 | } 23 | 24 | // NewDiagonalSetBitsBenchmark returns a new instance of DiagonalSetBitsBenchmark. 25 | func NewDiagonalSetBitsBenchmark() *DiagonalSetBitsBenchmark { 26 | return &DiagonalSetBitsBenchmark{ 27 | Name: "diagonal-set-bits", 28 | Logger: log.New(os.Stderr, "", log.LstdFlags), 29 | } 30 | } 31 | 32 | // Run runs the benchmark. 33 | func (b *DiagonalSetBitsBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 34 | result := NewResult() 35 | result.AgentNum = agentNum 36 | result.Configuration = b 37 | 38 | // Initialize schema. 
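// ensureSchema is a helper shared by the benchmarks (the shared client code
// lives in client.go); it is assumed to create the index and field if they
// do not already exist.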
39 | _, field, err := ensureSchema(client, b.Index, b.Field) 40 | if err != nil { 41 | return result, err 42 | } 43 | 44 | minRowID := b.MinRowID + (agentNum * b.Iterations) 45 | minColumnID := b.MinColumnID + (agentNum * b.Iterations) 46 | 47 | for n := 0; n < b.Iterations; n++ { 48 | start := time.Now() 49 | _, err := client.Query(field.Set(minRowID+n, minColumnID+n)) 50 | result.Add(time.Since(start), nil) 51 | if err != nil { 52 | return result, err 53 | } 54 | } 55 | return result, nil 56 | } 57 | -------------------------------------------------------------------------------- /imagine/samples/students.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.students_ts] 4 | columns = 10000 5 | fields = [ 6 | { name = "gender", type = "mutex", max = 3, density = 0.9, valueRule = "linear", cache = "lru" }, 7 | { name = "school", type = "set", max = 400, density = 0.10, valueRule = "zipf" , cache = "lru" }, 8 | { name = "timestamp", type = "time", max=1, density = 1.0, valueRule="zipf", quantum="YMDH" }, 9 | #{ name = "client_mac", type="set", max=10000000, density=0.001, valueRule="zipf"}, 10 | { name = "zone", type="set", max=300, density=0.1, valueRule="zipf", cache = "lru" }, 11 | { name = "age", type="int", max=100, min=14, density=0.99, valueRule="zipf", zipfV = 3.0, zipfS = 1.1 }, 12 | { name = "on_campus", type="mutex", max=2, density=0.9, valueRule="zipf", cache = "lru", zipfS = 1.1, zipfV = 3.0 }, 13 | { name = "athlete", type="mutex", max=2, density=1.0, valueRule="linear", cache = "lru" }, 14 | { name = "gpa", type="int", max=400, min=0, density=0.99, valueRule="zipf", zipfV = 3.0, zipfS = 1.1 }, 15 | ] 16 | [[workloads]] 17 | name = "ingest" 18 | threadCount = 6 19 | batchSize = 1048576 20 | useRoaring = true 21 | tasks = [ 22 | { index = "students_ts", field = "gender"}, 23 | { index = "students_ts", field = "school"}, 24 | { index = "students_ts", field = "timestamp", stamp = "increasing", stampStart = "2019-01-01T00:00:00Z", stampRange = "240h" }, 25 | #{ index = "students_ts", field = "client_mac"}, 26 | { index = "students_ts", field = "zone"}, 27 | { index = "students_ts", field = "age", seed = 1}, 28 | { index = "students_ts", field = "on_campus"}, 29 | { index = "students_ts", field = "gpa", seed = 2}, 30 | ] 31 | -------------------------------------------------------------------------------- /cmd/pi/diagonal.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewDiagonalSetBitsCommand() *cobra.Command { 12 | b := bench.NewDiagonalSetBitsBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "diagonal-set-bits", 15 | Short: "Sets bits with increasing column id and row id.", 16 | Long: `Sets bits with increasing column id and row id. 
17 | 18 | Agent num offsets both the min column id and min row id by the number of 19 | iterations, so that only bits on the main diagonal are set, and agents don't 20 | overlap at all.`, 21 | RunE: func(cmd *cobra.Command, args []string) error { 22 | flags := cmd.Flags() 23 | b.Logger = NewLoggerFromFlags(flags) 24 | client, err := NewClientFromFlags(flags) 25 | if err != nil { 26 | return err 27 | } 28 | agentNum, err := flags.GetInt("agent-num") 29 | if err != nil { 30 | return err 31 | } 32 | result, err := b.Run(context.Background(), client, agentNum) 33 | if err != nil { 34 | result.Error = err.Error() 35 | } 36 | return PrintResults(cmd, result, os.Stdout) 37 | }, 38 | } 39 | 40 | flags := cmd.Flags() 41 | flags.IntVar(&b.MinRowID, "min-row-id", 0, "Rows being set will all be greater than this.") 42 | flags.IntVar(&b.MinColumnID, "min-column-id", 0, "Columns being set will all be greater than this.") 43 | flags.IntVar(&b.Iterations, "iterations", 100, "Number of bits to set.") 44 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index in which to set bits.") 45 | flags.StringVar(&b.Field, "field", defaultField, "Pilosa field in which to set bits.") 46 | 47 | return cmd 48 | } 49 | -------------------------------------------------------------------------------- /cmd/pi/range_query.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewRangeQueryCommand() *cobra.Command { 12 | b := bench.NewRangeQueryBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "range-query", 15 | Short: "Constructs and performs range queries.", 16 | Long: `Constructs and performs range queries. 
17 | Agent num modifies random seed.`, 18 | RunE: func(cmd *cobra.Command, args []string) error { 19 | flags := cmd.Flags() 20 | b.Logger = NewLoggerFromFlags(flags) 21 | client, err := NewClientFromFlags(flags) 22 | if err != nil { 23 | return err 24 | } 25 | agentNum, err := flags.GetInt("agent-num") 26 | if err != nil { 27 | return err 28 | } 29 | result, err := b.Run(context.Background(), client, agentNum) 30 | if err != nil { 31 | result.Error = err.Error() 32 | } 33 | return PrintResults(cmd, result, os.Stdout) 34 | }, 35 | } 36 | 37 | flags := cmd.Flags() 38 | flags.IntVar(&b.MaxDepth, "max-depth", 2, "Maximum nesting of queries.") 39 | flags.IntVar(&b.MaxArgs, "max-args", 2, "Maximum number of arguments per query.") 40 | flags.Int64Var(&b.MinRange, "min-range", 0, "Minimum range to include in queries.") 41 | flags.Int64Var(&b.MaxRange, "max-range", 100, "Maximum range to include in queries.") 42 | flags.Int64Var(&b.Seed, "seed", 1, "random seed") 43 | flags.IntVar(&b.Iterations, "iterations", 100, "Number queries to perform.") 44 | flags.StringVar(&b.Field, "field", defaultField, "Field to query.") 45 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index to use.") 46 | flags.StringVar(&b.QueryType, "type", "sum", "Query type for range, default to sum") 47 | return cmd 48 | } 49 | -------------------------------------------------------------------------------- /cmd/pi/random_query.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewRandomQueryCommand() *cobra.Command { 12 | b := bench.NewRandomQueryBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "random-query", 15 | Short: "Constructs and performs random queries.", 16 | Long: `Constructs and performs random queries. 
17 | Agent num modifies random seed.`, 18 | RunE: func(cmd *cobra.Command, args []string) error { 19 | flags := cmd.Flags() 20 | b.Logger = NewLoggerFromFlags(flags) 21 | client, err := NewClientFromFlags(flags) 22 | if err != nil { 23 | return err 24 | } 25 | agentNum, err := flags.GetInt("agent-num") 26 | if err != nil { 27 | return err 28 | } 29 | result, err := b.Run(context.Background(), client, agentNum) 30 | if err != nil { 31 | result.Error = err.Error() 32 | } 33 | return PrintResults(cmd, result, os.Stdout) 34 | }, 35 | } 36 | 37 | flags := cmd.Flags() 38 | flags.IntVar(&b.MaxDepth, "max-depth", 4, "Maximum nesting of queries.") 39 | flags.IntVar(&b.MaxArgs, "max-args", 4, "Maximum number of arguments per query.") 40 | flags.IntVar(&b.MaxN, "max-n", 100, "Maximum value of N for TopN queries.") 41 | flags.Int64Var(&b.MinRowID, "min-row-id", 0, "Minimum row id to include in queries.") 42 | flags.Int64Var(&b.MaxRowID, "max-row-id", 100000, "Maximum row id to include in queries.") 43 | flags.Int64Var(&b.Seed, "seed", 1, "random seed") 44 | flags.IntVar(&b.Iterations, "iterations", 100, "Number queries to perform.") 45 | flags.StringVar(&b.Field, "field", defaultField, "Field to query.") 46 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index to use.") 47 | 48 | return cmd 49 | } 50 | -------------------------------------------------------------------------------- /bench/range_query.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "os" 7 | "time" 8 | 9 | "github.com/pilosa/go-pilosa" 10 | ) 11 | 12 | // RangeQueryBenchmark runs Range query randomly. 13 | type RangeQueryBenchmark struct { 14 | Name string `json:"name"` 15 | MaxDepth int `json:"max-depth"` 16 | MaxArgs int `json:"max-args"` 17 | MaxN int `json:"max-n"` 18 | MinRange int64 `json:"min-range"` 19 | MaxRange int64 `json:"max-range"` 20 | Iterations int `json:"iterations"` 21 | Seed int64 `json:"seed"` 22 | Frame string `json:"frame"` 23 | Index string `json:"index"` 24 | Field string `json:"field"` 25 | QueryType string `json:"type"` 26 | 27 | Logger *log.Logger `json:"-"` 28 | } 29 | 30 | // NewRangeQueryBenchmark returns a new instance of RangeQueryBenchmark. 31 | func NewRangeQueryBenchmark() *RangeQueryBenchmark { 32 | return &RangeQueryBenchmark{ 33 | Name: "range-query", 34 | Logger: log.New(os.Stderr, "", log.LstdFlags), 35 | } 36 | } 37 | 38 | // Run runs the benchmark. 39 | func (b *RangeQueryBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 40 | result := NewResult() 41 | result.AgentNum = agentNum 42 | result.Configuration = b 43 | 44 | // Initialize schema. 
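// Note that the query generator below is seeded with b.Seed alone; unlike
// RandomQueryBenchmark, the agent number is not mixed into the seed here.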
45 | index, field, err := ensureSchema(client, b.Index, b.Field) 46 | if err != nil { 47 | return result, err 48 | } 49 | 50 | g := NewQueryGenerator(index, field, b.Seed) 51 | for n := 0; n < b.Iterations; n++ { 52 | start := time.Now() 53 | _, err := client.Query(g.RandomRangeQuery(b.MaxDepth, b.MaxArgs, uint64(b.MinRange), uint64(b.MaxRange))) 54 | result.Add(time.Since(start), nil) 55 | if err != nil { 56 | return result, err 57 | } 58 | } 59 | return result, nil 60 | } 61 | -------------------------------------------------------------------------------- /bench/random_query.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "os" 7 | "time" 8 | 9 | "github.com/pilosa/go-pilosa" 10 | ) 11 | 12 | var _ Benchmark = (*BasicQueryBenchmark)(nil) 13 | 14 | // RandomQueryBenchmark queries randomly and deterministically based on a seed. 15 | type RandomQueryBenchmark struct { 16 | Name string `json:"name"` 17 | MaxDepth int `json:"max-depth"` 18 | MaxArgs int `json:"max-args"` 19 | MaxN int `json:"max-n"` 20 | MinRowID int64 `json:"min-row-id"` 21 | MaxRowID int64 `json:"max-row-id"` 22 | Iterations int `json:"iterations"` 23 | Seed int64 `json:"seed"` 24 | Index string `json:"index"` 25 | Field string `json:"field"` 26 | 27 | Logger *log.Logger `json:"-"` 28 | } 29 | 30 | // NewRandomQueryBenchmark returns a new instance of RandomQueryBenchmark. 31 | func NewRandomQueryBenchmark() *RandomQueryBenchmark { 32 | return &RandomQueryBenchmark{ 33 | Name: "random-query", 34 | Logger: log.New(os.Stderr, "", log.LstdFlags), 35 | } 36 | } 37 | 38 | // Run runs the RandomQuery benchmark 39 | func (b *RandomQueryBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 40 | result := NewResult() 41 | result.AgentNum = agentNum 42 | result.Configuration = b 43 | 44 | // Initialize schema. 45 | index, field, err := ensureSchema(client, b.Index, b.Field) 46 | if err != nil { 47 | return result, err 48 | } 49 | 50 | g := NewQueryGenerator(index, field, b.Seed+int64(agentNum)) 51 | for n := 0; n < b.Iterations; n++ { 52 | start := time.Now() 53 | _, err := client.Query(g.Random(b.MaxN, b.MaxDepth, b.MaxArgs, uint64(b.MinRowID), uint64(b.MaxRowID-b.MinRowID))) 54 | result.Add(time.Since(start), nil) 55 | if err != nil { 56 | return result, err 57 | } 58 | } 59 | return result, nil 60 | } 61 | -------------------------------------------------------------------------------- /cmd/pi/replay.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | 8 | "github.com/jaffee/commandeer/cobrafy" 9 | "github.com/pilosa/go-pilosa" 10 | "github.com/pkg/errors" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | func NewReplayCommand() *cobra.Command { 15 | com, err := cobrafy.Command(newReplayCommand()) 16 | if err != nil { 17 | panic(fmt.Sprintf("Couldn't create cobra command: %v", err)) 18 | } 19 | com.Use = "replay" 20 | com.Short = "Replay recorded Pilosa imports." 21 | com.Long = `Replay recorded Pilosa imports. 22 | 23 | The go-pilosa client contains an option which allows it to record all 24 | imports it runs to a file (or other io.Writer). This tool takes a 25 | Pilosa cluster and a filename containing such recorded data and 26 | imports the data into that cluster. The cluster must already have the 27 | schema set up to support the recorded import data. 
28 | ` 29 | 30 | return com 31 | } 32 | 33 | func newReplayCommand() *ReplayCommand { 34 | return &ReplayCommand{ 35 | File: "replay.gopilosa", 36 | Hosts: []string{"localhost:10101"}, 37 | Concurrency: 8, 38 | } 39 | } 40 | 41 | type ReplayCommand struct { 42 | File string `help:"File to read from."` 43 | Hosts []string `help:"Pilosa hosts (comma separated)."` 44 | Concurrency int `help:"Number of goroutines importing data." short:"n"` 45 | } 46 | 47 | func (r *ReplayCommand) Run() error { 48 | client, err := pilosa.NewClient(r.Hosts) 49 | if err != nil { 50 | return errors.Wrap(err, "creating Pilosa client") 51 | } 52 | f, err := os.Open(r.File) 53 | if err != nil { 54 | return errors.Wrap(err, "opening replay file") 55 | } 56 | 57 | start := time.Now() 58 | err = client.ExperimentalReplayImport(f, r.Concurrency) 59 | if err != nil { 60 | return errors.Wrap(err, "") 61 | } 62 | fmt.Printf("Done: %v", time.Since(start)) 63 | return nil 64 | } 65 | -------------------------------------------------------------------------------- /apophenia/int128_test.go: -------------------------------------------------------------------------------- 1 | package apophenia 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func Test_Int128Rotate(t *testing.T) { 8 | cases := []struct { 9 | in Uint128 10 | bits uint64 11 | outL, outR Uint128 12 | }{ 13 | {in: Uint128{Lo: 0x1}, bits: 1, outR: Uint128{Lo: 0x0, Hi: 1 << 63}, outL: Uint128{Lo: 0x2, Hi: 0}}, 14 | {in: Uint128{Lo: 0x11}, bits: 4, outR: Uint128{Lo: 1, Hi: 1 << 60}, outL: Uint128{Lo: 0x110, Hi: 0}}, 15 | {in: Uint128{Lo: 0x11}, bits: 65, outR: Uint128{Lo: 1 << 63, Hi: 8}, outL: Uint128{Lo: 0, Hi: 0x22}}, 16 | } 17 | for _, c := range cases { 18 | u := c.in 19 | u.RotateRight(c.bits) 20 | if u != c.outR { 21 | t.Fatalf("rotate %s right by %d: expected %s, got %s", 22 | c.in, c.bits, c.outR, u) 23 | } 24 | u = c.in 25 | u.RotateLeft(c.bits) 26 | if u != c.outL { 27 | t.Fatalf("rotate %s left by %d: expected %s, got %s", 28 | c.in, c.bits, c.outL, u) 29 | } 30 | } 31 | } 32 | 33 | func Test_Int128Shift(t *testing.T) { 34 | cases := []struct { 35 | in Uint128 36 | bits uint64 37 | outL, outR Uint128 38 | }{ 39 | {in: Uint128{Lo: 0x1}, bits: 1, outR: Uint128{Lo: 0x0, Hi: 0}, outL: Uint128{Lo: 0x2, Hi: 0}}, 40 | {in: Uint128{Lo: 0x11}, bits: 4, outR: Uint128{Lo: 1, Hi: 0}, outL: Uint128{Lo: 0x110, Hi: 0}}, 41 | {in: Uint128{Lo: 0x11, Hi: 0x3}, bits: 65, outR: Uint128{Lo: 1, Hi: 0}, outL: Uint128{Lo: 0, Hi: 0x22}}, 42 | {in: Uint128{Lo: 0, Hi: 0x11}, bits: 68, outR: Uint128{Lo: 1, Hi: 0}, outL: Uint128{Lo: 0, Hi: 0}}, 43 | } 44 | for _, c := range cases { 45 | u := c.in 46 | u.ShiftRight(c.bits) 47 | if u != c.outR { 48 | t.Fatalf("shift %s right by %d: expected %s, got %s", 49 | c.in, c.bits, c.outR, u) 50 | } 51 | u = c.in 52 | u.ShiftLeft(c.bits) 53 | if u != c.outL { 54 | t.Fatalf("shift %s left by %d: expected %s, got %s", 55 | c.in, c.bits, c.outL, u) 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /bench/bench_test.go: -------------------------------------------------------------------------------- 1 | package bench_test 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "os" 8 | "testing" 9 | "time" 10 | 11 | "github.com/pilosa/tools/bench" 12 | ) 13 | 14 | func prettyEncode(data map[string]interface{}) string { 15 | pretty := bench.Prettify(data) 16 | jsonString := new(bytes.Buffer) 17 | enc := json.NewEncoder(jsonString) 18 | enc.SetIndent("", " ") 19 | err := enc.Encode(pretty) 20 | 
if err != nil { 21 | fmt.Fprintln(os.Stderr, err) 22 | } 23 | 24 | return jsonString.String() 25 | } 26 | 27 | func TestPrettifyString(t *testing.T) { 28 | res := make(map[string]interface{}, 1) 29 | res["0"] = "foobar" 30 | pretty := prettyEncode(res) 31 | 32 | expected := ` 33 | { 34 | "0": "foobar" 35 | } 36 | `[1:] 37 | 38 | if pretty != expected { 39 | t.Fatalf("Pretty string doesn't match") 40 | } 41 | } 42 | 43 | func TestPrettifyInt(t *testing.T) { 44 | res := make(map[string]interface{}, 1) 45 | res["0"] = 234567 46 | pretty := prettyEncode(res) 47 | 48 | expected := ` 49 | { 50 | "0": 234567 51 | } 52 | `[1:] 53 | 54 | if pretty != expected { 55 | t.Fatalf("Pretty int doesn't match") 56 | } 57 | } 58 | 59 | func TestPrettifyDuration(t *testing.T) { 60 | res := make(map[string]interface{}, 1) 61 | res["0"] = time.Duration(234567) 62 | pretty := prettyEncode(res) 63 | 64 | expected := ` 65 | { 66 | "0": "234.567µs" 67 | } 68 | `[1:] 69 | 70 | if pretty != expected { 71 | t.Fatalf("Pretty duration doesn't match") 72 | } 73 | } 74 | 75 | func TestPrettifyDurationSlice(t *testing.T) { 76 | res := make(map[string]interface{}, 1) 77 | res["0"] = []time.Duration{123, 234567, 34567890} 78 | pretty := prettyEncode(res) 79 | 80 | expected := ` 81 | { 82 | "0": [ 83 | "123ns", 84 | "234.567µs", 85 | "34.56789ms" 86 | ] 87 | } 88 | `[1:] 89 | 90 | if pretty != expected { 91 | t.Fatalf("Pretty duration slice doesn't match") 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/pilosa/tools 2 | 3 | go 1.12 4 | 5 | require ( 6 | cloud.google.com/go v0.43.0 // indirect 7 | github.com/BurntSushi/toml v0.3.1 8 | github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect 9 | github.com/go-kit/kit v0.9.0 // indirect 10 | github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6 // indirect 11 | github.com/gorilla/handlers v1.4.1 // indirect 12 | github.com/gorilla/mux v1.7.3 // indirect 13 | github.com/grpc-ecosystem/grpc-gateway v1.9.4 // indirect 14 | github.com/jaffee/commandeer v0.1.0 15 | github.com/kr/pty v1.1.8 // indirect 16 | github.com/miekg/dns v1.1.15 // indirect 17 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect 18 | github.com/pilosa/go-pilosa v1.3.1-0.20190715210601-8606626b90d6 19 | github.com/pilosa/pilosa v1.3.1 20 | github.com/pkg/errors v0.8.1 21 | github.com/prometheus/procfs v0.0.3 // indirect 22 | github.com/rogpeppe/fastuuid v1.2.0 // indirect 23 | github.com/spf13/cobra v0.0.5 24 | github.com/spf13/pflag v1.0.3 25 | github.com/spf13/viper v1.4.0 26 | github.com/ugorji/go v1.1.7 // indirect 27 | golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 // indirect 28 | golang.org/x/exp v0.0.0-20190718202018-cfdd5522f6f6 // indirect 29 | golang.org/x/image v0.0.0-20190703141733-d6a02ce849c9 // indirect 30 | golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028 // indirect 31 | golang.org/x/net v0.0.0-20190628185345-da137c7871d7 // indirect 32 | golang.org/x/sync v0.0.0-20190423024810-112230192c58 33 | golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 // indirect 34 | golang.org/x/tools v0.0.0-20190719005602-e377ae9d6386 // indirect 35 | google.golang.org/grpc v1.22.0 // indirect 36 | ) 37 | 38 | replace github.com/pilosa/pilosa => github.com/pilosa/pilosa v1.2.1-0.20190715194839-bd00f1bfe2b2 39 | 40 | replace github.com/pilosa/go-pilosa => github.com/pilosa/go-pilosa 
v1.3.1-0.20190715210601-8606626b90d6 41 | -------------------------------------------------------------------------------- /imagine/enums_stamptype.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=stampType -trimprefix=stampType -text -transform=kebab -output enums_stamptype.go"; DO NOT EDIT. 2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _stampTypeName = "noneincreasingrandom" 11 | 12 | var _stampTypeIndex = [...]uint8{0, 4, 14, 20} 13 | 14 | func (i stampType) String() string { 15 | if i < 0 || i >= stampType(len(_stampTypeIndex)-1) { 16 | return fmt.Sprintf("stampType(%d)", i) 17 | } 18 | return _stampTypeName[_stampTypeIndex[i]:_stampTypeIndex[i+1]] 19 | } 20 | 21 | var _stampTypeValues = []stampType{0, 1, 2} 22 | 23 | var _stampTypeNameToValueMap = map[string]stampType{ 24 | _stampTypeName[0:4]: 0, 25 | _stampTypeName[4:14]: 1, 26 | _stampTypeName[14:20]: 2, 27 | } 28 | 29 | // stampTypeString retrieves an enum value from the enum constants string name. 30 | // Throws an error if the param is not part of the enum. 31 | func stampTypeString(s string) (stampType, error) { 32 | if val, ok := _stampTypeNameToValueMap[s]; ok { 33 | return val, nil 34 | } 35 | return 0, fmt.Errorf("%s does not belong to stampType values", s) 36 | } 37 | 38 | // stampTypeValues returns all values of the enum 39 | func stampTypeValues() []stampType { 40 | return _stampTypeValues 41 | } 42 | 43 | // IsAstampType returns "true" if the value is listed in the enum definition. "false" otherwise 44 | func (i stampType) IsAstampType() bool { 45 | for _, v := range _stampTypeValues { 46 | if i == v { 47 | return true 48 | } 49 | } 50 | return false 51 | } 52 | 53 | // MarshalText implements the encoding.TextMarshaler interface for stampType 54 | func (i stampType) MarshalText() ([]byte, error) { 55 | return []byte(i.String()), nil 56 | } 57 | 58 | // UnmarshalText implements the encoding.TextUnmarshaler interface for stampType 59 | func (i *stampType) UnmarshalText(text []byte) error { 60 | var err error 61 | *i, err = stampTypeString(string(text)) 62 | return err 63 | } 64 | -------------------------------------------------------------------------------- /imagine/enums_densitytype.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=densityType -trimprefix=densityType -text -transform=kebab -output enums_densitytype.go"; DO NOT EDIT. 2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _densityTypeName = "linearzipf" 11 | 12 | var _densityTypeIndex = [...]uint8{0, 6, 10} 13 | 14 | func (i densityType) String() string { 15 | if i < 0 || i >= densityType(len(_densityTypeIndex)-1) { 16 | return fmt.Sprintf("densityType(%d)", i) 17 | } 18 | return _densityTypeName[_densityTypeIndex[i]:_densityTypeIndex[i+1]] 19 | } 20 | 21 | var _densityTypeValues = []densityType{0, 1} 22 | 23 | var _densityTypeNameToValueMap = map[string]densityType{ 24 | _densityTypeName[0:6]: 0, 25 | _densityTypeName[6:10]: 1, 26 | } 27 | 28 | // densityTypeString retrieves an enum value from the enum constants string name. 29 | // Throws an error if the param is not part of the enum. 
30 | func densityTypeString(s string) (densityType, error) { 31 | if val, ok := _densityTypeNameToValueMap[s]; ok { 32 | return val, nil 33 | } 34 | return 0, fmt.Errorf("%s does not belong to densityType values", s) 35 | } 36 | 37 | // densityTypeValues returns all values of the enum 38 | func densityTypeValues() []densityType { 39 | return _densityTypeValues 40 | } 41 | 42 | // IsAdensityType returns "true" if the value is listed in the enum definition. "false" otherwise 43 | func (i densityType) IsAdensityType() bool { 44 | for _, v := range _densityTypeValues { 45 | if i == v { 46 | return true 47 | } 48 | } 49 | return false 50 | } 51 | 52 | // MarshalText implements the encoding.TextMarshaler interface for densityType 53 | func (i densityType) MarshalText() ([]byte, error) { 54 | return []byte(i.String()), nil 55 | } 56 | 57 | // UnmarshalText implements the encoding.TextUnmarshaler interface for densityType 58 | func (i *densityType) UnmarshalText(text []byte) error { 59 | var err error 60 | *i, err = densityTypeString(string(text)) 61 | return err 62 | } 63 | -------------------------------------------------------------------------------- /imagine/enums_cachetype.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=cacheType -trimprefix=cacheType -text -transform=kebab -output enums_cachetype.go"; DO NOT EDIT. 2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _cacheTypeName = "defaultnonelruranked" 11 | 12 | var _cacheTypeIndex = [...]uint8{0, 7, 11, 14, 20} 13 | 14 | func (i cacheType) String() string { 15 | if i < 0 || i >= cacheType(len(_cacheTypeIndex)-1) { 16 | return fmt.Sprintf("cacheType(%d)", i) 17 | } 18 | return _cacheTypeName[_cacheTypeIndex[i]:_cacheTypeIndex[i+1]] 19 | } 20 | 21 | var _cacheTypeValues = []cacheType{0, 1, 2, 3} 22 | 23 | var _cacheTypeNameToValueMap = map[string]cacheType{ 24 | _cacheTypeName[0:7]: 0, 25 | _cacheTypeName[7:11]: 1, 26 | _cacheTypeName[11:14]: 2, 27 | _cacheTypeName[14:20]: 3, 28 | } 29 | 30 | // cacheTypeString retrieves an enum value from the enum constants string name. 31 | // Throws an error if the param is not part of the enum. 32 | func cacheTypeString(s string) (cacheType, error) { 33 | if val, ok := _cacheTypeNameToValueMap[s]; ok { 34 | return val, nil 35 | } 36 | return 0, fmt.Errorf("%s does not belong to cacheType values", s) 37 | } 38 | 39 | // cacheTypeValues returns all values of the enum 40 | func cacheTypeValues() []cacheType { 41 | return _cacheTypeValues 42 | } 43 | 44 | // IsAcacheType returns "true" if the value is listed in the enum definition. 
"false" otherwise 45 | func (i cacheType) IsAcacheType() bool { 46 | for _, v := range _cacheTypeValues { 47 | if i == v { 48 | return true 49 | } 50 | } 51 | return false 52 | } 53 | 54 | // MarshalText implements the encoding.TextMarshaler interface for cacheType 55 | func (i cacheType) MarshalText() ([]byte, error) { 56 | return []byte(i.String()), nil 57 | } 58 | 59 | // UnmarshalText implements the encoding.TextUnmarshaler interface for cacheType 60 | func (i *cacheType) UnmarshalText(text []byte) error { 61 | var err error 62 | *i, err = cacheTypeString(string(text)) 63 | return err 64 | } 65 | -------------------------------------------------------------------------------- /cmd/pi/tps.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/jaffee/commandeer/cobrafy" 8 | "github.com/pilosa/tools/bench" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | // NewQueryCommand subcommands 13 | func NewTPSCommand() *cobra.Command { 14 | b := bench.NewTPSBenchmark() 15 | com, err := cobrafy.Command(b) 16 | if err != nil { 17 | panic(err) 18 | } 19 | com.Use = b.Name 20 | com.Short = "Run TPS benchmark." 21 | com.Long = `Run TPS benchmark. 22 | 23 | This benchmark spawns goroutines, each of which queries 24 | Pilosa times serially. The idea is to get an 25 | understanding of what kind of query throughput various Pilosa 26 | configurations can handle. 27 | 28 | For this to be useful, you must already have an index in Pilosa with 29 | at least 1 field which has some data in it. I recommend the "imagine" 30 | tool (in this repository) for generating fake data with semi-realistic 31 | characteristics. 32 | 33 | For each query, TPS chooses randomly from the enabled query types 34 | (intersect, union, difference, xor), and then chooses two random 35 | fields, and two random rows within each field to perform the given 36 | operation on. It wraps each query in a Count() to make the result size 37 | consistent. 38 | 39 | Currently, row IDs are always chosen randomly between min and max. If 40 | no index is given, one is chosen at random, and if no fields are 41 | given, all the fields in the index are used. 42 | 43 | ` 44 | 45 | com.RunE = func(cmd *cobra.Command, args []string) error { 46 | flags := cmd.Flags() 47 | b.Logger = NewLoggerFromFlags(flags) 48 | client, err := NewClientFromFlags(flags) 49 | if err != nil { 50 | return err 51 | } 52 | agentNum, err := flags.GetInt("agent-num") 53 | if err != nil { 54 | return err 55 | } 56 | result, err := b.Run(context.Background(), client, agentNum) 57 | if err != nil { 58 | result.Error = err.Error() 59 | } 60 | return PrintResults(cmd, result, os.Stdout) 61 | } 62 | return com 63 | } 64 | -------------------------------------------------------------------------------- /cmd/pi/import.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewImportCommand() *cobra.Command { 12 | b := bench.NewImportBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "import", 15 | Short: "Import random data into Pilosa quickly.", 16 | Long: `import generates random data which can be controlled by command line flags and streams it into Pilosa's /import endpoint. 
Agent num has no effect`, 17 | RunE: func(cmd *cobra.Command, args []string) error { 18 | flags := cmd.Flags() 19 | b.Logger = NewLoggerFromFlags(flags) 20 | client, err := NewClientFromFlags(flags) 21 | if err != nil { 22 | return err 23 | } 24 | agentNum, err := flags.GetInt("agent-num") 25 | if err != nil { 26 | return err 27 | } 28 | result, err := b.Run(context.Background(), client, agentNum) 29 | if err != nil { 30 | result.Error = err.Error() 31 | } 32 | return PrintResults(cmd, result, os.Stdout) 33 | }, 34 | } 35 | 36 | flags := cmd.Flags() 37 | flags.Int64Var(&b.MinRowID, "min-row-id", 0, "Minimum row id of set bits.") 38 | flags.Int64Var(&b.MinColumnID, "min-column-id", 0, "Minimum column id of set bits.") 39 | flags.Int64Var(&b.MaxRowID, "max-row-id", 1000, "Maximum row id of set bits.") 40 | flags.Int64Var(&b.MaxColumnID, "max-column-id", 1000, "Maximum column id of set bits.") 41 | flags.Int64Var(&b.Iterations, "iterations", 100000, "Number of bits to set") 42 | flags.Int64Var(&b.Seed, "seed", 0, "Random seed.") 43 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index in which to set bits.") 44 | flags.StringVar(&b.Field, "field", defaultField, "Pilosa field in which to set bits.") 45 | flags.StringVar(&b.Distribution, "distribution", "exponential", "Random distribution for deltas between set bits (exponential or uniform).") 46 | flags.IntVar(&b.BufferSize, "buffer-size", 10000000, "Number of set bits to buffer in importer before POSTing to Pilosa.") 47 | 48 | return cmd 49 | } 50 | -------------------------------------------------------------------------------- /imagine/enums_fieldtype.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=fieldType -trimprefix=fieldType -transform=kebab -text -output enums_fieldtype.go"; DO NOT EDIT. 2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _fieldTypeName = "undefintsetmutextime" 11 | 12 | var _fieldTypeIndex = [...]uint8{0, 5, 8, 11, 16, 20} 13 | 14 | func (i fieldType) String() string { 15 | if i < 0 || i >= fieldType(len(_fieldTypeIndex)-1) { 16 | return fmt.Sprintf("fieldType(%d)", i) 17 | } 18 | return _fieldTypeName[_fieldTypeIndex[i]:_fieldTypeIndex[i+1]] 19 | } 20 | 21 | var _fieldTypeValues = []fieldType{0, 1, 2, 3, 4} 22 | 23 | var _fieldTypeNameToValueMap = map[string]fieldType{ 24 | _fieldTypeName[0:5]: 0, 25 | _fieldTypeName[5:8]: 1, 26 | _fieldTypeName[8:11]: 2, 27 | _fieldTypeName[11:16]: 3, 28 | _fieldTypeName[16:20]: 4, 29 | } 30 | 31 | // fieldTypeString retrieves an enum value from the enum constants string name. 32 | // Throws an error if the param is not part of the enum. 33 | func fieldTypeString(s string) (fieldType, error) { 34 | if val, ok := _fieldTypeNameToValueMap[s]; ok { 35 | return val, nil 36 | } 37 | return 0, fmt.Errorf("%s does not belong to fieldType values", s) 38 | } 39 | 40 | // fieldTypeValues returns all values of the enum 41 | func fieldTypeValues() []fieldType { 42 | return _fieldTypeValues 43 | } 44 | 45 | // IsAfieldType returns "true" if the value is listed in the enum definition. 
"false" otherwise 46 | func (i fieldType) IsAfieldType() bool { 47 | for _, v := range _fieldTypeValues { 48 | if i == v { 49 | return true 50 | } 51 | } 52 | return false 53 | } 54 | 55 | // MarshalText implements the encoding.TextMarshaler interface for fieldType 56 | func (i fieldType) MarshalText() ([]byte, error) { 57 | return []byte(i.String()), nil 58 | } 59 | 60 | // UnmarshalText implements the encoding.TextUnmarshaler interface for fieldType 61 | func (i *fieldType) UnmarshalText(text []byte) error { 62 | var err error 63 | *i, err = fieldTypeString(string(text)) 64 | return err 65 | } 66 | -------------------------------------------------------------------------------- /cmd/pi/import_range.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewImportRangeCommand() *cobra.Command { 12 | b := bench.NewImportRangeBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "import-range", 15 | Short: "Import random field data into Pilosa.", 16 | Long: `import-range generates random data which can be controlled by command line flags and streams it into Pilosa's /import endpoint. Agent num has no effect`, 17 | RunE: func(cmd *cobra.Command, args []string) error { 18 | flags := cmd.Flags() 19 | b.Logger = NewLoggerFromFlags(flags) 20 | client, err := NewClientFromFlags(flags) 21 | if err != nil { 22 | return err 23 | } 24 | agentNum, err := flags.GetInt("agent-num") 25 | if err != nil { 26 | return err 27 | } 28 | result, err := b.Run(context.Background(), client, agentNum) 29 | if err != nil { 30 | result.Error = err.Error() 31 | } 32 | return PrintResults(cmd, result, os.Stdout) 33 | }, 34 | } 35 | 36 | flags := cmd.Flags() 37 | flags.Int64Var(&b.MinValue, "min-value", 0, "Minimum row id of set bits.") 38 | flags.Int64Var(&b.MinColumnID, "min-column-id", 0, "Minimum column id of set bits.") 39 | flags.Int64Var(&b.MaxValue, "max-value", 1000, "Maximum row id of set bits.") 40 | flags.Int64Var(&b.MaxColumnID, "max-column-id", 1000, "Maximum column id of set bits.") 41 | flags.Int64Var(&b.Iterations, "iterations", 1000, "Number of bits to set") 42 | flags.Int64Var(&b.Seed, "seed", 0, "Random seed.") 43 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index in which to set bits.") 44 | flags.StringVar(&b.Field, "field", defaultField, "Pilosa field in which to set bits.") 45 | flags.StringVar(&b.Distribution, "distribution", "uniform", "Random distribution for deltas between set bits (exponential or uniform).") 46 | flags.IntVar(&b.BufferSize, "buffer-size", 10000000, "Number of set bits to buffer in importer before POSTing to Pilosa.") 47 | 48 | return cmd 49 | } 50 | -------------------------------------------------------------------------------- /imagine/enums_valueorder.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=valueOrder -trimprefix=valueOrder -text -transform=kebab -output enums_valueorder.go"; DO NOT EDIT. 
2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _valueOrderName = "linearstridepermutezipf" 11 | 12 | var _valueOrderIndex = [...]uint8{0, 6, 12, 19, 23} 13 | 14 | func (i valueOrder) String() string { 15 | if i < 0 || i >= valueOrder(len(_valueOrderIndex)-1) { 16 | return fmt.Sprintf("valueOrder(%d)", i) 17 | } 18 | return _valueOrderName[_valueOrderIndex[i]:_valueOrderIndex[i+1]] 19 | } 20 | 21 | var _valueOrderValues = []valueOrder{0, 1, 2, 3} 22 | 23 | var _valueOrderNameToValueMap = map[string]valueOrder{ 24 | _valueOrderName[0:6]: 0, 25 | _valueOrderName[6:12]: 1, 26 | _valueOrderName[12:19]: 2, 27 | _valueOrderName[19:23]: 3, 28 | } 29 | 30 | // valueOrderString retrieves an enum value from the enum constants string name. 31 | // Throws an error if the param is not part of the enum. 32 | func valueOrderString(s string) (valueOrder, error) { 33 | if val, ok := _valueOrderNameToValueMap[s]; ok { 34 | return val, nil 35 | } 36 | return 0, fmt.Errorf("%s does not belong to valueOrder values", s) 37 | } 38 | 39 | // valueOrderValues returns all values of the enum 40 | func valueOrderValues() []valueOrder { 41 | return _valueOrderValues 42 | } 43 | 44 | // IsAvalueOrder returns "true" if the value is listed in the enum definition. "false" otherwise 45 | func (i valueOrder) IsAvalueOrder() bool { 46 | for _, v := range _valueOrderValues { 47 | if i == v { 48 | return true 49 | } 50 | } 51 | return false 52 | } 53 | 54 | // MarshalText implements the encoding.TextMarshaler interface for valueOrder 55 | func (i valueOrder) MarshalText() ([]byte, error) { 56 | return []byte(i.String()), nil 57 | } 58 | 59 | // UnmarshalText implements the encoding.TextUnmarshaler interface for valueOrder 60 | func (i *valueOrder) UnmarshalText(text []byte) error { 61 | var err error 62 | *i, err = valueOrderString(string(text)) 63 | return err 64 | } 65 | -------------------------------------------------------------------------------- /imagine/enums_timequantum.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=timeQuantum -trimprefix=timeQuantum -text -transform=caps -output enums_timequantum.go"; DO NOT EDIT. 2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _timeQuantumName = "YYMYMDYMDH" 11 | 12 | var _timeQuantumIndex = [...]uint8{0, 1, 3, 6, 10} 13 | 14 | func (i timeQuantum) String() string { 15 | if i < 0 || i >= timeQuantum(len(_timeQuantumIndex)-1) { 16 | return fmt.Sprintf("timeQuantum(%d)", i) 17 | } 18 | return _timeQuantumName[_timeQuantumIndex[i]:_timeQuantumIndex[i+1]] 19 | } 20 | 21 | var _timeQuantumValues = []timeQuantum{0, 1, 2, 3} 22 | 23 | var _timeQuantumNameToValueMap = map[string]timeQuantum{ 24 | _timeQuantumName[0:1]: 0, 25 | _timeQuantumName[1:3]: 1, 26 | _timeQuantumName[3:6]: 2, 27 | _timeQuantumName[6:10]: 3, 28 | } 29 | 30 | // timeQuantumString retrieves an enum value from the enum constants string name. 31 | // Throws an error if the param is not part of the enum. 32 | func timeQuantumString(s string) (timeQuantum, error) { 33 | if val, ok := _timeQuantumNameToValueMap[s]; ok { 34 | return val, nil 35 | } 36 | return 0, fmt.Errorf("%s does not belong to timeQuantum values", s) 37 | } 38 | 39 | // timeQuantumValues returns all values of the enum 40 | func timeQuantumValues() []timeQuantum { 41 | return _timeQuantumValues 42 | } 43 | 44 | // IsAtimeQuantum returns "true" if the value is listed in the enum definition. 
"false" otherwise 45 | func (i timeQuantum) IsAtimeQuantum() bool { 46 | for _, v := range _timeQuantumValues { 47 | if i == v { 48 | return true 49 | } 50 | } 51 | return false 52 | } 53 | 54 | // MarshalText implements the encoding.TextMarshaler interface for timeQuantum 55 | func (i timeQuantum) MarshalText() ([]byte, error) { 56 | return []byte(i.String()), nil 57 | } 58 | 59 | // UnmarshalText implements the encoding.TextUnmarshaler interface for timeQuantum 60 | func (i *timeQuantum) UnmarshalText(text []byte) error { 61 | var err error 62 | *i, err = timeQuantumString(string(text)) 63 | return err 64 | } 65 | -------------------------------------------------------------------------------- /imagine/enums_verifytype.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=verifyType -trimprefix=verifyType -text -transform=kebab -output enums_verifytype.go"; DO NOT EDIT. 2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _verifyTypeName = "errornonepurgeupdatecreate" 11 | 12 | var _verifyTypeIndex = [...]uint8{0, 5, 9, 14, 20, 26} 13 | 14 | func (i verifyType) String() string { 15 | if i < 0 || i >= verifyType(len(_verifyTypeIndex)-1) { 16 | return fmt.Sprintf("verifyType(%d)", i) 17 | } 18 | return _verifyTypeName[_verifyTypeIndex[i]:_verifyTypeIndex[i+1]] 19 | } 20 | 21 | var _verifyTypeValues = []verifyType{0, 1, 2, 3, 4} 22 | 23 | var _verifyTypeNameToValueMap = map[string]verifyType{ 24 | _verifyTypeName[0:5]: 0, 25 | _verifyTypeName[5:9]: 1, 26 | _verifyTypeName[9:14]: 2, 27 | _verifyTypeName[14:20]: 3, 28 | _verifyTypeName[20:26]: 4, 29 | } 30 | 31 | // verifyTypeString retrieves an enum value from the enum constants string name. 32 | // Throws an error if the param is not part of the enum. 33 | func verifyTypeString(s string) (verifyType, error) { 34 | if val, ok := _verifyTypeNameToValueMap[s]; ok { 35 | return val, nil 36 | } 37 | return 0, fmt.Errorf("%s does not belong to verifyType values", s) 38 | } 39 | 40 | // verifyTypeValues returns all values of the enum 41 | func verifyTypeValues() []verifyType { 42 | return _verifyTypeValues 43 | } 44 | 45 | // IsAverifyType returns "true" if the value is listed in the enum definition. "false" otherwise 46 | func (i verifyType) IsAverifyType() bool { 47 | for _, v := range _verifyTypeValues { 48 | if i == v { 49 | return true 50 | } 51 | } 52 | return false 53 | } 54 | 55 | // MarshalText implements the encoding.TextMarshaler interface for verifyType 56 | func (i verifyType) MarshalText() ([]byte, error) { 57 | return []byte(i.String()), nil 58 | } 59 | 60 | // UnmarshalText implements the encoding.TextUnmarshaler interface for verifyType 61 | func (i *verifyType) UnmarshalText(text []byte) error { 62 | var err error 63 | *i, err = verifyTypeString(string(text)) 64 | return err 65 | } 66 | -------------------------------------------------------------------------------- /imagine/enums_dimensionorder.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=dimensionOrder -trimprefix=dimensionOrder -text -transform=kebab -output enums_dimensionorder.go"; DO NOT EDIT. 
2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _dimensionOrderName = "rowcolumn" 11 | 12 | var _dimensionOrderIndex = [...]uint8{0, 3, 9} 13 | 14 | func (i dimensionOrder) String() string { 15 | if i < 0 || i >= dimensionOrder(len(_dimensionOrderIndex)-1) { 16 | return fmt.Sprintf("dimensionOrder(%d)", i) 17 | } 18 | return _dimensionOrderName[_dimensionOrderIndex[i]:_dimensionOrderIndex[i+1]] 19 | } 20 | 21 | var _dimensionOrderValues = []dimensionOrder{0, 1} 22 | 23 | var _dimensionOrderNameToValueMap = map[string]dimensionOrder{ 24 | _dimensionOrderName[0:3]: 0, 25 | _dimensionOrderName[3:9]: 1, 26 | } 27 | 28 | // dimensionOrderString retrieves an enum value from the enum constants string name. 29 | // Throws an error if the param is not part of the enum. 30 | func dimensionOrderString(s string) (dimensionOrder, error) { 31 | if val, ok := _dimensionOrderNameToValueMap[s]; ok { 32 | return val, nil 33 | } 34 | return 0, fmt.Errorf("%s does not belong to dimensionOrder values", s) 35 | } 36 | 37 | // dimensionOrderValues returns all values of the enum 38 | func dimensionOrderValues() []dimensionOrder { 39 | return _dimensionOrderValues 40 | } 41 | 42 | // IsAdimensionOrder returns "true" if the value is listed in the enum definition. "false" otherwise 43 | func (i dimensionOrder) IsAdimensionOrder() bool { 44 | for _, v := range _dimensionOrderValues { 45 | if i == v { 46 | return true 47 | } 48 | } 49 | return false 50 | } 51 | 52 | // MarshalText implements the encoding.TextMarshaler interface for dimensionOrder 53 | func (i dimensionOrder) MarshalText() ([]byte, error) { 54 | return []byte(i.String()), nil 55 | } 56 | 57 | // UnmarshalText implements the encoding.TextUnmarshaler interface for dimensionOrder 58 | func (i *dimensionOrder) UnmarshalText(text []byte) error { 59 | var err error 60 | *i, err = dimensionOrderString(string(text)) 61 | return err 62 | } 63 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: pi crossbuild install release test cover cover-pkg cover-viz enumer 2 | 3 | ENUMER := $(shell command -v enumer 2>/dev/null) 4 | VERSION := $(shell git describe --tags 2> /dev/null || echo unknown) 5 | IDENTIFIER := $(VERSION)-$(GOOS)-$(GOARCH) 6 | CLONE_URL=github.com/pilosa/tools 7 | PKGS := $(shell cd $(GOPATH)/src/$(CLONE_URL); go list ./... | grep -v vendor) 8 | BUILD_TIME=`date -u +%FT%T%z` 9 | LDFLAGS="-X github.com/pilosa/tools.Version=$(VERSION) -X github.com/pilosa/tools.BuildTime=$(BUILD_TIME)" 10 | export GO111MODULE=on 11 | 12 | default: test install 13 | 14 | test: 15 | go test ./... 
$(TESTFLAGS) 16 | 17 | cover: 18 | mkdir -p build/coverage 19 | echo "mode: set" > build/coverage/all.out 20 | for pkg in $(PKGS) ; do \ 21 | make cover-pkg PKG=$$pkg ; \ 22 | done 23 | 24 | cover-pkg: 25 | mkdir -p build/coverage 26 | touch build/coverage/$(subst /,-,$(PKG)).out 27 | go test -coverprofile=build/coverage/$(subst /,-,$(PKG)).out $(PKG) 28 | tail -n +2 build/coverage/$(subst /,-,$(PKG)).out >> build/coverage/all.out 29 | 30 | cover-viz: cover 31 | go tool cover -html=build/coverage/all.out 32 | 33 | crossbuild: 34 | mkdir -p build/pi-$(IDENTIFIER) 35 | make pi FLAGS="-o build/pi-$(IDENTIFIER)/pi" 36 | cp LICENSE README.md build/pi-$(IDENTIFIER) 37 | tar -cvz -C build -f build/pi-$(IDENTIFIER).tar.gz pi-$(IDENTIFIER)/ 38 | @echo "Created release build: build/pi-$(IDENTIFIER).tar.gz" 39 | 40 | release: 41 | make crossbuild GOOS=linux GOARCH=amd64 42 | make crossbuild GOOS=linux GOARCH=386 43 | make crossbuild GOOS=darwin GOARCH=amd64 44 | 45 | install: install-pi install-imagine install-dx 46 | 47 | install-dx: 48 | go install -ldflags $(LDFLAGS) $(FLAGS) $(CLONE_URL)/cmd/dx 49 | 50 | install-imagine: 51 | go install -ldflags $(LDFLAGS) $(FLAGS) $(CLONE_URL)/cmd/imagine 52 | 53 | install-pi: 54 | go install -ldflags $(LDFLAGS) $(FLAGS) $(CLONE_URL)/cmd/pi 55 | 56 | 57 | generate: enumer-install 58 | cd imagine && \ 59 | go generate 60 | 61 | 62 | enumer-install: 63 | $(if $(ENUMER),@echo "enumer already installed — skipping.", go get -u github.com/alvaroloes/enumer) 64 | -------------------------------------------------------------------------------- /bench/basic_query.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "os" 8 | "time" 9 | 10 | "github.com/pilosa/go-pilosa" 11 | ) 12 | 13 | var _ Benchmark = (*BasicQueryBenchmark)(nil) 14 | 15 | // BasicQueryBenchmark runs a query multiple times with increasing row ids. 16 | type BasicQueryBenchmark struct { 17 | Name string `json:"name"` 18 | MinRowID int64 `json:"min-row-id"` 19 | Iterations int `json:"iterations"` 20 | NumArgs int `json:"num-args"` 21 | Query string `json:"query"` 22 | Index string `json:"index"` 23 | Field string `json:"field"` 24 | 25 | Logger *log.Logger `json:"-"` 26 | } 27 | 28 | // NewBasicQueryBenchmark returns a new instance of BasicQueryBenchmark. 29 | func NewBasicQueryBenchmark() *BasicQueryBenchmark { 30 | return &BasicQueryBenchmark{ 31 | Name: "basic-query", 32 | Logger: log.New(os.Stderr, "", log.LstdFlags), 33 | } 34 | } 35 | 36 | // Run runs the benchmark. 37 | func (b *BasicQueryBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 38 | result := NewResult() 39 | result.AgentNum = agentNum 40 | result.Configuration = b 41 | 42 | // Initialize schema. 43 | index, field, err := ensureSchema(client, b.Index, b.Field) 44 | if err != nil { 45 | return result, err 46 | } 47 | 48 | // Determine minimum row id. 49 | minRowID := b.MinRowID + int64(agentNum*b.Iterations) 50 | 51 | var start time.Time 52 | for n := 0; n < b.Iterations; n++ { 53 | rows := make([]*pilosa.PQLRowQuery, b.NumArgs) 54 | for i := range rows { 55 | rows[i] = field.Row(minRowID + int64(n)) 56 | } 57 | 58 | var q *pilosa.PQLRowQuery 59 | switch b.Query { 60 | case "Intersect": 61 | q = index.Intersect(rows...) 62 | case "Union": 63 | q = index.Union(rows...) 64 | case "Difference": 65 | q = index.Difference(rows...) 66 | case "Xor": 67 | q = index.Xor(rows...)
68 | default: 69 | return result, fmt.Errorf("invalid query type: %q", b.Query) 70 | } 71 | 72 | start = time.Now() 73 | _, err := client.Query(q) 74 | result.Add(time.Since(start), nil) 75 | if err != nil { 76 | return result, err 77 | } 78 | } 79 | return result, nil 80 | } 81 | -------------------------------------------------------------------------------- /cmd/pi/random_set.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // NewRandomSetCommand returns the random-set benchmark subcommand. 12 | func NewRandomSetCommand() *cobra.Command { 13 | b := bench.NewRandomSetBenchmark() 14 | cmd := &cobra.Command{ 15 | Use: "random-set", 16 | Short: "Executes random sets.", 17 | Long: `Sets random values according to the parameters using PQL through the /query endpoint. 18 | If NumAttrs and NumAttrValues are greater than 0, then each SetBit query is 19 | followed by a SetRowAttrs query on the same row id. Each SetRowAttrs query sets 20 | a single attribute to an integer value chosen randomly. There will be num-attrs 21 | total possible attributes and num-attr-values total possible values. Agent num 22 | modifies random seed.`, 23 | 24 | RunE: func(cmd *cobra.Command, args []string) error { 25 | flags := cmd.Flags() 26 | b.Logger = NewLoggerFromFlags(flags) 27 | client, err := NewClientFromFlags(flags) 28 | if err != nil { 29 | return err 30 | } 31 | agentNum, err := flags.GetInt("agent-num") 32 | if err != nil { 33 | return err 34 | } 35 | result, err := b.Run(context.Background(), client, agentNum) 36 | if err != nil { 37 | result.Error = err.Error() 38 | } 39 | return PrintResults(cmd, result, os.Stdout) 40 | }, 41 | } 42 | 43 | flags := cmd.Flags() 44 | flags.Int64Var(&b.MinRowID, "min-row-id", 0, "Minimum row id for set.") 45 | flags.Int64Var(&b.MaxRowID, "max-row-id", 100000, "Maximum row id for set.") 46 | flags.Int64Var(&b.MinColumnID, "min-column-id", 0, "Minimum column id for set.") 47 | flags.Int64Var(&b.MaxColumnID, "max-column-id", 100000, "Maximum column id for set.") 48 | flags.Int64Var(&b.Seed, "seed", 1, "Random seed.") 49 | flags.IntVar(&b.Iterations, "iterations", 100, "Number of values to set.") 50 | flags.IntVar(&b.BatchSize, "batch-size", 1, "Number of values to set per batch.") 51 | flags.IntVar(&b.NumAttrs, "num-attrs", 0, "If > 0, alternate set with setrowattrs - this number of different attributes") 52 | flags.IntVar(&b.NumAttrValues, "num-attr-values", 0, "If > 0, alternate set with setrowattrs - this number of different attribute values") 53 | flags.StringVar(&b.Field, "field", defaultField, "Field to set in.") 54 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index to use.") 55 | 56 | return cmd 57 | } 58 | -------------------------------------------------------------------------------- /cmd/pi/bench.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "io" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | const ( 12 | defaultIndex = "ibench" 13 | defaultField = "fbench" 14 | defaultRangeField = "range-field" 15 | ) 16 | 17 | func NewBenchCommand() *cobra.Command { 18 | benchCmd := &cobra.Command{ 19 | Use: "bench", 20 | Short: "Runs benchmarks against a pilosa cluster.", 21 | Long: `Runs benchmarks against a pilosa cluster.
22 | 23 | See the various subcommands for specific benchmarks and their arguments. The 24 | various benchmarks should modulate their behavior based on what agent-num is 25 | given, so that multiple benchmarks with identical configurations but differing 26 | agent numbers will do interesting work. 27 | 28 | `, 29 | } 30 | 31 | flags := benchCmd.PersistentFlags() 32 | flags.StringSlice("hosts", []string{"localhost:10101"}, "Comma separated list of \"host:port\" pairs of the Pilosa cluster.") 33 | flags.Int("agent-num", 0, "A unique integer to associate with this invocation of 'bench' to distinguish it from others running concurrently.") 34 | flags.Bool("human", true, "Make output human friendly.") 35 | flags.Bool("tls.skip-verify", false, "Skip TLS certificate verification (not secure)") 36 | 37 | benchCmd.AddCommand(NewBasicQueryCommand()) 38 | benchCmd.AddCommand(NewDiagonalSetBitsCommand()) 39 | benchCmd.AddCommand(NewImportCommand()) 40 | benchCmd.AddCommand(NewImportRangeCommand()) 41 | benchCmd.AddCommand(NewQueryCommand()) 42 | benchCmd.AddCommand(NewRandomQueryCommand()) 43 | benchCmd.AddCommand(NewRandomSetCommand()) 44 | benchCmd.AddCommand(NewRangeQueryCommand()) 45 | benchCmd.AddCommand(NewSliceWidthCommand()) 46 | benchCmd.AddCommand(NewZipfCommand()) 47 | benchCmd.AddCommand(NewTPSCommand()) 48 | 49 | return benchCmd 50 | } 51 | 52 | // PrintResults encodes the output of a benchmark subcommand as json and writes 53 | // it to the given Writer. It takes the "human" flag into account when encoding 54 | // the json. 55 | func PrintResults(cmd *cobra.Command, result *bench.Result, out io.Writer) error { 56 | human, err := cmd.Flags().GetBool("human") 57 | if err != nil { 58 | return err 59 | } 60 | 61 | enc := json.NewEncoder(out) 62 | if human { 63 | enc.SetIndent("", " ") 64 | } 65 | if err := enc.Encode(result); err != nil { 66 | return err 67 | } 68 | return nil 69 | } 70 | -------------------------------------------------------------------------------- /cmd/pi/zipf.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewZipfCommand() *cobra.Command { 12 | b := bench.NewZipfBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "zipf", 15 | Short: "zipf sets random bits according to the Zipf distribution.", 16 | Long: `Sets random bits according to the Zipf distribution. 17 | 18 | This is a power-law distribution controlled by two parameters. 19 | Exponent, in the range (1, inf), with a default value of 1.01, controls 20 | the "sharpness" of the distribution, with higher exponent being sharper. 21 | Ratio, in the range (0, 1), with a default value of 0.25, controls the 22 | maximum variation of the distribution, with higher ratio being more uniform.
23 | `, 24 | RunE: func(cmd *cobra.Command, args []string) error { 25 | flags := cmd.Flags() 26 | b.Logger = NewLoggerFromFlags(flags) 27 | client, err := NewClientFromFlags(flags) 28 | if err != nil { 29 | return err 30 | } 31 | agentNum, err := flags.GetInt("agent-num") 32 | if err != nil { 33 | return err 34 | } 35 | result, err := b.Run(context.Background(), client, agentNum) 36 | if err != nil { 37 | result.Error = err.Error() 38 | } 39 | return PrintResults(cmd, result, os.Stdout) 40 | }, 41 | } 42 | 43 | flags := cmd.Flags() 44 | flags.Int64Var(&b.MinRowID, "min-row-id", 0, "Rows being set will all be greater than this.") 45 | flags.Int64Var(&b.MaxRowID, "max-row-id", 100000, "Maximum row id for set bits.") 46 | flags.Int64Var(&b.MinColumnID, "min-column-id", 0, "Column id to start from.") 47 | flags.Int64Var(&b.MaxColumnID, "max-column-id", 100000, "Maximum column id for set bits.") 48 | flags.IntVar(&b.Iterations, "iterations", 100, "Number of bits to set.") 49 | flags.Int64Var(&b.Seed, "seed", 1, "Seed for RNG.") 50 | flags.StringVar(&b.Field, "field", "fbench", "Pilosa field in which to set bits.") 51 | flags.StringVar(&b.Index, "index", "ibench", "Pilosa index to use.") 52 | flags.Float64Var(&b.RowExponent, "row-exponent", 1.01, "Zipf exponent parameter for row IDs.") 53 | flags.Float64Var(&b.RowRatio, "row-ratio", 0.25, "Zipf probability ratio parameter for row IDs.") 54 | flags.Float64Var(&b.ColumnExponent, "column-exponent", 1.01, "Zipf exponent parameter for column IDs.") 55 | flags.Float64Var(&b.ColumnRatio, "column-ratio", 0.25, "Zipf probability ratio parameter for column IDs.") 56 | flags.StringVar(&b.Operation, "operation", "set", "Can be set or clear.") 57 | 58 | return cmd 59 | } 60 | -------------------------------------------------------------------------------- /bench/random_set.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "math/rand" 8 | "os" 9 | "time" 10 | 11 | "github.com/pilosa/go-pilosa" 12 | ) 13 | 14 | var _ Benchmark = (*RandomSetBenchmark)(nil) 15 | 16 | // RandomSetBenchmark sets bits randomly and deterministically based on a seed. 17 | type RandomSetBenchmark struct { 18 | Name string `json:"name"` 19 | MinRowID int64 `json:"min-row-id"` 20 | MaxRowID int64 `json:"max-row-id"` 21 | MinColumnID int64 `json:"min-column-id"` 22 | MaxColumnID int64 `json:"max-column-id"` 23 | Iterations int `json:"iterations"` 24 | BatchSize int `json:"batch-size"` 25 | Seed int64 `json:"seed"` 26 | NumAttrs int `json:"num-attrs"` 27 | NumAttrValues int `json:"num-attr-values"` 28 | Index string `json:"index"` 29 | Field string `json:"field"` 30 | 31 | Logger *log.Logger `json:"-"` 32 | } 33 | 34 | // NewRandomSetBenchmark returns a new instance of RandomSetBenchmark. 35 | func NewRandomSetBenchmark() *RandomSetBenchmark { 36 | return &RandomSetBenchmark{ 37 | Name: "random-set", 38 | Logger: log.New(os.Stderr, "", log.LstdFlags), 39 | } 40 | } 41 | 42 | // Run runs the benchmark. 43 | func (b *RandomSetBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 44 | result := NewResult() 45 | result.AgentNum = agentNum 46 | result.Configuration = b 47 | 48 | if b.BatchSize <= 0 { 49 | return result, fmt.Errorf("batch size must be greater than 0, currently: %d", b.BatchSize) 50 | } 51 | 52 | // Initialize schema. 
53 | index, field, err := ensureSchema(client, b.Index, b.Field) 54 | if err != nil { 55 | return result, err 56 | } 57 | 58 | rand := rand.New(rand.NewSource(b.Seed)) 59 | const letters = "abcdefghijklmnopqrstuvwxyz" 60 | for n := 0; n < b.Iterations; { 61 | var a []pilosa.PQLQuery 62 | for i := 0; i < b.BatchSize && n < b.Iterations; i, n = i+1, n+1 { 63 | rowID := rand.Int63n(b.MaxRowID - b.MinRowID) 64 | columnID := rand.Int63n(b.MaxColumnID - b.MinColumnID) 65 | 66 | a = append(a, field.Set(b.MinRowID+rowID, b.MinColumnID+columnID)) 67 | 68 | if b.NumAttrs > 0 && b.NumAttrValues > 0 { 69 | attri := rand.Intn(b.NumAttrs) 70 | key := fmt.Sprintf("%c%d", letters[attri%len(letters)], attri) 71 | val := rand.Intn(b.NumAttrValues) 72 | a = append(a, field.SetRowAttrs(b.MinRowID+rowID, map[string]interface{}{key: val})) 73 | } 74 | } 75 | 76 | start := time.Now() 77 | _, err := client.Query(index.BatchQuery(a...)) 78 | result.Add(time.Since(start), nil) 79 | if err != nil { 80 | return result, err 81 | } 82 | } 83 | return result, nil 84 | } 85 | -------------------------------------------------------------------------------- /imagine/sample.md: -------------------------------------------------------------------------------- 1 | # A sample schema 2 | 3 | This is loosely inspired by things like the Star Schema Benchmark, with the 4 | idea being to create a tool to let similar tests be run on different scales, 5 | against an arbitrary install. This schema is not yet fully implemented; 6 | what follows is some sketchy design notes. 7 | 8 | ## Indexes 9 | 10 | All indexes used by this tool have names starting with `imaginary-` (or 11 | another prefix specified with `--prefix`). The intent here is that it should be 12 | safe to delete these indexes after tests. 13 | 14 | ## Sample Data 15 | 16 | In the interests of visualization, we imagine a system which has data about 17 | various entities, called `users`, and things that might happen involving 18 | those users, called `events`. We also add another table, `supplemental`, 19 | which has different data from `users` but the same column space, to facilitate 20 | cross-index benchmarks. Benchmarks might well operate on only one of 21 | these indexes, but having three of them lets us do interesting comparisons. 22 | 23 | ### Users 24 | 25 | The main index is a table of information about users. Fields: 26 | age (int) 27 | income (int) 28 | favorite numbers (N rows, bits indicate "user likes number") 29 | least favorite numbers (N rows, bits indicate "user dislikes number") 30 | 31 | The fields are obviously made-up, but they let us vary broadly in cardinality 32 | without having to have a giant list of "ingredients one could have an allergy 33 | to" or something. We will probably want/need other fields also. 34 | 35 | We should probably have column keys, even if they're obviously-trivial (like 36 | "the column ID expressed as a string with a prefix"), because that's a 37 | use case we potentially care about. 38 | 39 | ### Events 40 | 41 | The second table, `events`, has an int field which contains column IDs from the 42 | user table. This allows comparisons to go either of two ways; you can perform 43 | queries on this table, then create a new row which has bits set for all the 44 | entries in the `userid` field for the returned columns, or you can perform 45 | queries on `users`, then select entries from this table which have a value in 46 | that set of columns present in their `userid` field, as sketched below.
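A sketch of that second direction, using the `pilosa` package from github.com/pilosa/go-pilosa (illustrative only: this schema is not implemented, and the int-field `Equals` helper and all names here are assumptions):

```go
// eventsForUsers selects events whose userid int field holds one of the
// column IDs returned by a prior query against the users index.
func eventsForUsers(client *pilosa.Client, events *pilosa.Index,
	userid *pilosa.Field, userCols []uint64) (*pilosa.QueryResponse, error) {
	queries := make([]*pilosa.PQLRowQuery, 0, len(userCols))
	for _, col := range userCols {
		// Equals(n) builds a Row(userid == n) range query on the int field.
		queries = append(queries, userid.Equals(int(col)))
	}
	// Union the per-column rows into a single row of matching events.
	return client.Query(events.Union(queries...))
}
```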
Events likewise have 47 | other fields: 48 | timestamp of event 49 | int fields 50 | count-type fields 51 | fields which encourage union/intersection queries 52 | 53 | ### Supplemental 54 | 55 | The third table, `supplemental`, contains additional details about users; the 56 | column space for this table, and the column space for `users`, are the same. 57 | This is useful in cases where there is a reason to have two indexes which 58 | refer to the same columns, and not to combine those indexes. The likely join 59 | use case is to perform queries on one, then use that in intersections or unions 60 | with queries on the other. Fields: 61 | ??? 62 | -------------------------------------------------------------------------------- /dx/compare_test.go: -------------------------------------------------------------------------------- 1 | package dx 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/pilosa/go-pilosa" 7 | ) 8 | 9 | func TestIsValidQuery(t *testing.T) { 10 | res := &pilosa.RowResult{} 11 | count := int64(50) 12 | 13 | tests := []struct { 14 | name string 15 | query *Query 16 | expected bool 17 | }{ 18 | { 19 | name: "nil", 20 | query: nil, 21 | expected: false, 22 | }, 23 | { 24 | name: "empty", 25 | query: &Query{}, 26 | expected: false, 27 | }, 28 | { 29 | name: "result-only", 30 | query: &Query{Result: res}, 31 | expected: true, 32 | }, 33 | { 34 | name: "count-only", 35 | query: &Query{ResultCount: &count}, 36 | expected: true, 37 | }, 38 | { 39 | name: "result-and-count", 40 | query: &Query{Result: res, ResultCount: &count}, 41 | expected: true, 42 | }, 43 | } 44 | 45 | for _, q := range tests { 46 | got := isValidQuery(q.query) 47 | if got != q.expected { 48 | t.Fatalf("test case %v: expected: %v, got %v", q.name, q.expected, got) 49 | } 50 | } 51 | } 52 | 53 | func TestQueryResultEqual(t *testing.T) { 54 | res0 := &pilosa.RowResult{Columns: []uint64{0, 2, 4, 6}} 55 | res1 := &pilosa.RowResult{Columns: []uint64{0, 2, 4, 6}} 56 | res2 := &pilosa.RowResult{Columns: []uint64{1, 2, 4, 6}} 57 | count3 := int64(3) 58 | count4 := int64(4) 59 | count4dup := int64(4) 60 | 61 | tests := []struct { 62 | name string 63 | query1 *Query 64 | query2 *Query 65 | expected bool 66 | }{ 67 | { 68 | name: "result-result-equal", 69 | query1: &Query{Result: res0}, 70 | query2: &Query{Result: res1}, 71 | expected: true, 72 | }, 73 | { 74 | name: "result-count-equal", 75 | query1: &Query{Result: res0}, 76 | query2: &Query{ResultCount: &count4}, 77 | expected: true, 78 | }, 79 | { 80 | name: "count-count-equal", 81 | query1: &Query{ResultCount: &count4}, 82 | query2: &Query{ResultCount: &count4dup}, 83 | expected: true, 84 | }, 85 | { 86 | name: "result-result-unequal", 87 | query1: &Query{Result: res0}, 88 | query2: &Query{Result: res2}, 89 | expected: false, 90 | }, 91 | { 92 | name: "result-count-unequal", 93 | query1: &Query{Result: res0}, 94 | query2: &Query{ResultCount: &count3}, 95 | expected: false, 96 | }, 97 | { 98 | name: "count-count-unequal", 99 | query1: &Query{ResultCount: &count4}, 100 | query2: &Query{ResultCount: &count3}, 101 | expected: false, 102 | }, 103 | { 104 | name: "default-to-results", 105 | query1: &Query{Result: res0, ResultCount: &count4}, 106 | query2: &Query{Result: res1, ResultCount: &count3}, 107 | expected: true, 108 | }, 109 | } 110 | 111 | for _, q := range tests { 112 | got := queryResultsEqual(q.query1, q.query2) 113 | if got != q.expected { 114 | t.Fatalf("test case: %v, expected %v, got: %v", q.name, q.expected, got) 115 | } 116 | } 117 | } 118 |
-------------------------------------------------------------------------------- /apophenia/zipf_test.go: -------------------------------------------------------------------------------- 1 | package apophenia 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "math/rand" 7 | "testing" 8 | ) 9 | 10 | type testCase struct { 11 | name string 12 | s, v float64 13 | m uint64 14 | } 15 | 16 | var testCases = []testCase{ 17 | {s: 1.01, v: 1, m: 100}, 18 | {s: 2, v: 1, m: 100}, 19 | {s: 1.01, v: 100, m: 1000}, 20 | {s: 2, v: 10000, m: 1000}, 21 | } 22 | 23 | func (tc testCase) Name() string { 24 | if tc.name != "" { 25 | return tc.name 26 | } 27 | return fmt.Sprintf("(zipf:s%f,v%f,m%d)", tc.s, tc.v, tc.m) 28 | } 29 | 30 | func runZipf(zf func() uint64, values []uint64, n uint64, t *testing.T) { 31 | for i := uint64(0); i < n; i++ { 32 | x := zf() 33 | if x < 0 || x >= uint64(len(values)) { 34 | t.Fatalf("got out-of-range value %d from zipf function", x) 35 | } 36 | values[x]++ 37 | } 38 | } 39 | 40 | type zipfTestCase struct { 41 | q, v float64 42 | seq Sequence 43 | exp string 44 | } 45 | 46 | func (z zipfTestCase) String() string { 47 | return fmt.Sprintf("q: %g, v: %g, seq: %t, expected error: %t", 48 | z.q, z.v, z.seq != nil, z.exp != "") 49 | } 50 | 51 | func Test_InvalidInputs(t *testing.T) { 52 | seq := NewSequence(0) 53 | testCases := []zipfTestCase{ 54 | {q: 1, v: 1.1, seq: seq, exp: "need q > 1 (got 1) and v >= 1 (got 1.1) for Zipf distribution"}, 55 | {q: 1.1, v: 0.99, seq: seq, exp: "need q > 1 (got 1.1) and v >= 1 (got 0.99) for Zipf distribution"}, 56 | {q: 1.1, v: 1.1, seq: nil, exp: "need a usable PRNG apophenia.Sequence"}, 57 | {q: math.NaN(), v: 1.1, seq: nil, exp: "q (NaN) and v (1.1) must not be NaN for Zipf distribution"}, 58 | {q: 1.01, v: 2, seq: seq, exp: ""}, 59 | } 60 | for _, c := range testCases { 61 | z, err := NewZipf(c.q, c.v, 20, 0, c.seq) 62 | if c.exp != "" { 63 | if err == nil { 64 | t.Errorf("case %v: expected error '%s', got no error", c, c.exp) 65 | } else if err.Error() != c.exp { 66 | t.Errorf("case %v: expected error '%s', got error '%s'", c, c.exp, err.Error()) 67 | } 68 | } else { 69 | if err != nil { 70 | t.Errorf("case %v: unexpected error %v", c, err) 71 | } else if z == nil { 72 | t.Errorf("case %v: nil Zipf despite no error", c) 73 | } 74 | } 75 | } 76 | } 77 | 78 | const runs = 1000000 79 | 80 | func Test_CompareWithMath(t *testing.T) { 81 | failed := false 82 | for idx, c := range testCases { 83 | stdlibValues := make([]uint64, c.m+1) 84 | zipfValues := make([]uint64, c.m+1) 85 | stdlibZipf := rand.NewZipf(rand.New(rand.NewSource(int64(idx))), c.s, c.v, c.m) 86 | seq := NewSequence(int64(idx)) 87 | zipfZipf, err := NewZipf(c.s, c.v, c.m, 0, seq) 88 | if err != nil { 89 | t.Fatalf("failed to create newZipf: %s", err) 90 | } 91 | runZipf(stdlibZipf.Uint64, stdlibValues, runs, t) 92 | runZipf(zipfZipf.Next, zipfValues, runs, t) 93 | for i := uint64(0); i < c.m; i++ { 94 | stdlibP := float64(stdlibValues[i]) / runs 95 | zipfP := float64(zipfValues[i]) / runs 96 | diff := math.Abs(stdlibP - zipfP) 97 | if diff > 0.001 { 98 | failed = true 99 | t.Logf("%s: stdlib %d, zipf %d, diff %f [s %f, v %f]", 100 | c.Name(), stdlibValues[i], zipfValues[i], diff, c.s, c.v) 101 | } 102 | } 103 | } 104 | if failed { 105 | t.Fail() 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /bench/import.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 
5 | "io" 6 | "log" 7 | "math/rand" 8 | "os" 9 | 10 | "github.com/pilosa/go-pilosa" 11 | ) 12 | 13 | var _ Benchmark = (*ImportBenchmark)(nil) 14 | 15 | type ImportBenchmark struct { 16 | Name string `json:"name"` 17 | MinRowID int64 `json:"min-row-id"` 18 | MinColumnID int64 `json:"min-column-id"` 19 | MaxRowID int64 `json:"max-row-id"` 20 | MaxColumnID int64 `json:"max-column-id"` 21 | Index string `json:"index"` 22 | Field string `json:"field"` 23 | Iterations int64 `json:"iterations"` 24 | Seed int64 `json:"seed"` 25 | Distribution string `json:"distribution"` 26 | BufferSize int `json:"-"` 27 | 28 | Logger *log.Logger `json:"-"` 29 | } 30 | 31 | // NewImportBenchmark returns a new instance of ImportBenchmark. 32 | func NewImportBenchmark() *ImportBenchmark { 33 | return &ImportBenchmark{ 34 | Name: "import", 35 | Logger: log.New(os.Stderr, "", log.LstdFlags), 36 | } 37 | } 38 | 39 | // Run runs the Import benchmark 40 | func (b *ImportBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 41 | result := NewResult() 42 | result.AgentNum = agentNum 43 | result.Configuration = b 44 | 45 | // Initialize schema. 46 | _, field, err := ensureSchema(client, b.Index, b.Field) 47 | if err != nil { 48 | return result, err 49 | } 50 | 51 | itr := b.RecordIterator(b.Seed + int64(agentNum)) 52 | err = client.ImportField(field, itr, pilosa.OptImportBatchSize(b.BufferSize)) 53 | result.Extra["actual-iterations"] = itr.actualIterations 54 | result.Extra["avgdelta"] = itr.avgdelta 55 | return result, err 56 | } 57 | 58 | func (b *ImportBenchmark) RecordIterator(seed int64) *RecordIterator { 59 | rand := rand.New(rand.NewSource(seed)) 60 | 61 | itr := NewRecordIterator() 62 | itr.maxbitnum = (b.MaxRowID - b.MinRowID + 1) * (b.MaxColumnID - b.MinColumnID + 1) 63 | itr.avgdelta = float64(itr.maxbitnum) / float64(b.Iterations) 64 | itr.minrow, itr.mincol, itr.maxrow, itr.maxcol = b.MinRowID, b.MinColumnID, b.MaxRowID, b.MaxColumnID 65 | 66 | if b.Distribution == "exponential" { 67 | itr.lambda = 1.0 / itr.avgdelta 68 | itr.fdelta = func(itr *RecordIterator) float64 { 69 | return rand.ExpFloat64() / itr.lambda 70 | } 71 | } else { // if b.Distribution == "uniform" { 72 | itr.fdelta = func(itr *RecordIterator) float64 { 73 | return rand.Float64() * itr.avgdelta * 2 74 | } 75 | } 76 | return itr 77 | } 78 | 79 | func NewRecordIterator() *RecordIterator { 80 | return &RecordIterator{} 81 | } 82 | 83 | type RecordIterator struct { 84 | actualIterations int64 85 | bitnum int64 86 | maxbitnum int64 87 | minrow int64 88 | maxrow int64 89 | mincol int64 90 | maxcol int64 91 | avgdelta float64 92 | lambda float64 93 | rand *rand.Rand 94 | fdelta func(z *RecordIterator) float64 95 | } 96 | 97 | func (itr *RecordIterator) NextRecord() (pilosa.Record, error) { 98 | delta := itr.fdelta(itr) 99 | if delta < 1.0 { 100 | delta = 1.0 101 | } 102 | itr.bitnum = int64(float64(itr.bitnum) + delta) 103 | if itr.bitnum > itr.maxbitnum { 104 | return pilosa.Column{}, io.EOF 105 | } 106 | itr.actualIterations++ 107 | return pilosa.Column{ 108 | RowID: uint64((itr.bitnum / (itr.maxcol - itr.mincol + 1)) + itr.minrow), 109 | ColumnID: uint64(itr.bitnum%(itr.maxcol-itr.mincol+1) + itr.mincol), 110 | }, nil 111 | } 112 | -------------------------------------------------------------------------------- /dx/cmd_test.go: -------------------------------------------------------------------------------- 1 | package dx 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "os" 7 | "path/filepath" 8 | "reflect" 9 | 
"testing" 10 | 11 | "github.com/pilosa/pilosa" 12 | "github.com/pilosa/pilosa/test" 13 | ) 14 | 15 | func SetupMain() (*Main, string) { 16 | path, err := ioutil.TempDir("", "dx-") 17 | if err != nil { 18 | panic(err) 19 | } 20 | m := NewMain() 21 | m.DataDir = path 22 | m.ThreadCount = 2 23 | m.NumQueries = 10 24 | m.SpecFiles = []string{filepath.Join("./testdata", "spec", "spec.toml")} 25 | 26 | return m, path 27 | } 28 | 29 | func SetupBits(holder *pilosa.Holder) { 30 | idx0, err := holder.CreateIndex("index0", pilosa.IndexOptions{}) 31 | if err != nil { 32 | panic(err) 33 | } 34 | idx1, err := holder.CreateIndex("index1", pilosa.IndexOptions{}) 35 | if err != nil { 36 | panic(err) 37 | } 38 | fld0, err := idx0.CreateField("field0") 39 | if err != nil { 40 | panic(err) 41 | } 42 | fld1, err := idx0.CreateField("field1") 43 | if err != nil { 44 | panic(err) 45 | } 46 | fld2, err := idx1.CreateField("field2") 47 | if err != nil { 48 | panic(err) 49 | } 50 | 51 | fld0.SetBit(0, 0, nil) 52 | fld0.SetBit(0, 1, nil) 53 | fld0.SetBit(0, 0, nil) 54 | fld0.SetBit(0, 2, nil) 55 | fld0.SetBit(1, 1, nil) 56 | fld0.SetBit(1, 12, nil) 57 | fld0.SetBit(2, 24, nil) 58 | fld1.SetBit(1, 2, nil) 59 | fld1.SetBit(1, 13, nil) 60 | fld1.SetBit(1, 65536, nil) 61 | fld1.SetBit(2, 12, nil) 62 | fld2.SetBit(3, 36, nil) 63 | } 64 | 65 | func TestIngest(t *testing.T) { 66 | m, path := SetupMain() 67 | defer os.RemoveAll(path) 68 | 69 | cluster := test.MustRunCluster(t, 3) 70 | defer cluster.Close() 71 | for _, cmd := range cluster { 72 | host := cmd.URL() 73 | m.Hosts = append(m.Hosts, host) 74 | } 75 | 76 | if err := ExecuteIngest(m); err != nil { 77 | t.Fatalf("executing ingest: %v", err) 78 | } 79 | 80 | index := "dx-index" 81 | q := "Row(field=%v)" 82 | expectedCols := []uint64{2, 5, 10} 83 | 84 | for i := 0; i < 5; i++ { 85 | query := fmt.Sprintf(q, i) 86 | response := cluster.Query(t, index, query) 87 | columns := response.Results[0].(*pilosa.Row).Columns() 88 | if !reflect.DeepEqual(columns, expectedCols) { 89 | t.Fatalf("row %v should have values %v, got %v", i, expectedCols, columns) 90 | } 91 | } 92 | 93 | for i := 5; i < 15; i++ { 94 | query := fmt.Sprintf(q, i) 95 | response := cluster.Query(t, index, query) 96 | columns := response.Results[0].(*pilosa.Row).Columns() 97 | if reflect.DeepEqual(columns, []uint64(nil)) { 98 | t.Fatalf("row %v should have no values, got %v", i, columns) 99 | } 100 | } 101 | } 102 | 103 | func TestQuery(t *testing.T) { 104 | m, path := SetupMain() 105 | defer os.RemoveAll(path) 106 | 107 | cluster := test.MustRunCluster(t, 1) 108 | defer cluster.Close() 109 | for _, cmd := range cluster { 110 | host := cmd.URL() 111 | m.Hosts = append(m.Hosts, host) 112 | } 113 | holder := cluster[0].Server.Holder() 114 | 115 | SetupBits(holder) 116 | 117 | if err := ExecuteQueries(m); err != nil { 118 | t.Fatalf("executing queries: %+v", err) 119 | } 120 | } 121 | 122 | func TestCompare(t *testing.T) { 123 | ingest0 := filepath.Join("./testdata", "ingest", "0") 124 | ingest1 := filepath.Join("./testdata", "ingest", "1") 125 | query0 := filepath.Join("./testdata", "query", "0") 126 | query1 := filepath.Join("./testdata", "query", "1") 127 | 128 | if err := ExecuteComparison(ingest0, ingest1); err != nil { 129 | t.Fatalf("comparing ingest: %v", err) 130 | } 131 | 132 | if err := ExecuteComparison(query0, query1); err != nil { 133 | t.Fatalf("comparing query: %v", err) 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /apophenia/weighted.go: 
-------------------------------------------------------------------------------- 1 | package apophenia 2 | 3 | import ( 4 | "errors" 5 | "math/bits" 6 | ) 7 | 8 | // Weighted provides a generator which produces weighted bits -- bits with 9 | // a specified probability of being set, as opposed to random values weighted 10 | // a given way. Bit density is specified as N/M, with M a (positive) 11 | // power of 2, and N an integer between 0 and M. 12 | // 13 | // The underlying generator can produce 2^128 128-bit values, but the 14 | // iterative process requires log2(densityScale) 128-bit values from the 15 | // source per 128-bit output, so the top 7 bits of the address are used 16 | // as an iteration counter. Thus, Weighted.Bit can produce 2^128 distinct 17 | // values, but if the offset provided to Weighted.Bits is over 2^121, there 18 | // will be overlap between the source values used for those bits, and 19 | // source values used (for different iterations) for other offsets. 20 | type Weighted struct { 21 | src Sequence 22 | // internal result cache 23 | lastValue Uint128 24 | lastOffset Uint128 25 | lastDensity, lastScale uint64 26 | } 27 | 28 | // NewWeighted yields a new Weighted using the given sequence as a source of 29 | // seekable pseudo-random bits. 30 | func NewWeighted(src Sequence) (*Weighted, error) { 31 | if src == nil { 32 | return nil, errors.New("new Weighted requires a non-nil source") 33 | } 34 | w := Weighted{src: src} 35 | return &w, nil 36 | } 37 | 38 | // Bit returns the single 0-or-1 bit at the specified offset. 39 | func (w *Weighted) Bit(offset Uint128, density uint64, scale uint64) uint64 { 40 | var bit uint64 41 | // In order to be able to cache/reuse values, we want to grab a whole 42 | // set of 128 bits including a given offset, and use the same 43 | // calculation for all of them. So we mask out the low-order 7 bits 44 | // of offset, and use them separately. Meanwhile, Bits will 45 | // always right-shift its column bits by 7, which reduces the 46 | // space of possible results but means that it produces the same 47 | // set of bits for any given batch... 48 | offset.Lo, bit = offset.Lo&^127, offset.Lo&127 49 | if offset == w.lastOffset && density == w.lastDensity && scale == w.lastScale { 50 | return w.lastValue.Bit(bit) 51 | } 52 | w.lastValue = w.Bits(offset, density, scale) 53 | w.lastOffset, w.lastDensity, w.lastScale = offset, density, scale 54 | return w.lastValue.Bit(bit) 55 | } 56 | 57 | const weightedIterationMask = (^uint64(0)) >> 7 58 | 59 | // Bits returns the 128-bit set of bits including offset. The column portion 60 | // of offset is right-shifted by 7 to match the offset calculations in Bit(), 61 | // above. Thus, you get the same values back for each sequence of 128 consecutive 62 | // offsets. 63 | func (w *Weighted) Bits(offset Uint128, density uint64, scale uint64) (out Uint128) { 64 | // magic accommodation for choices made elsewhere. 
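// (Editorial note on the loop below: each BitsAt word is an independent
// batch of 1/2-density bits. Walking density's bits from least significant
// to most, OR-ing a fresh word takes a batch of density p to (1+p)/2 and
// AND-ing takes it to p/2, so after log2(scale) rounds the result has
// density exactly density/scale. The TrailingZeros64 step skips the
// initial AND-only rounds, bumping offset.Hi so the remaining rounds read
// the same source words they otherwise would.)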
65 | offset.Lo >>= 7 66 | if density == scale { 67 | out.Not() 68 | return out 69 | } 70 | if density == 0 { 71 | return out 72 | } 73 | lz := uint(bits.TrailingZeros64(density)) 74 | density >>= lz 75 | scale >>= lz 76 | // generate the same results we would have without this hackery 77 | offset.Hi += uint64(lz) 78 | for scale > 1 { 79 | next := w.src.BitsAt(offset) 80 | if density&1 != 0 { 81 | out.Or(next) 82 | } else { 83 | out.And(next) 84 | } 85 | density >>= 1 86 | scale >>= 1 87 | // iteration is stashed in the bottom 24-bits of an offset 88 | offset.Hi++ 89 | } 90 | return out 91 | } 92 | 93 | // NextBits returns the next batch of bits after the last one retrieved. 94 | func (w *Weighted) NextBits(density, scale uint64) (out Uint128) { 95 | w.lastOffset.Inc() 96 | return w.Bits(w.lastOffset, density, scale) 97 | } 98 | -------------------------------------------------------------------------------- /bench/import_range.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "io" 6 | "log" 7 | "math/rand" 8 | "os" 9 | 10 | "github.com/pilosa/go-pilosa" 11 | ) 12 | 13 | var _ Benchmark = (*ImportRangeBenchmark)(nil) 14 | 15 | type ImportRangeBenchmark struct { 16 | Name string `json:"name"` 17 | MinValue int64 `json:"min-value"` 18 | MinColumnID int64 `json:"min-column-id"` 19 | MaxValue int64 `json:"max-value"` 20 | MaxColumnID int64 `json:"max-column-id"` 21 | Index string `json:"index"` 22 | Field string `json:"field"` 23 | Row string `json:"row"` 24 | Iterations int64 `json:"iterations"` 25 | Seed int64 `json:"seed"` 26 | Distribution string `json:"distribution"` 27 | BufferSize int `json:"-"` 28 | 29 | Logger *log.Logger `json:"-"` 30 | } 31 | 32 | // NewImportRangeBenchmark returns a new instance of ImportRangeBenchmark. 33 | func NewImportRangeBenchmark() *ImportRangeBenchmark { 34 | return &ImportRangeBenchmark{ 35 | Name: "import-range", 36 | Logger: log.New(os.Stderr, "", log.LstdFlags), 37 | } 38 | } 39 | 40 | // Run runs the benchmark. 41 | func (b *ImportRangeBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 42 | result := NewResult() 43 | result.AgentNum = agentNum 44 | result.Configuration = b 45 | 46 | // Initialize schema. 
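// Unlike the set-bit benchmarks, a range import needs an int (BSI) field,
// so ensureSchema is passed pilosa.OptFieldTypeInt with the benchmark's
// value bounds below.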
47 | _, field, err := ensureSchema(client, b.Index, b.Field, pilosa.OptFieldTypeInt(b.MinValue, b.MaxValue)) 48 | if err != nil { 49 | return result, err 50 | } 51 | 52 | itr := b.ValueIterator(b.Seed + int64(agentNum)) 53 | err = client.ImportField(field, itr, pilosa.OptImportBatchSize(b.BufferSize)) 54 | result.Extra["actual-iterations"] = itr.actualIterations 55 | result.Extra["avgdelta"] = itr.avgdelta 56 | return result, err 57 | } 58 | 59 | func (b *ImportRangeBenchmark) ValueIterator(seed int64) *ValueIterator { 60 | rand := rand.New(rand.NewSource(seed)) 61 | 62 | itr := NewValueIterator() 63 | itr.maxbitnum = (b.MaxValue - b.MinValue + 1) * (b.MaxColumnID - b.MinColumnID + 1) 64 | itr.avgdelta = float64(itr.maxbitnum) / float64(b.Iterations) 65 | itr.minvalue, itr.mincol, itr.maxvalue, itr.maxcol = b.MinValue, b.MinColumnID, b.MaxValue, b.MaxColumnID 66 | 67 | if b.Distribution == "exponential" { 68 | itr.lambda = 1.0 / itr.avgdelta 69 | itr.fdelta = func(itr *ValueIterator) float64 { 70 | return rand.ExpFloat64() / itr.lambda 71 | } 72 | } else { // if b.Distribution == "uniform" { 73 | itr.fdelta = func(itr *ValueIterator) float64 { 74 | return rand.Float64() * itr.avgdelta * 2 75 | } 76 | } 77 | return itr 78 | } 79 | 80 | func NewValueIterator() *ValueIterator { 81 | return &ValueIterator{} 82 | } 83 | 84 | type ValueIterator struct { 85 | actualIterations int64 86 | bitnum int64 87 | maxbitnum int64 88 | minvalue int64 89 | maxvalue int64 90 | mincol int64 91 | maxcol int64 92 | avgdelta float64 93 | lambda float64 94 | rng *rand.Rand 95 | fdelta func(itr *ValueIterator) float64 96 | } 97 | 98 | func (itr *ValueIterator) NextRecord() (pilosa.Record, error) { 99 | delta := itr.fdelta(itr) 100 | if delta < 1.0 { 101 | delta = 1.0 102 | } 103 | itr.bitnum = int64(float64(itr.bitnum) + delta) 104 | if itr.bitnum > itr.maxbitnum { 105 | return pilosa.FieldValue{}, io.EOF 106 | } 107 | 108 | itr.actualIterations++ 109 | return pilosa.FieldValue{ 110 | Value: int64((itr.bitnum / (itr.maxcol - itr.mincol + 1)) + itr.minvalue), 111 | ColumnID: uint64(itr.bitnum%(itr.maxcol-itr.mincol+1) + itr.mincol), 112 | }, nil 113 | } 114 | -------------------------------------------------------------------------------- /apophenia/permute_test.go: -------------------------------------------------------------------------------- 1 | package apophenia 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func PermutationOrBust(period int64, seed int64, expectedErr string, tb testing.TB) *Permutation { 9 | seq := NewSequence(seed) 10 | p, err := NewPermutation(period, 0, seq) 11 | if err != nil { 12 | if expectedErr == "" || expectedErr != err.Error() { 13 | tb.Fatalf("unexpected error creating permutation generator: %s", err) 14 | } 15 | return p 16 | } 17 | if p == nil { 18 | tb.Fatalf("unexpected nil permutation generator without error") 19 | } 20 | return p 21 | } 22 | 23 | func Test_PermuteCycle(t *testing.T) { 24 | sizes := []int64{8, 23, 64, 10000} 25 | for _, size := range sizes { 26 | p := PermutationOrBust(size, 0, "", t) 27 | seen := make(map[int64]struct{}, size) 28 | for i := int64(0); i < size; i++ { 29 | n := p.Next() 30 | if _, ok := seen[n]; ok { 31 | list := make([]int64, len(seen)) 32 | j := 0 33 | for k := range seen { 34 | list[j] = k 35 | j++ 36 | } 37 | t.Fatalf("size %d: got duplicate entry %d in %v", size, n, list) 38 | } 39 | seen[n] = struct{}{} 40 | } 41 | } 42 | } 43 | 44 | func TestPermuteSeed(t *testing.T) { 45 | size := int64(129) 46 | seeds := int64(8) 47 | p := 
make([]*Permutation, seeds) 48 | seen := make([]map[int64]struct{}, seeds) 49 | for s := int64(0); s < seeds; s++ { 50 | p[s] = PermutationOrBust(size, s, "", t) 51 | seen[s] = make(map[int64]struct{}, size) 52 | } 53 | matches := int64(0) 54 | v := make([]int64, seeds) 55 | for i := int64(0); i < size; i++ { 56 | for s := int64(0); s < seeds; s++ { 57 | v[s] = p[s].Next() 58 | if _, ok := seen[s][v[s]]; ok { 59 | t.Fatalf("duplicate entry (size %d, seed %d, entry %d): %d", 60 | size, s, i, v[s]) 61 | } 62 | seen[s][v[s]] = struct{}{} 63 | } 64 | for s := int64(1); s < seeds; s++ { 65 | if v[s] == v[s-1] { 66 | matches++ 67 | } 68 | } 69 | } 70 | // assuming number of outcomes is more than about 16, matches are pretty rare if nothing 71 | // is wrong. 72 | if (matches * 8) > (size * seeds) { 73 | t.Fatalf("too many matches: %d values to permute, %d seeds, %d matches seems suspicious.", 74 | size, seeds, matches) 75 | } else { 76 | t.Logf("permuting %d values across %d seeds: %d matches (OK)", size, seeds, matches) 77 | } 78 | } 79 | 80 | func Test_PermuteNth(t *testing.T) { 81 | size := int64(129) 82 | seeds := int64(8) 83 | p := make([][]*Permutation, seeds) 84 | seen := make([]map[int64]struct{}, seeds) 85 | for s := int64(0); s < seeds; s++ { 86 | p[s] = make([]*Permutation, 2) 87 | p[s][0] = PermutationOrBust(size, s, "", t) 88 | p[s][1] = PermutationOrBust(size, s, "", t) 89 | seen[s] = make(map[int64]struct{}, size) 90 | } 91 | matches := int64(0) 92 | v := make([]int64, seeds) 93 | for i := int64(0); i < size; i++ { 94 | for s := int64(0); s < seeds; s++ { 95 | v[s] = p[s][0].Next() 96 | if _, ok := seen[s][v[s]]; ok { 97 | t.Fatalf("duplicate entry (size %d, seed %d, entry %d): %d", 98 | size, s, i, v[s]) 99 | } 100 | seen[s][v[s]] = struct{}{} 101 | vN := p[s][1].Nth(i) 102 | if vN != v[s] { 103 | t.Fatalf("Nth entry didn't match Nth call to Next()) (size %d, seed %d, n %d): expected %d, got %d\n", 104 | size, s, i, v[s], vN) 105 | } 106 | } 107 | } 108 | // assuming number of outcomes is more than about 16, matches are pretty rare if nothing 109 | // is wrong. 110 | if (matches * 8) > (size * seeds) { 111 | t.Fatalf("too many matches: %d values to permute, %d seeds, %d matches seems suspicious.", 112 | size, seeds, matches) 113 | } else { 114 | t.Logf("permuting %d values across %d seeds: %d matches (OK)", size, seeds, matches) 115 | } 116 | } 117 | 118 | func Benchmark_PermuteCycle(b *testing.B) { 119 | sizes := []int64{5, 63, 1000000, (1 << 19)} 120 | for _, size := range sizes { 121 | b.Run(fmt.Sprintf("Pool%d", size), func(b *testing.B) { 122 | p := PermutationOrBust(size, 0, "", b) 123 | for i := 0; i < b.N; i++ { 124 | _ = p.Next() 125 | } 126 | }) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /apophenia/README.md: -------------------------------------------------------------------------------- 1 | # Apophenia -- seeking patterns in randomness 2 | 3 | Apophenia provides an approximate emulation of a seekable pseudo-random 4 | number generator. You provide a seed, and get a generator which can generate 5 | a large number of pseudo-random bits which will occur in a predictable 6 | pattern, but you can seek anywhere in that pattern in constant time. 7 | 8 | Apophenia's interface is intended to be similar to that of stdlib's 9 | `math/rand`. In fact, the `Sequence` interface type is a strict superset 10 | of `rand.Source`. 
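For example, a `Sequence` can back a stdlib `*rand.Rand` directly (a minimal
sketch relying only on the superset claim above):

```go
package main

import (
	"fmt"
	"math/rand"

	"github.com/pilosa/tools/apophenia"
)

func main() {
	// Sequence satisfies rand.Source, so rand.New accepts it as-is.
	r := rand.New(apophenia.NewSequence(42))
	fmt.Println(r.Int63(), r.Float64())
}
```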
11 | 
12 | ## Implementation Notes
13 | 
14 | AES-128, for a given seed, acts as a PRNG which maps each of 2^128 input
15 | coordinates to one of 2^128 possible outputs, each of which is 128 bits
16 | long. This is equivalent to a single-bit PRNG of period 2^135. It is
17 | also possible to treat the input 128 bits as a coordinate system of
18 | some sort, to allow multiple parallel sequences, etcetera.
19 | 
20 | This design may have serious fundamental flaws, but it worked out in
21 | light testing and I'm an optimist.
22 | 
23 | ### Sequences and Offsets
24 | 
25 | Apophenia's underlying implementation admits 128-bit keys, and 128-bit
26 | offsets within each sequence. In most cases:
27 | 
28 | * That's more space than we need.
29 | * Working with a non-native type for item numbers is annoying,
30 |   but 64 bits is enough range.
31 | * It would be nice to avoid using the *same* pseudo-random values
32 |   for different things.
33 | * Even when those things have the same basic identifying ID or
34 |   value.
35 | 
36 | For instance, say you wish to generate a billion items. Each item should
37 | have several "random" values. Some values might follow a Zipf distribution,
38 | others might just be "U % N" for some N. If you use the item number as a
39 | key and seek to the same position for each of these values, you get the
40 | same bits for each of them, which may produce unintended similarities or
41 | correlations between them.
42 | 
43 | With this in mind, apophenia divides its 128-bit offset space into a number
44 | of spaces. The most significant bits are used for a sequence-type value, one
45 | of:
46 | 
47 | * SequenceDefault
48 | * SequencePermutationK/SequencePermutationF: permutations
49 | * SequenceWeighted: weighted bits
50 | * SequenceLinear: linear values within a range
51 | * SequenceZipfU: uniforms to use for Zipf values
52 | * SequenceRandSource: default offsets for the rand.Source
53 | * SequenceUser1/SequenceUser2: reserved for non-apophenia usage
54 | 
55 | Other values are not yet defined, but are reserved.
56 | 
57 | Within most of these spaces, the rest of the high word of the offset is used
58 | for a 'seed' (used to select different sequences) and an 'iteration' (used
59 | for successive values consumed by an algorithm). The low-order word is treated
60 | as a 64-bit item ID.
61 | 
62 | ```
63 | High-order word:
64 | 0               1               2               3
65 | 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
66 | [iteration             ][seed                          ][seq   ]
67 | Low-order word:
68 | 0               1               2               3
69 | 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
70 | [id                                                            ]
71 | ```
72 | 
73 | The convenience function `OffsetFor(sequence, seed, iteration, id)`
74 | supports this usage.
75 | 
76 | As a side effect, if generating additional values for a given seed and
77 | id, you can increment the high-order word of the `Uint128`,
78 | and if generating values for a new id, you can increment the low-order
79 | word. If your algorithm consumes more than 2^24 values for a single
80 | operation, you could start hitting values shared with other seeds. Oh,
81 | well.
82 | 
83 | #### Iteration usage
84 | 
85 | For the built-in consumers:
86 | 
87 | * Weighted consumes log2(scale) iterated values.
88 | * Zipf consumes an *average* of no more than about 1.1 values.
89 | * Permutation consumes one iterated value per 128 rounds of permutation,
90 |   where rounds is equal to `6*ceil(log2(max))`. (For instance, a second
91 |   value is consumed around a maximum of 2^22, and a third around 2^43.)
92 | * Nothing else uses more than one iterated value.
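To make that layout concrete, here is how `OffsetFor` packs an example offset (the sequence class, seed, and id values are arbitrary; the packing matches the implementation in `apophenia.go`):

```
off := apophenia.OffsetFor(apophenia.SequenceZipfU, 7, 0, 99)
// off.Hi == (uint64(apophenia.SequenceZipfU) << 56) | (7 << 24) | 0
// off.Lo == 99
```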
93 | -------------------------------------------------------------------------------- /apophenia/zipf.go: -------------------------------------------------------------------------------- 1 | package apophenia 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | ) 7 | 8 | // Zipf produces a series of values following a Zipf distribution. 9 | // It is initialized with values q, v, and max, and produces values 10 | // in the range [0,max) such that the probability of a value k is 11 | // proportional to (v+k) ** -q. v must be >= 1, q must be > 1. 12 | // 13 | // This is based on the same paper used for the golang stdlib Zipf 14 | // distribution: 15 | // 16 | // "Rejection-Inversion to Generate Variates 17 | // from Monotone Discrete Distributions" 18 | // W.Hormann, G.Derflinger [1996] 19 | // http://eeyore.wu-wien.ac.at/papers/96-04-04.wh-der.ps.gz 20 | // 21 | // This implementation differs from stdlib's in that it is seekable; you 22 | // can get the Nth value in a theoretical series of results in constant 23 | // time, without having to generate the whole series linearly. 24 | type Zipf struct { 25 | src Sequence 26 | seed uint32 27 | q float64 28 | v float64 29 | max float64 30 | oneMinusQ float64 31 | oneOverOneMinusQ float64 32 | h func(float64) float64 33 | hInv func(float64) float64 34 | hImaxOneHalf float64 35 | hX0MinusHImaxOneHalf float64 // hX0 is only ever used as hX0 - h(i[max] + 1/2) 36 | s float64 37 | idx uint64 38 | } 39 | 40 | // NewZipf returns a new Zipf object with the specified q, v, and 41 | // max, and with its random source seeded in some way by seed. 42 | // The sequence of values returned is consistent for a given set 43 | // of inputs. The seed parameter can select one of multiple sub-sequences 44 | // of the given sequence. 45 | func NewZipf(q float64, v float64, max uint64, seed uint32, src Sequence) (z *Zipf, err error) { 46 | if math.IsNaN(q) || math.IsNaN(v) { 47 | return nil, fmt.Errorf("q (%g) and v (%g) must not be NaN for Zipf distribution", q, v) 48 | } 49 | if q <= 1 || v < 1 { 50 | return nil, fmt.Errorf("need q > 1 (got %g) and v >= 1 (got %g) for Zipf distribution", q, v) 51 | } 52 | if src == nil { 53 | return nil, fmt.Errorf("need a usable PRNG apophenia.Sequence") 54 | } 55 | oneMinusQ := 1 - q 56 | oneOverOneMinusQ := 1 / (1 - q) 57 | z = &Zipf{ 58 | q: q, 59 | v: v, 60 | max: float64(max), 61 | seed: seed, 62 | oneMinusQ: oneMinusQ, 63 | oneOverOneMinusQ: oneOverOneMinusQ, 64 | } 65 | z.h = func(x float64) float64 { 66 | return math.Exp((1-q)*math.Log(v+x)) * oneOverOneMinusQ 67 | } 68 | z.hInv = func(x float64) float64 { 69 | return -v + math.Exp(oneOverOneMinusQ*math.Log(oneMinusQ*x)) 70 | } 71 | hX0 := z.h(0.5) - math.Exp(math.Log(v)*-q) 72 | z.hImaxOneHalf = z.h(z.max + 0.5) 73 | z.hX0MinusHImaxOneHalf = hX0 - z.hImaxOneHalf 74 | z.s = 1 - z.hInv(z.h(1.5)-math.Exp(math.Log(v+1)*-q)) 75 | z.src = src 76 | if err != nil { 77 | return nil, err 78 | } 79 | return z, nil 80 | } 81 | 82 | // Nth returns the Nth value from the sequence associated with the 83 | // given Zipf. For a given set of input values (q, v, max, and seed), 84 | // and a given index, the same value is returned. 
85 | func (z *Zipf) Nth(index uint64) uint64 { 86 | z.idx = index 87 | offset := OffsetFor(SequenceZipfU, z.seed, 0, index) 88 | for { 89 | bits := z.src.BitsAt(offset) 90 | uInt := bits.Lo 91 | u := float64(uInt&(1<<53-1)) / (1 << 53) 92 | u = z.hImaxOneHalf + u*z.hX0MinusHImaxOneHalf 93 | x := z.hInv(u) 94 | k := math.Floor(x + 0.5) 95 | if k-x <= z.s { 96 | return uint64(k) 97 | } 98 | if u >= z.h(k+0.5)-math.Exp(-math.Log(z.v+k)*z.q) { 99 | return uint64(k) 100 | } 101 | // the low-order 24 bits of the high-order 64-bit word 102 | // are the "iteration", which started as zero. Assuming we 103 | // don't need more than ~16.7M values, we're good. The expected 104 | // average is about 1.1. 105 | offset.Hi++ 106 | } 107 | } 108 | 109 | // Next returns the "next" value -- the one after the last one requested, or 110 | // value 1 if none have been requested before. 111 | func (z *Zipf) Next() uint64 { 112 | return z.Nth(z.idx + 1) 113 | } 114 | -------------------------------------------------------------------------------- /apophenia/apophenia.go: -------------------------------------------------------------------------------- 1 | // Package apophenia provides seekable pseudo-random numbers, allowing 2 | // reproducibility of pseudo-random results regardless of the order they're 3 | // generated in. 4 | package apophenia 5 | 6 | import ( 7 | "crypto/aes" 8 | "crypto/cipher" 9 | "encoding/binary" 10 | "math/rand" 11 | ) 12 | 13 | // Sequence represents a specific deterministic but pseudo-random-ish series 14 | // of bits. A Sequence can be used as a `rand.Source` or `rand.Source64` 15 | // for `math/rand`. 16 | type Sequence interface { 17 | rand.Source 18 | Seek(Uint128) Uint128 19 | BitsAt(Uint128) Uint128 20 | } 21 | 22 | // aesSequence128 implements Sequence on top of an AES block cipher. 23 | type aesSequence128 struct { 24 | key [16]byte 25 | cipher cipher.Block 26 | plainText, cipherText [16]byte 27 | offset Uint128 28 | err error 29 | } 30 | 31 | // NewSequence generates a sequence initialized with the given seed. 32 | func NewSequence(seed int64) Sequence { 33 | s := aesSequence128{offset: OffsetFor(SequenceRandSource, 0, 0, 0)} 34 | s.Seed(seed) 35 | if s.err != nil { 36 | panic("impossible error: " + s.err.Error()) 37 | } 38 | return &s 39 | } 40 | 41 | // Seed sets the generator to a known state. 42 | func (s *aesSequence128) Seed(seed int64) { 43 | var newKey [16]byte 44 | binary.LittleEndian.PutUint64(newKey[:8], uint64(seed)) 45 | newCipher, err := aes.NewCipher(newKey[:]) 46 | if err != nil { 47 | // we can't return an error, because Seed() can't fail. also 48 | // note that this can't actually happen, supposedly. 49 | s.err = err 50 | return 51 | } 52 | copy(s.key[:], newKey[:]) 53 | s.cipher = newCipher 54 | s.offset = Uint128{0, 0} 55 | } 56 | 57 | // Int63 returns a value in 0..(1<<63)-1. 58 | func (s *aesSequence128) Int63() int64 { 59 | return int64(s.Uint64() >> 1) 60 | } 61 | 62 | // Uint64 returns a value in 0..(1<<64)-1. 63 | func (s *aesSequence128) Uint64() uint64 { 64 | out := s.BitsAt(s.offset) 65 | s.offset.Inc() 66 | return out.Lo 67 | } 68 | 69 | // SequenceClass denotes one of the sequence types, which are used to allow 70 | // sequences to avoid hitting each other's pseudo-random results. 71 | type SequenceClass uint8 72 | 73 | const ( 74 | // SequenceDefault is the zero value, used if you didn't think to pick one. 75 | SequenceDefault SequenceClass = iota 76 | // SequencePermutationK is the K values for the permutation algorithm. 
77 | SequencePermutationK
78 | // SequencePermutationF is the F values for the permutation algorithm.
79 | SequencePermutationF
80 | // SequenceWeighted is used to generate weighted values for a given
81 | // position.
82 | SequenceWeighted
83 | // SequenceLinear is the random numbers for U%N type usage.
84 | SequenceLinear
85 | // SequenceZipfU is the random numbers for the Zipf computations.
86 | SequenceZipfU
87 | // SequenceRandSource is used by default when a Sequence is being
88 | // used as a rand.Source.
89 | SequenceRandSource
90 | // SequenceUser1 is reserved for non-apophenia package usage.
91 | SequenceUser1
92 | // SequenceUser2 is reserved for non-apophenia package usage.
93 | SequenceUser2
94 | )
95 | 
96 | // OffsetFor determines the Uint128 offset for a given class/seed/iteration/id.
97 | func OffsetFor(class SequenceClass, seed uint32, iter uint32, id uint64) Uint128 {
98 | return Uint128{Hi: (uint64(class) << 56) | (uint64(seed) << 24) | uint64(iter),
99 | Lo: id}
100 | }
101 | 
102 | // Seek seeks to the specified offset, yielding the previous offset. This
103 | // sets the stream to a specific point in its cycle, affecting future calls
104 | // to Int63 or Uint64.
105 | func (s *aesSequence128) Seek(offset Uint128) (old Uint128) {
106 | old, s.offset = s.offset, offset
107 | return old
108 | }
109 | 
110 | // BitsAt yields the sequence of bits at the provided offset into the stream.
111 | func (s *aesSequence128) BitsAt(offset Uint128) (out Uint128) {
112 | binary.LittleEndian.PutUint64(s.plainText[:8], offset.Lo)
113 | binary.LittleEndian.PutUint64(s.plainText[8:], offset.Hi)
114 | s.cipher.Encrypt(s.cipherText[:], s.plainText[:])
115 | out.Lo, out.Hi = binary.LittleEndian.Uint64(s.cipherText[:8]), binary.LittleEndian.Uint64(s.cipherText[8:])
116 | return out
117 | }
118 | 
--------------------------------------------------------------------------------
/bench/zipf.go:
--------------------------------------------------------------------------------
1 | package bench
2 | 
3 | import (
4 | "context"
5 | "fmt"
6 | "log"
7 | "math"
8 | "math/rand"
9 | "os"
10 | "time"
11 | 
12 | "github.com/pilosa/go-pilosa"
13 | "github.com/pilosa/tools/apophenia"
14 | )
15 | 
16 | // ZipfBenchmark sets random bits according to the Zipf-Mandelbrot distribution.
17 | // This distribution accepts two parameters, Exponent and Ratio, for both rows and columns.
18 | // It also uses apophenia.Permutation to permute IDs randomly.
19 | type ZipfBenchmark struct {
20 | Name string `json:"name"`
21 | MinRowID int64 `json:"min-row-id"`
22 | MinColumnID int64 `json:"min-column-id"`
23 | MaxRowID int64 `json:"max-row-id"`
24 | MaxColumnID int64 `json:"max-column-id"`
25 | Iterations int `json:"iterations"`
26 | Seed int64 `json:"seed"`
27 | Index string `json:"index"`
28 | Field string `json:"field"`
29 | RowExponent float64 `json:"row-exponent"`
30 | RowRatio float64 `json:"row-ratio"`
31 | ColumnExponent float64 `json:"column-exponent"`
32 | ColumnRatio float64 `json:"column-ratio"`
33 | Operation string `json:"operation"`
34 | 
35 | Logger *log.Logger `json:"-"`
36 | }
37 | 
38 | // NewZipfBenchmark returns a new instance of ZipfBenchmark.
39 | func NewZipfBenchmark() *ZipfBenchmark {
40 | return &ZipfBenchmark{
41 | Name: "zipf",
42 | Logger: log.New(os.Stderr, "", log.LstdFlags),
43 | }
44 | }
45 | 
46 | // Run runs the Zipf benchmark.
47 | func (b *ZipfBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) {
48 | result := NewResult()
49 | result.AgentNum = agentNum
50 | result.Configuration = b
51 | 
52 | // Initialize schema.
53 | _, field, err := ensureSchema(client, b.Index, b.Field)
54 | if err != nil {
55 | return result, err
56 | }
57 | 
58 | seed := b.Seed + int64(agentNum)
59 | rowOffset := getZipfOffset(b.MaxRowID-b.MinRowID, b.RowExponent, b.RowRatio)
60 | rowRand := rand.NewZipf(rand.New(rand.NewSource(seed)), b.RowExponent, rowOffset, uint64(b.MaxRowID-b.MinRowID-1))
61 | columnOffset := getZipfOffset(b.MaxColumnID-b.MinColumnID, b.ColumnExponent, b.ColumnRatio)
62 | columnRand := rand.NewZipf(rand.New(rand.NewSource(seed)), b.ColumnExponent, columnOffset, uint64(b.MaxColumnID-b.MinColumnID-1))
63 | rowSeq := apophenia.NewSequence(seed)
64 | colSeq := apophenia.NewSequence(seed + 1)
65 | rowPerm, err := apophenia.NewPermutation(b.MaxRowID-b.MinRowID, 0, rowSeq)
66 | if err != nil {
67 | return result, err
68 | }
69 | columnPerm, err := apophenia.NewPermutation(b.MaxColumnID-b.MinColumnID, 0, colSeq)
70 | if err != nil {
71 | return result, err
72 | }
73 | 
74 | for n := 0; n < b.Iterations; n++ {
75 | // generate IDs from Zipf distribution
76 | rowIDOriginal := rowRand.Uint64()
77 | profIDOriginal := columnRand.Uint64()
78 | 
79 | // permute IDs randomly, but repeatably
80 | rowID := rowPerm.Nth(int64(rowIDOriginal))
81 | profID := columnPerm.Nth(int64(profIDOriginal))
82 | 
83 | var q pilosa.PQLQuery
84 | switch b.Operation {
85 | case "set":
86 | q = field.Set(b.MinRowID+int64(rowID), b.MinColumnID+int64(profID))
87 | case "clear":
88 | q = field.Clear(b.MinRowID+int64(rowID), b.MinColumnID+int64(profID))
89 | default:
90 | return result, fmt.Errorf("unsupported operation: \"%s\" (must be \"set\" or \"clear\")", b.Operation)
91 | }
92 | 
93 | start := time.Now()
94 | _, err := client.Query(q)
95 | result.Add(time.Since(start), nil)
96 | if err != nil {
97 | return result, err
98 | }
99 | }
100 | return result, err
101 | }
102 | 
103 | // getZipfOffset converts a ratio into the offset parameter used by the Zipf
104 | // distribution. Offset is the true parameter, but the ratio, as defined here,
105 | // is a simpler, readable way to define the distribution: offset is in [1, inf),
106 | // and its meaning depends on N (a pain for updating benchmark configs), while
107 | // ratio is in (0, 1): the ratio of the lowest probability in the distribution to the highest.
108 | // ratio=0.01 corresponds to a very small offset - the most skewed distribution for a given pair (N, exp);
109 | // ratio=0.99 corresponds to a very large offset - the most nearly uniform distribution for a given (N, exp).
110 | func getZipfOffset(N int64, exp, ratio float64) float64 {
111 | z := math.Pow(ratio, 1/exp)
112 | return z * float64(N-1) / (1 - z)
113 | }
114 | 
--------------------------------------------------------------------------------
/apophenia/int128.go:
--------------------------------------------------------------------------------
1 | package apophenia
2 | 
3 | import "fmt"
4 | 
5 | // Uint128 is an array of 2 uint64, treated as a single
6 | // object to simplify calling conventions.
7 | type Uint128 struct {
8 | Lo, Hi uint64 // low-order and high-order uint64 words. Value is `(Hi << 64) | Lo`.
9 | }
10 | 
11 | // Add adds value to its receiver in place.
12 | func (u *Uint128) Add(value Uint128) {
13 | u.Lo += value.Lo
14 | if u.Lo < value.Lo {
15 | u.Hi++
16 | }
17 | u.Hi += value.Hi
18 | }
19 | 
20 | // Sub subtracts value from its receiver in place.
21 | func (u *Uint128) Sub(value Uint128) {
22 | if u.Lo < value.Lo {
23 | u.Hi-- // borrow from the high word
24 | }
25 | u.Lo -= value.Lo
26 | u.Hi -= value.Hi
27 | }
28 | 
29 | // And does a bitwise and with value, in place.
30 | func (u *Uint128) And(value Uint128) {
31 | u.Lo, u.Hi = u.Lo&value.Lo, u.Hi&value.Hi
32 | }
33 | 
34 | // Or does a bitwise or with value, in place.
35 | func (u *Uint128) Or(value Uint128) {
36 | u.Lo, u.Hi = u.Lo|value.Lo, u.Hi|value.Hi
37 | }
38 | 
39 | // Xor does a bitwise xor with value, in place.
40 | func (u *Uint128) Xor(value Uint128) {
41 | u.Lo, u.Hi = u.Lo^value.Lo, u.Hi^value.Hi
42 | }
43 | 
44 | // Not does a bitwise complement in place.
45 | func (u *Uint128) Not() {
46 | u.Lo, u.Hi = ^u.Lo, ^u.Hi
47 | }
48 | 
49 | // Mask masks u down to its lower n bits, in place.
50 | func (u *Uint128) Mask(n uint64) {
51 | if n >= 128 {
52 | return
53 | }
54 | if n >= 64 {
55 | u.Hi &= (1 << (n & 63)) - 1
56 | } else {
57 | u.Lo, u.Hi = u.Lo&((1<<n)-1), 0 // no high-word bits survive below bit 64
58 | }
59 | }
60 | 
61 | // Mask produces a bitmask with n bits set.
62 | func Mask(n uint64) (u Uint128) {
63 | if n >= 128 {
64 | u.Not()
65 | return u
66 | }
67 | if n >= 64 {
68 | u.Lo = ^uint64(0)
69 | u.Hi = (1 << (n & 63)) - 1
70 | return u
71 | }
72 | u.Lo = (1 << n) - 1
73 | return u
74 | }
75 | 
76 | // String provides a string representation.
77 | func (u Uint128) String() string {
78 | return fmt.Sprintf("0x%x%016x", u.Hi, u.Lo)
79 | }
80 | 
81 | // bit rotation: for 1-63 bits, we are moving the low-order N bits of u.Lo
82 | // into the high-order N bits of u.Hi, and vice versa. For 64-127, it's that
83 | // plus swapping u.Lo and u.Hi.
84 | 
85 | // RotateRight rotates u right by n bits.
86 | func (u *Uint128) RotateRight(n uint64) {
87 | if n&64 != 0 {
88 | u.Lo, u.Hi = u.Hi, u.Lo
89 | }
90 | n &= 63
91 | if n == 0 {
92 | return
93 | }
94 | unbits := 64 - n
95 | 
96 | u.Lo, u.Hi = (u.Lo>>n)|(u.Hi<<unbits), (u.Hi>>n)|(u.Lo<<unbits)
97 | }
98 | 
99 | // RotateLeft rotates u left by n bits.
100 | func (u *Uint128) RotateLeft(n uint64) {
101 | if n&64 != 0 {
102 | u.Lo, u.Hi = u.Hi, u.Lo
103 | }
104 | n &= 63
105 | if n == 0 {
106 | return
107 | }
108 | unbits := 64 - n
109 | 
110 | u.Lo, u.Hi = (u.Lo<<n)|(u.Hi>>unbits), (u.Hi<<n)|(u.Lo>>unbits)
111 | }
112 | 
113 | // ShiftRight shifts u right by n bits.
114 | func (u *Uint128) ShiftRight(n uint64) {
115 | if n > 127 {
116 | u.Lo, u.Hi = 0, 0
117 | return
118 | }
119 | if n >= 64 {
120 | u.Lo, u.Hi = u.Hi>>(n&63), 0
121 | return
122 | }
123 | unbits := 64 - n
124 | 
125 | u.Lo, u.Hi = (u.Lo>>n)|(u.Hi<<unbits), (u.Hi >> n)
126 | }
127 | 
128 | // ShiftRightCarry returns both the shifted value and the bits that
129 | // were shifted out. Useful for when you want both x%N and x/N for
130 | // N a power of 2. Only sane if n <= 64.
131 | func (u *Uint128) ShiftRightCarry(n uint64) (out Uint128, carry uint64) {
132 | if n > 64 {
133 | return out, carry
134 | }
135 | if n == 64 {
136 | out.Lo, carry = u.Hi, u.Lo
137 | return out, carry
138 | }
139 | unbits := 64 - n
140 | 
141 | out.Lo, out.Hi, carry = (u.Lo>>n)|(u.Hi<<unbits), (u.Hi >> n), u.Lo&((1<<n)-1)
142 | return out, carry
143 | }
144 | 
145 | // ShiftLeft shifts u left by n bits.
146 | func (u *Uint128) ShiftLeft(n uint64) {
147 | if n > 127 {
148 | u.Lo, u.Hi = 0, 0
149 | return
150 | }
151 | if n >= 64 {
152 | u.Lo, u.Hi = 0, u.Lo<<(n&63)
153 | return
154 | }
155 | n &= 63
156 | if n == 0 {
157 | return
158 | }
159 | unbits := 64 - n
160 | 
161 | u.Lo, u.Hi = (u.Lo << n), (u.Hi<<n)|(u.Lo>>unbits)
162 | }
163 | 
164 | // Bit returns 1 if the nth bit is set, 0 otherwise.
165 | func (u *Uint128) Bit(n uint64) uint64 {
166 | if n >= 128 {
167 | return 0
168 | }
169 | if n >= 64 {
170 | return (u.Hi >> (n & 63)) & 1
171 | }
172 | return (u.Lo >> n) & 1
173 | }
174 | 
175 | // Inc increments its receiver in place.
176 | func (u *Uint128) Inc() {
177 | u.Lo++
178 | if u.Lo == 0 {
179 | u.Hi++
180 | }
181 | }
182 | 
--------------------------------------------------------------------------------
/dx/ingest.go:
--------------------------------------------------------------------------------
1 | package dx
2 | 
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "io/ioutil"
7 | "log"
8 | "os"
9 | "path/filepath"
10 | "strconv"
11 | "sync"
12 | "time"
13 | 
14 | "github.com/pilosa/tools/imagine"
15 | "github.com/pkg/errors"
16 | "github.com/spf13/cobra"
17 | )
18 | 
19 | // NewIngestCommand initializes an ingest command.
20 | func NewIngestCommand(m *Main) *cobra.Command {
21 | ingestCmd := &cobra.Command{
22 | Use: "ingest",
23 | Short: "ingest on cluster/s using imagine",
24 | Long: `Perform ingest on the cluster/s using imagine.`,
25 | Run: func(cmd *cobra.Command, args []string) {
26 | if err := ExecuteIngest(m); err != nil {
27 | if m.Verbose {
28 | fmt.Printf("%+v\n", err)
29 | } else {
30 | fmt.Printf("%v\n", err)
31 | }
32 | os.Exit(1)
33 | }
34 | },
35 | }
36 | 
37 | flags := ingestCmd.PersistentFlags()
38 | flags.StringVarP(&m.Prefix, "prefix", "p", "dx-", "Prefix to use for index")
39 | flags.StringSliceVar(&m.SpecFiles, "specfiles", nil, "Path to imagine spec file")
40 | _ = ingestCmd.MarkPersistentFlagRequired("specfiles")
41 | 
42 | return ingestCmd
43 | }
44 | 
45 | // ExecuteIngest executes an ingest command on the cluster/s, ensuring that the order of clusters
46 | // specified in the flags corresponds to the filenames that the results are saved in.
47 | func ExecuteIngest(m *Main) error {
48 | for _, file := range m.SpecFiles {
49 | found, err := checkFileExists(file)
50 | if err != nil {
51 | return errors.Wrapf(err, "error checking existence of %v", file)
52 | }
53 | if !found {
54 | return errors.Errorf("%s does not exist", file)
55 | }
56 | }
57 | 
58 | path, err := makeFolder(cmdIngest, m.DataDir)
59 | if err != nil {
60 | return errors.Wrap(err, "error creating folder for ingest results")
61 | }
62 | 
63 | // TODO: copy spec file?
64 | configs := make([]*imagine.Config, 0)
65 | 
66 | allClusterHosts := getAllClusterHosts(m.Hosts)
67 | for _, clusterHosts := range allClusterHosts {
68 | config := newConfig(clusterHosts, m.SpecFiles, m.Prefix, m.ThreadCount)
69 | configs = append(configs, config)
70 | }
71 | 
72 | var wg sync.WaitGroup
73 | 
74 | for i, config := range configs {
75 | wg.Add(1)
76 | go func(i int, config *imagine.Config) {
77 | defer wg.Done()
78 | ingestAndWriteResult(i, config, path)
79 | }(i, config)
80 | }
81 | 
82 | wg.Wait()
83 | fmt.Printf("result(s) successfully saved in %s\n", path)
84 | return nil
85 | }
86 | 
87 | func ingestAndWriteResult(instanceNum int, config *imagine.Config, path string) {
88 | bench, err := ingestOnInstance(config)
89 | if err != nil {
90 | log.Printf("error ingesting on instance %v: %+v", instanceNum, err)
91 | }
92 | 
93 | filename := strconv.Itoa(instanceNum)
94 | if err := writeResultFile(bench, filename, path); err != nil {
95 | log.Printf("error writing result file: %v", err)
96 | }
97 | }
98 | 
99 | // ingestOnInstance ingests data based on a config file.
100 | func ingestOnInstance(conf *imagine.Config) (*Benchmark, error) {
101 | bench := NewBenchmark()
102 | bench.Type = cmdIngest
103 | bench.ThreadCount = conf.ThreadCount
104 | 
105 | err := conf.ReadSpecs()
106 | if err != nil {
107 | return nil, errors.Wrap(err, "error reading spec from config")
108 | }
109 | 
110 | client, err := initializeClient(conf.Hosts...)
111 | if err != nil {
112 | return nil, errors.Wrap(err, "error creating Pilosa client")
113 | }
114 | 
115 | if err = conf.UpdateIndexes(client); err != nil {
116 | return nil, errors.Wrap(err, "error updating indexes")
117 | }
118 | 
119 | now := time.Now()
120 | err = conf.ApplyWorkloads(client)
121 | if err != nil {
122 | return nil, errors.Wrap(err, "error applying workloads")
123 | }
124 | 
125 | bench.Time.Duration = time.Since(now)
126 | return bench, nil
127 | }
128 | 
129 | // writeResultFile writes the results of a Benchmark to a JSON file.
130 | func writeResultFile(bench *Benchmark, filename, dir string) error {
131 | jsonBytes, err := json.Marshal(bench)
132 | if err != nil {
133 | return errors.Wrap(err, "could not marshal results to JSON")
134 | }
135 | 
136 | path := filepath.Join(dir, filename)
137 | if err = ioutil.WriteFile(path, jsonBytes, 0666); err != nil {
138 | return errors.Wrap(err, "could not write JSON to file")
139 | }
140 | return nil
141 | }
142 | 
143 | func newConfig(hosts []string, specFiles []string, prefix string, threadCount int) *imagine.Config {
144 | conf := &imagine.Config{
145 | Hosts: hosts,
146 | Prefix: prefix,
147 | ThreadCount: threadCount,
148 | }
149 | conf.NewSpecsFiles(specFiles)
150 | 
151 | return conf
152 | }
153 | 
--------------------------------------------------------------------------------
/apophenia/permute.go:
--------------------------------------------------------------------------------
1 | package apophenia
2 | 
3 | import (
4 | "errors"
5 | "math/bits"
6 | )
7 | 
8 | // Permutation provides a way to pass integer IDs through a permutation
9 | // map that is pseudorandom but repeatable. This could be done with rand.Perm,
10 | // but that would require storing a slice of [Items]int64, which we want to avoid
11 | // for large values of Items.
12 | //
13 | // Not actually cryptographically secure.
14 | type Permutation struct {
15 | src Sequence
16 | permSeed uint32
17 | max int64
18 | counter int64
19 | rounds int
20 | bits Uint128
21 | k []uint64
22 | }
23 | 
24 | // Design notes:
25 | //
26 | // This is based on:
27 | // http://arxiv.org/abs/1208.1176v2
28 | //
29 | // This simulates the results of a shuffle in a way allowing a lookup of
30 | // the results of the shuffle for any given position, in time proportional
31 | // to a number of "rounds", each of which is 50% likely to swap a slot
32 | // with another slot. The number of rounds needed to achieve a reasonable
33 | // probability of safety is log(N)*6 or so.
34 | //
35 | // Each permutation is fully defined by a "key", consisting of:
36 | // 1. A key "KF" naming a value in [0,max) for each round.
37 | // 2. A series of round functions mapping values in [0,max) to bits,
38 | // one for each round.
39 | // I refer to these as K[r] and F[r]. Thus, K[0] is the index used to
40 | // compute swap operations for round 0, and F[0] is the series of bits
41 | // used to determine whether a swap is performed, with F[0][0] being
42 | // the swap decision for slot 0 in round 0.
(Except it probably isn't, 43 | // because the swap decision is actually made based on the highest index 44 | // in a pair, to ensure that a swap between A and B always uses the same 45 | // decision bit.) 46 | // 47 | // K values are generated using the SequencePermutationK range of offsets, 48 | // with 49 | // 50 | // For F values, we set byte 8 of the plain text to 0x00, and use 51 | // encoding/binary to dump the slot number into the first 8 bytes. This 52 | // yields 128 values, which we treat as the values for the first 128 rounds, 53 | // and then recycle for rounds 129+ if those exist. This is not very 54 | // secure, but we're already at 1/2^128 chances by that time and don't care. 55 | // We could probably trim rounds to 64 or so and not lose much data. 56 | 57 | // NewPermutation creates a Permutation which generates values in [0,m), 58 | // from a given Sequence and seed value. 59 | // 60 | // The seed parameter selects different shuffles, and is useful if you need 61 | // to generate multiple distinct shuffles from the same underlying sequence. 62 | // Treat it as a secondary seed. 63 | func NewPermutation(max int64, seed uint32, src Sequence) (*Permutation, error) { 64 | if max < 1 { 65 | return nil, errors.New("period must be positive") 66 | } 67 | // number of rounds to get "good" results is roughly 6 log N. 68 | bits := 64 - bits.LeadingZeros64(uint64(max)) 69 | p := Permutation{max: max, rounds: 6 * bits, counter: 0} 70 | 71 | p.src = src 72 | p.k = make([]uint64, p.rounds) 73 | p.permSeed = seed 74 | offset := OffsetFor(SequencePermutationK, p.permSeed, 0, 0) 75 | for i := uint64(0); i < uint64(p.rounds); i++ { 76 | offset.Lo = i 77 | p.k[i] = p.src.BitsAt(offset).Lo % uint64(p.max) 78 | } 79 | return &p, nil 80 | } 81 | 82 | // Next generates the next value from the permutation. 83 | func (p *Permutation) Next() (ret int64) { 84 | return p.nextValue() 85 | } 86 | 87 | // Nth generates the Nth value from the permutation. For instance, 88 | // given a new permutation, calling Next once produces the same 89 | // value you'd get from calling Nth(0). This is a seek which changes 90 | // the offset that Next will count from; after calling Nth(x), you 91 | // would get the same result from Next() that you would from Nth(x+1). 92 | func (p *Permutation) Nth(n int64) (ret int64) { 93 | p.counter = n 94 | ret = p.nextValue() 95 | return ret 96 | } 97 | 98 | func (p *Permutation) nextValue() int64 { 99 | p.counter = int64(uint64(p.counter) % uint64(p.max)) 100 | x := uint64(p.counter) 101 | p.counter++ 102 | // a value which can't possibly be the next value we need, so we 103 | // always hash on the first pass. 
104 | prev := uint64(p.max) + 1
105 | offset := OffsetFor(SequencePermutationF, p.permSeed, 0, 0)
106 | for i := uint64(0); i < uint64(p.rounds); i++ {
107 | if i > 0 && i&127 == 0 {
108 | offset.Hi++
109 | // force regeneration of bits down below
110 | prev = uint64(p.max) + 1
111 | }
112 | xPrime := (p.k[i] + uint64(p.max) - x) % uint64(p.max)
113 | xCaret := x
114 | if xPrime > xCaret {
115 | xCaret = xPrime
116 | }
117 | if xCaret != prev {
118 | offset.Lo = xCaret
119 | p.bits = p.src.BitsAt(offset)
120 | prev = xCaret
121 | }
122 | if p.bits.Bit(i&127) != 0 { // bits are recycled every 128 rounds
123 | x = xPrime
124 | }
125 | }
126 | return int64(x)
127 | }
128 | 
--------------------------------------------------------------------------------
/cmd/pi/main.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | "crypto/tls"
5 | "fmt"
6 | "io/ioutil"
7 | "log"
8 | "net/http"
9 | _ "net/http/pprof"
10 | "os"
11 | "strings"
12 | 
13 | "github.com/pilosa/go-pilosa"
14 | "github.com/pilosa/tools"
15 | "github.com/spf13/cobra"
16 | "github.com/spf13/pflag"
17 | "github.com/spf13/viper"
18 | )
19 | 
20 | func main() {
21 | go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }()
22 | 
23 | if err := NewRootCommand().Execute(); err != nil {
24 | fmt.Println(err)
25 | os.Exit(1)
26 | }
27 | }
28 | 
29 | func NewRootCommand() *cobra.Command {
30 | rc := &cobra.Command{
31 | Use: "pi",
32 | Short: "Pilosa Tools",
33 | Long: `Contains various benchmarking and cluster creation and management tools for
34 | Pilosa. Try "pi --help" for more information.
35 | 
36 | Version: ` + tools.Version + `
37 | Build Time: ` + tools.BuildTime + "\n",
38 | PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
39 | v := viper.New()
40 | err := setAllConfig(v, cmd.Flags(), "PI")
41 | if err != nil {
42 | return err
43 | }
44 | 
45 | // return "dry run" error if the "dry-run" flag is set
46 | if ret, err := cmd.Flags().GetBool("dry-run"); err != nil {
47 | return fmt.Errorf("problem getting dry-run flag: %v", err)
48 | } else if ret && cmd.Parent() != nil {
49 | // dry-run only aborts subcommands, which do the real work;
50 | // the root command still prints its usual output.
51 | return fmt.Errorf("dry run")
52 | }
53 | 
54 | return nil
55 | },
56 | }
57 | rc.PersistentFlags().BoolP("verbose", "v", false, "Enable verbose logging.")
58 | rc.PersistentFlags().Bool("dry-run", false, "Stop before executing. Useful for testing.")
59 | _ = rc.PersistentFlags().MarkHidden("dry-run")
60 | rc.PersistentFlags().StringP("config", "c", "", "Configuration file to read from.")
61 | _ = rc.PersistentFlags().MarkHidden("config")
62 | 
63 | rc.AddCommand(NewBenchCommand())
64 | rc.AddCommand(NewReplayCommand())
65 | 
66 | rc.SetOutput(os.Stderr)
67 | return rc
68 | }
69 | 
70 | // setAllConfig takes a FlagSet to be the definition of all configuration
71 | // options, as well as their defaults. It then reads from the command line, the
72 | // environment, and a config file (if specified), and applies the configuration
73 | // in that priority order. Since each flag in the set contains a pointer to
74 | // where its value should be stored, setAllConfig can directly modify the value
75 | // of each config variable.
76 | //
77 | // setAllConfig looks for environment variables which are capitalized versions
78 | // of the flag names with dashes replaced by underscores, and prefixed with
79 | // envPrefix plus an underscore.
80 | func setAllConfig(v *viper.Viper, flags *pflag.FlagSet, envPrefix string) error { 81 | // add cmd line flag def to viper 82 | err := v.BindPFlags(flags) 83 | if err != nil { 84 | return err 85 | } 86 | 87 | // add env to viper 88 | v.SetEnvPrefix(envPrefix) 89 | v.SetEnvKeyReplacer(strings.NewReplacer("-", "_")) 90 | v.AutomaticEnv() 91 | 92 | c := v.GetString("config") 93 | var flagErr error 94 | validTags := make(map[string]bool) 95 | flags.VisitAll(func(f *pflag.Flag) { 96 | validTags[f.Name] = true 97 | }) 98 | 99 | // add config file to viper 100 | if c != "" { 101 | v.SetConfigFile(c) 102 | v.SetConfigType("toml") 103 | err := v.ReadInConfig() 104 | if err != nil { 105 | return fmt.Errorf("error reading configuration file '%s': %v", c, err) 106 | } 107 | 108 | for _, key := range v.AllKeys() { 109 | if _, ok := validTags[key]; !ok { 110 | return fmt.Errorf("invalid option in configuration file: %v", key) 111 | } 112 | } 113 | 114 | } 115 | 116 | // set all values from viper 117 | flags.VisitAll(func(f *pflag.Flag) { 118 | if flagErr != nil { 119 | return 120 | } 121 | var value string 122 | if f.Value.Type() == "stringSlice" { 123 | // special handling is needed for stringSlice as v.GetString will 124 | // always return "" in the case that the value is an actual string 125 | // slice from a config file rather than a comma separated string 126 | // from a flag or env var. 127 | vss := v.GetStringSlice(f.Name) 128 | value = strings.Join(vss, ",") 129 | } else { 130 | value = v.GetString(f.Name) 131 | } 132 | 133 | if f.Changed { 134 | // If f.Changed is true, that means the value has already been set 135 | // by a flag, and we don't need to ask viper for it since the flag 136 | // is the highest priority. This works around a problem with string 137 | // slices where f.Value.Set(csvString) would cause the elements of 138 | // csvString to be appended to the existing value rather than 139 | // replacing it. 140 | return 141 | } 142 | flagErr = f.Value.Set(value) 143 | }) 144 | return flagErr 145 | } 146 | 147 | // NewClientFromFlags returns a new Pilosa client based on the flag arguments. 148 | func NewClientFromFlags(flags *pflag.FlagSet) (*pilosa.Client, error) { 149 | hosts, err := flags.GetStringSlice("hosts") 150 | if err != nil { 151 | return nil, err 152 | } 153 | tlsSkipVerify, err := flags.GetBool("tls.skip-verify") 154 | if err != nil { 155 | return nil, err 156 | } 157 | clientOptions := []pilosa.ClientOption{ 158 | pilosa.OptClientTLSConfig(&tls.Config{InsecureSkipVerify: tlsSkipVerify}), 159 | } 160 | return pilosa.NewClient(hosts, clientOptions...) 
161 | } 162 | 163 | func NewLoggerFromFlags(flags *pflag.FlagSet) *log.Logger { 164 | if verbose, _ := flags.GetBool("verbose"); verbose { 165 | return log.New(os.Stderr, "", log.LstdFlags) 166 | } 167 | return log.New(ioutil.Discard, "", log.LstdFlags) 168 | } 169 | -------------------------------------------------------------------------------- /bench/tps.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "math/rand" 7 | "os" 8 | "runtime" 9 | "time" 10 | 11 | "github.com/pilosa/go-pilosa" 12 | "github.com/pkg/errors" 13 | "golang.org/x/sync/errgroup" 14 | ) 15 | 16 | type TPSBenchmark struct { 17 | Name string `json:"name"` 18 | Intersect bool `json:"intersect" help:"If true, include Intersect queries in benchmark."` 19 | Union bool `json:"union" help:"If true, include Union queries in benchmark."` 20 | Difference bool `json:"difference" help:"If true, include Difference queries in benchmark."` 21 | Xor bool `json:"xor" help:"If true, include XOR queries in benchmark."` 22 | Fields []string `json:"fields" help:"Comma separated list of fields. If blank, use all fields in index schema."` 23 | MinRowID int64 `json:"min-row-id" help:"Minimum row ID to use in queries."` 24 | MaxRowID int64 `json:"max-row-id" help:"Max row ID to use in queries. If 0, determine max available."` 25 | Index string `json:"index" help:"Index to use. If blank, one is chosen randomly from the schema."` 26 | Concurrency int `json:"concurrency" help:"Run this many goroutines concurrently." short:"y"` 27 | Iterations int `json:"iterations" help:"Each goroutine will perform this many queries."` 28 | 29 | // Complexity int `help:"Number of Rows calls to include in each query."` 30 | // Depth int `help:"Nesting depth of queries. (e.g. Xor(Row(blah=2), Intersect(Row(ha=3), Row(blah=4))))"` 31 | 32 | Logger *log.Logger `json:"-"` 33 | } 34 | 35 | func NewTPSBenchmark() *TPSBenchmark { 36 | return &TPSBenchmark{ 37 | Name: "tps", 38 | Intersect: true, 39 | Concurrency: runtime.NumCPU(), 40 | MaxRowID: 100, 41 | Iterations: 1000, 42 | Logger: log.New(os.Stderr, "", log.LstdFlags), 43 | } 44 | } 45 | 46 | // Run runs the benchmark. 47 | func (b *TPSBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 48 | result := NewResult() 49 | result.AgentNum = agentNum 50 | result.Configuration = b 51 | 52 | // get the schema to validate existence of index/fields or pick ones to use. 
53 | s, err := client.Schema() 54 | if err != nil { 55 | return result, errors.Wrap(err, "getting schema") 56 | } 57 | 58 | // deal with indexes 59 | indexes := s.Indexes() 60 | var index *pilosa.Index 61 | if b.Index == "" { 62 | if len(indexes) == 0 { 63 | return result, errors.New("no indexes in Pilosa, aborting.") 64 | } 65 | for name, idx := range indexes { 66 | b.Index = name 67 | index = idx 68 | break 69 | } 70 | } else { 71 | var ok bool 72 | index, ok = indexes[b.Index] 73 | if !ok { 74 | return result, errors.Errorf("index '%s' not found in schema.", b.Index) 75 | } 76 | } 77 | 78 | // we have an index, deal with fields 79 | var fields []*pilosa.Field 80 | fieldsMap := index.Fields() 81 | if len(b.Fields) == 0 { 82 | fields = make([]*pilosa.Field, 0, len(fieldsMap)) 83 | for _, fld := range fieldsMap { 84 | fields = append(fields, fld) 85 | } 86 | } else { 87 | fields = make([]*pilosa.Field, 0, len(b.Fields)) 88 | for _, name := range b.Fields { 89 | fld, ok := fieldsMap[name] 90 | if !ok { 91 | return result, errors.Errorf("field '%s' not found in index '%s'.", name, index.Name()) 92 | } 93 | fields = append(fields, fld) 94 | } 95 | } 96 | if len(fields) == 0 { 97 | return result, errors.Errorf("no fields to query in index '%s'", b.Index) 98 | } 99 | 100 | queries := make([]func(...*pilosa.PQLRowQuery) *pilosa.PQLRowQuery, 0) 101 | if b.Intersect { 102 | queries = append(queries, index.Intersect) 103 | } 104 | if b.Difference { 105 | queries = append(queries, index.Difference) 106 | } 107 | if b.Union { 108 | queries = append(queries, index.Union) 109 | } 110 | if b.Xor { 111 | queries = append(queries, index.Xor) 112 | } 113 | 114 | // TODO: Figure out set of rows to use for each field. For now, just apply MaxRowID to all fields. 115 | 116 | start := time.Now() 117 | eg := errgroup.Group{} 118 | stats := make([]*NumStats, b.Concurrency) 119 | for i := 0; i < b.Concurrency; i++ { 120 | i := i 121 | stats[i] = NewNumStats() 122 | eg.Go(func() error { 123 | return b.runQueries(client, index, fields, queries, i, stats[i]) 124 | }) 125 | } 126 | err = eg.Wait() 127 | duration := time.Since(start) 128 | if err == nil { 129 | for i := 1; i < len(stats); i++ { 130 | stats[0].Combine(stats[i]) 131 | } 132 | result.Extra["countstats"] = stats[0] 133 | seconds := float64(duration) / 1000000000 134 | result.Extra["tps"] = float64(b.Iterations*b.Concurrency) / seconds 135 | } 136 | return result, err 137 | } 138 | 139 | func (b *TPSBenchmark) runQueries(client *pilosa.Client, index *pilosa.Index, fields []*pilosa.Field, queries []func(...*pilosa.PQLRowQuery) *pilosa.PQLRowQuery, seed int, stats *NumStats) error { 140 | r := rand.New(rand.NewSource(int64(seed))) 141 | for i := 0; i < b.Iterations; i++ { 142 | f1 := fields[r.Intn(len(fields))] 143 | f2 := fields[r.Intn(len(fields))] 144 | 145 | r1 := r.Int63n(b.MaxRowID) + b.MinRowID 146 | r2 := r.Int63n(b.MaxRowID) + b.MinRowID 147 | 148 | q := queries[r.Intn(len(queries))] 149 | 150 | cntq := index.Count(q(f1.Row(r1), f2.Row(r2))) 151 | resp, err := client.Query(cntq) 152 | if err != nil { 153 | return errors.Wrap(err, "performing query") 154 | } 155 | if !resp.Success { 156 | return errors.Errorf("unsuccessful query: %s", resp.ErrorMessage) 157 | } 158 | stats.Add(int64(resp.Result().Count())) 159 | } 160 | return nil 161 | } 162 | -------------------------------------------------------------------------------- /dx/README.md: -------------------------------------------------------------------------------- 1 | # dx 2 | 3 | `dx` is a 
load-testing tool used to measure the differences between Pilosa versions. It is typically used to compare a development version of Pilosa against the last known-good version, looking for regressions or improvements. Alternatively, `dx` can simply apply a heavy ingest or query load to a single cluster to see how it performs.
4 | 
5 | ## Invocation
6 | 
7 | ```
8 | dx [command] [flags]
9 | ```
10 | 
11 | `dx` can only be used when the Pilosa clusters are already running. You can then specify the configuration using the following global flags:
12 | 
13 | ```
14 | -o, --hosts strings       Comma-separated list of 'host:port' pairs (default localhost:10101)
15 | -h, --help                help for dx
16 | -t, --threadcount int     Number of concurrent goroutines to allocate (default 1)
17 | -v, --verbose bool        Enable verbose logging (default false)
18 | -d, --datadir string      Data directory to store results (default ~/dx)
19 | ```
20 | 
21 | Use one `--hosts` flag for each cluster. Ex.
22 | 
23 | ```
24 | dx [command] --hosts host1,host2 --hosts host3 --hosts host4,host5,host6
25 | ```
26 | 
27 | is interpreted as cluster0 having hosts host1 and host2, cluster1 having host3, and cluster2 having host4, host5, and host6.
28 | 
29 | ## Commands
30 | 
31 | Along with the flags, the following commands are used by `dx` to determine what to do:
32 | 
33 | * `ingest` --- ingest data from an `imagine` spec file on all clusters
34 | * `query` --- generate and run queries on all clusters
35 | * `compare` --- compare the results from a `dx ingest` or `dx query` command
36 | 
37 | ### ingest
38 | 
39 | Aside from the global flags, the following flags can be used for `dx ingest`:
40 | 
41 | ```
42 | -h, --help                help for dx ingest
43 | -p, --prefix string       Prefix to use for index (default "dx-")
44 |     --specfiles strings   Path to imagine spec file
45 | ```
46 | 
47 | The `ingest` command requires one or more [`imagine` spec files](https://github.com/pilosa/tools/tree/master/imagine) that describe the workload used to generate data.
48 | 
49 | Sample ingest:
50 | 
51 | ```
52 | > dx ingest --specfiles spec.toml --hosts localhost:10101 --hosts localhost:10102
53 | ```
54 | 
55 | will result in two files (named `0` and `1`) written to a folder in `--datadir`. The folder is named "ingest-{timestamp}" (ex. ingest-2019-07-15T12/59/24-05/00). The files contain a single JSON object describing the results of the ingest.
56 | 
57 | ```
58 | {"type":"ingest","time":"635.162153ms","threadcount":1}
59 | ```
60 | 
61 | ### query
62 | 
63 | Aside from the global flags, the following flags can be used for `dx query`:
64 | 
65 | ```
66 | -q, --queries int           Number of queries to run (default 100)
67 | -r, --rows int              Number of rows to perform intersect query on (default 2)
68 | -i, --indexes strings       Indexes to run queries on (default all indexes from first cluster)
69 | -a, --actualresults bool    Save actual results of queries instead of counts (default false)
70 |     --querytemplate string  Run the queries from a previous result file
71 |     --seed int              Seed for generating random rows and columns (default 1)
72 | ```
73 | 
74 | To compare a current query benchmark to an older one, use `dx query` with `--querytemplate` set to the old result file, so that the queries run on the newer cluster are the same. If `--querytemplate` is not set, `dx` automatically generates `--queries` queries using the indexes from `--indexes`. If `--indexes` is also not specified, `dx` defaults to using all of the indexes present in the first cluster.
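For example, to re-run the exact queries recorded by an earlier run against a different cluster (the result-file path here is illustrative):

```
> dx query --hosts localhost:10101 --querytemplate ~/dx/query-2019-07-15T12:59:24-05:00/0
```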
75 | 76 | Sample query: 77 | ``` 78 | > dx query --hosts localhost:10101 --hosts localhost:10102 --hosts localhost:8000 --threadcount=4 79 | ``` 80 | 81 | will result in the three files (named `0`, `1`, and `2` in order of the flags) written to the folder "query-{timestamp}" in `--datadir`. The files contain `--queries + 1` number of JSON objects. The objects describe the queries and their results, while the last object describes the total time the whole run took. 82 | 83 | ``` 84 | {"type":"query","time":"532.164µs","threadcount":1,"query":{"id":0,"query":1,"index":"dx-users","field":"numbers","rows":[21,51],"time":"532.164µs","resultcount":82}} 85 | ... 86 | {"type":"query","time":"1.275702ms","threadcount":14,"query":{"id":0,"query":0,"index":"imaginary-users","field":"numbers","rows":[1,0],"time":"1.275702ms","resultcount":7}} 87 | {"type":"total","time":"164.410886ms","threadcount":4,"query":{"id":-1,"query":0,"index":"","field":"","rows":null,"time":"164.410886ms"}} 88 | ``` 89 | 90 | ### compare 91 | 92 | The JSON files output by `dx ingest` and `dx query` are not actually meant to be read by humans. The final step in comparing results between different clusters is `dx compare`. 93 | 94 | `dx compare` does not take any flags, but it takes two arguments that specify the paths of the two result files to compare. These two result files must be of the same type, or `dx` will return an error. If the two files are valid, `dx` will automatically determine whether they are of type ingest or query and perform the appropriate comparisons. 95 | 96 | ### default behavior 97 | 98 | If no commands are specified, `dx` checks that the clusters are running and prints out their information. 99 | ``` 100 | > dx 101 | 102 | dx is a tool used to analyze accuracy and performance regression across Pilosa versions. 103 | The following checks whether the clusters specified by the hosts flag are running. 
104 | 
105 | Cluster with hosts localhost:10101
106 | server memory: 16GB [16384MB]
107 | server CPU: Intel(R) Core(TM) i7-6567U CPU @ 3.30GHz
108 | [2 physical cores, 4 logical cores available]
109 | cluster nodes: 1
110 | 
111 | ```
--------------------------------------------------------------------------------
/imagine/generators_test.go:
--------------------------------------------------------------------------------
1 | package imagine
2 | 
3 | import (
4 | "fmt"
5 | "io"
6 | "testing"
7 | "time"
8 | 
9 | gopilosa "github.com/pilosa/go-pilosa"
10 | )
11 | 
12 | func testSequenceGenerator(s sequenceGenerator, min int64, max int64, total int64) error {
13 | seen := make(map[int64]struct{})
14 | var done bool
15 | var value int64
16 | 
17 | for !done {
18 | value, done = s.Next()
19 | if _, ok := seen[value]; ok {
20 | return fmt.Errorf("generator produced %d more than once", value)
21 | }
22 | if value < min || value > max {
23 | return fmt.Errorf("generator produced value %d, out of range %d..%d", value, min, max)
24 | }
25 | seen[value] = struct{}{}
26 | }
27 | if int64(len(seen)) != total {
28 | return fmt.Errorf("generator produced %d values from %d..%d, expecting %d", len(seen), min, max, total)
29 | }
30 | return nil
31 | }
32 | 
33 | func testValueGenerator(v valueGenerator, min int64, max int64, total int64) error {
34 | seen := make(map[int64]struct{})
35 | var value int64
36 | 
37 | for i := int64(0); i < total; i++ { // produce exactly total values
38 | value = v.Nth(i)
39 | if _, ok := seen[value]; ok {
40 | return fmt.Errorf("generator produced %d more than once", value)
41 | }
42 | if value < min || value > max {
43 | return fmt.Errorf("generator produced value %d, out of range %d..%d", value, min, max)
44 | }
45 | seen[value] = struct{}{}
46 | }
47 | if int64(len(seen)) != total {
48 | return fmt.Errorf("generator produced %d values from %d..%d, expecting %d", len(seen), min, max, total)
49 | }
50 | return nil
51 | }
52 | 
53 | func Test_Generators(t *testing.T) {
54 | inc := newIncrementGenerator(-3, 5)
55 | if err := testSequenceGenerator(inc, -3, 5, 9); err != nil { t.Fatal(err) }
56 | inc2, err := newPermutedGenerator(-3, 5, 7, 0, 0, 1)
57 | if err != nil {
58 | t.Fatalf("unexpected error: %v", err)
59 | }
60 | if err := testSequenceGenerator(inc2, -3, 5, 7); err != nil { t.Fatal(err) }
61 | lin, err := newLinearValueGenerator(-3, 5, 0)
62 | if err != nil {
63 | t.Fatalf("unexpected error: %v", err)
64 | }
65 | if err := testValueGenerator(lin, -3, 5, 9); err != nil { t.Fatal(err) }
66 | perm, err := newPermutedValueGenerator(lin, -3, 5, 9)
67 | if err != nil {
68 | t.Fatalf("unexpected error: %v", err)
69 | }
70 | if err := testValueGenerator(perm, -3, 5, 9); err != nil { t.Fatal(err) }
71 | }
72 | 
73 | func float64p(v float64) *float64 {
74 | return &v
75 | }
76 | func uint64p(v uint64) *uint64 {
77 | return &v
78 | }
79 | func int64p(v int64) *int64 {
80 | return &v
81 | }
82 | func durationp(v duration) *duration {
83 | return &v
84 | }
85 | 
86 | func TestFieldMin(t *testing.T) {
87 | startTime := time.Date(2000, time.Month(1), 2, 3, 4, 5, 6, time.UTC)
88 | dur := time.Hour * 120
89 | spec := &taskSpec{
90 | FieldSpec: &fieldSpec{
91 | Type: fieldTypeTime,
92 | Min: 10,
93 | Max: 12,
94 | Chance: float64p(1.0),
95 | DensityScale: uint64p(2097152),
96 | Density: 1.0,
97 | },
98 | ColumnOrder: valueOrderLinear,
99 | DimensionOrder: dimensionOrderRow,
100 | Columns: uint64p(10),
101 | RowOrder: valueOrderLinear,
102 | Seed: int64p(0),
103 | Stamp: stampTypeIncreasing,
104 | StampStart: &startTime,
105 | StampRange: durationp(duration(dur)),
106 | }
107 | 
108 | updateChan := make(chan taskUpdate, 10)
109 | go func() {
110 | for range updateChan { // drain task updates until the channel is closed
111 | }
112 | }()
113 | sg, err := newSetGenerator(spec, updateChan, "updateid")
114 | if err != nil {
115 | t.Fatalf("getting new set generator: %v", err)
116 | }
117 | 
118 | r, err := sg.NextRecord()
119 | if err != nil {
120 | t.Fatalf("Error in iterator: %v", err)
121 | }
122 | col, ok := r.(gopilosa.Column)
123 | if !ok {
124 | t.Fatalf("%v not a Column", r)
125 | }
126 | if col.RowID != 10 {
127 | t.Fatalf("field.Min not respected, got row %d, expected 10", col.RowID)
128 | }
129 | 
130 | }
131 | 
132 | func TestNewSetGenerator(t *testing.T) {
133 | startTime := time.Date(2000, time.Month(1), 2, 3, 4, 5, 6, time.UTC)
134 | dur := time.Hour * 120
135 | spec := &taskSpec{
136 | FieldSpec: &fieldSpec{
137 | Type: fieldTypeTime,
138 | Max: 1,
139 | Chance: float64p(1.0),
140 | DensityScale: uint64p(2097152),
141 | Density: 1.0,
142 | },
143 | ColumnOrder: valueOrderLinear,
144 | DimensionOrder: dimensionOrderRow,
145 | Columns: uint64p(10),
146 | RowOrder: valueOrderLinear,
147 | Seed: int64p(0),
148 | Stamp: stampTypeIncreasing,
149 | StampStart: &startTime,
150 | StampRange: durationp(duration(dur)),
151 | }
152 | 
153 | updateChan := make(chan taskUpdate, 10)
154 | go func() {
155 | for range updateChan { // drain task updates until the channel is closed
156 | }
157 | }()
158 | sg, err := newSetGenerator(spec, updateChan, "updateid")
159 | if err != nil {
160 | t.Fatalf("getting new set generator: %v", err)
161 | }
162 | lastT := int64(0)
163 | i := -1
164 | endTime := startTime.Add(dur)
165 | for r, err := sg.NextRecord(); err != io.EOF; r, err = sg.NextRecord() {
166 | if err != nil {
167 | t.Fatalf("Error in iterator: %v", err)
168 | }
169 | col, ok := r.(gopilosa.Column)
170 | if !ok {
171 | t.Fatalf("%v not a Column", r)
172 | }
173 | i++
174 | if col.RowID != 0 {
175 | t.Fatalf("unexpected row at record %d: %v", i, col)
176 | }
177 | if int(col.ColumnID) != i {
178 | t.Fatalf("unexpected col: exp: %d got %d", i, col.ColumnID)
179 | }
180 | if col.Timestamp <= lastT {
181 | t.Fatalf("timestamp did not increase: last: %d this: %v", lastT, col)
182 | }
183 | lastT = col.Timestamp
184 | tim := time.Unix(0, col.Timestamp)
185 | if tim.Before(startTime) {
186 | t.Fatalf("got a time before start time: %v", tim)
187 | }
188 | if tim.After(endTime) {
189 | t.Fatalf("got a time after start+duration: %v", tim)
190 | }
191 | }
192 | if endTime.Sub(time.Unix(0, lastT)) > dur/2 {
193 | t.Fatalf("less than half the duration was used - lastT: %v", lastT)
194 | }
195 | 
196 | close(updateChan)
197 | }
198 | 
199 | func TestMutexGen(t *testing.T) {
200 | spec := &taskSpec{
201 | FieldSpec: &fieldSpec{
202 | Type: fieldTypeMutex,
203 | Max: 2,
204 | Chance: float64p(1.0),
205 | DensityScale: uint64p(2097152),
206 | Density: 0.9,
207 | ValueRule: densityTypeZipf,
208 | Cache: cacheTypeLRU,
209 | ZipfS: 1.1,
210 | ZipfV: 1,
211 | },
212 | ColumnOrder: valueOrderLinear,
213 | DimensionOrder: dimensionOrderRow,
214 | Columns: uint64p(10),
215 | RowOrder: valueOrderLinear,
216 | Seed: int64p(0),
217 | }
218 | 
219 | updateChan := make(chan taskUpdate, 10)
220 | go func() {
221 | for range updateChan { // drain task updates until the channel is closed
222 | }
223 | }()
224 | sg, err := newMutexGenerator(spec, updateChan, "updateid")
225 | if err != nil {
226 | t.Fatalf("getting new mutex generator: %v", err)
227 | }
228 | 
229 | done := make(chan error)
230 | go func() {
231 | for _, err := sg.NextRecord(); err != io.EOF; _, err = sg.NextRecord() {
232 | if err != nil {
233 | done <- err
234 | }
235 | }
236 | close(done)
237 | }()
238 | 
239 | select {
240 | case err = <-done:
241 | if err != nil {
242 | t.Fatalf("error in iterator: %v", err)
243 | }
244 | case <-time.After(time.Second):
245 | t.Fatalf("mutex generator hanging")
246 | }
247 | 
248 | }
249 | 
--------------------------------------------------------------------------------
/dx/main.go:
--------------------------------------------------------------------------------
1 | package dx
2 | 
3 | import (
4 | "fmt"
5 | "io/ioutil"
6 | "log"
7 | "os"
8 | "path/filepath"
9 | "strings"
10 | "time"
11 | 
12 | "github.com/pilosa/go-pilosa"
13 | "github.com/pkg/errors"
14 | "github.com/spf13/cobra"
15 | )
16 | 
17 | const (
18 | cmdIngest = "ingest"
19 | cmdQuery = "query"
20 | cmdTotal = "total"
21 | )
22 | 
23 | // Main contains the flags dx uses.
24 | type Main struct {
25 | Hosts []string
26 | ThreadCount int
27 | SpecFiles []string
28 | Verbose bool
29 | Prefix string
30 | NumQueries int64
31 | NumRows int64
32 | DataDir string
33 | ActualResults bool
34 | QueryTemplate string
35 | Indexes []string
36 | Seed int64
37 | }
38 | 
39 | // NewMain creates a new Main object.
40 | func NewMain() *Main {
41 | return &Main{
42 | Prefix: "dx-",
43 | }
44 | }
45 | 
46 | // NewRootCmd creates an instance of the cobra root command for dx.
47 | func NewRootCmd() *cobra.Command {
48 | // m is persisted to all subcommands
49 | m := NewMain()
50 | 
51 | rc := &cobra.Command{
52 | Use: "dx",
53 | Short: "analyze accuracy and performance regression across Pilosa versions",
54 | Long: `Analyze accuracy and performance regression across Pilosa versions by running high-load ingest and queries.`,
55 | PersistentPreRun: func(cmd *cobra.Command, args []string) {
56 | // set logger
57 | if m.Verbose {
58 | log.SetOutput(os.Stderr)
59 | } else {
60 | log.SetOutput(ioutil.Discard)
61 | }
62 | },
63 | Run: func(cmd *cobra.Command, args []string) {
64 | fmt.Printf("dx is a tool used to analyze accuracy and performance regression across Pilosa versions.\nThe following checks whether the clusters specified by the hosts flag are running.\n\n")
65 | if err := printServers(m.Hosts); err != nil {
66 | if m.Verbose {
67 | fmt.Printf("%+v\n", err)
68 | } else {
69 | fmt.Printf("%v\n", err)
70 | }
71 | os.Exit(1)
72 | }
73 | },
74 | }
75 | 
76 | // default
77 | var usrHomeDirDx string
78 | home, err := os.UserHomeDir()
79 | if err == nil {
80 | usrHomeDirDx = filepath.Join(home, "dx")
81 | }
82 | 
83 | // TODO: flag for which folder to store run in?
84 | flags := rc.PersistentFlags()
85 | flags.StringArrayVarP(&m.Hosts, "hosts", "o", []string{"localhost:10101"}, "Comma-separated list of 'host:port' pairs. Repeat this flag for each cluster")
86 | flags.IntVarP(&m.ThreadCount, "threadcount", "t", 1, "Number of goroutines to allocate")
87 | flags.BoolVarP(&m.Verbose, "verbose", "v", false, "Enable verbose logging")
88 | flags.StringVarP(&m.DataDir, "datadir", "d", usrHomeDirDx, "Data directory to store results")
89 | 
90 | rc.AddCommand(NewIngestCommand(m))
91 | rc.AddCommand(NewQueryCommand(m))
92 | rc.AddCommand(NewCompareCommand(m))
93 | 
94 | return rc
95 | }
96 | 
97 | func printServers(hosts []string) error {
98 | for _, clusterHostsString := range hosts {
99 | fmt.Printf("Cluster with hosts %v\n", clusterHostsString)
100 | clusterHosts := strings.Split(clusterHostsString, ",")
101 | 
102 | client, err := initializeClient(clusterHosts...)
103 | if err != nil {
104 | return errors.Wrap(err, "error initializing client")
105 | }
106 | if err = printServerInfo(client); err != nil {
107 | return errors.Wrap(err, "could not print server info")
108 | }
109 | }
110 | return nil
111 | }
112 | 
113 | // modified from package imagine
114 | func printServerInfo(client *pilosa.Client) error {
115 | serverInfo, err := client.Info()
116 | if err != nil {
117 | return errors.Wrap(err, "couldn't get server info")
118 | }
119 | serverMemMB := serverInfo.Memory / (1024 * 1024)
120 | serverMemGB := (serverMemMB + 1023) / 1024
121 | fmt.Printf("server memory: %dGB [%dMB]\n", serverMemGB, serverMemMB)
122 | fmt.Printf("server CPU: %s\n[%d physical cores, %d logical cores available]\n", serverInfo.CPUType, serverInfo.CPUPhysicalCores, serverInfo.CPULogicalCores)
123 | serverStatus, err := client.Status()
124 | if err != nil {
125 | return errors.Wrap(err, "couldn't get cluster status info")
126 | }
127 | fmt.Printf("cluster nodes: %d\n\n", len(serverStatus.Nodes))
128 | return nil
129 | }
130 | 
131 | // initializeClients creates the Pilosa clients from a slice of strings, where each string is
132 | // a comma-separated list of host:port pairs in a cluster. The order in which the clusters
133 | // appear in hosts is the same order as they appear in the output slice.
134 | func initializeClients(hosts []string) ([]*pilosa.Client, error) {
135 |     // len(hosts) is the number of clusters
136 |     clients := make([]*pilosa.Client, 0, len(hosts))
137 |     for _, clusterHostsString := range hosts {
138 |         clusterHosts := strings.Split(clusterHostsString, ",")
139 | 
140 |         client, err := initializeClient(clusterHosts...)
141 |         if err != nil {
142 |             return nil, errors.Wrap(err, "error creating client for cluster")
143 |         }
144 | 
145 |         clients = append(clients, client)
146 |     }
147 |     return clients, nil
148 | }
149 | 
150 | // initializeClient creates a Pilosa client using a list of hosts from the cluster.
151 | func initializeClient(clusterHosts ...string) (*pilosa.Client, error) {
152 |     // initialize uris from this cluster
153 |     uris := make([]*pilosa.URI, 0, len(clusterHosts))
154 |     for _, host := range clusterHosts {
155 |         uri, err := pilosa.NewURIFromAddress(host)
156 |         if err != nil {
157 |             return nil, errors.Wrapf(err, "error creating Pilosa URI from host %s", host)
158 |         }
159 |         uris = append(uris, uri)
160 |     }
161 | 
162 |     // initialize cluster and client from the uris
163 |     cluster := pilosa.NewClusterWithHost(uris...)
164 |     client, err := pilosa.NewClient(cluster)
165 |     if err != nil {
166 |         return nil, errors.Wrap(err, "error creating Pilosa client from URIs")
167 |     }
168 | 
169 |     return client, nil
170 | }
171 | 
172 | // getAllClusterHosts creates a slice for each cluster containing the hosts in that cluster.
173 | // Ex. ["host1,host2", "host3"] -> [[host1, host2], [host3]]
174 | func getAllClusterHosts(hosts []string) [][]string {
175 |     allClusterHosts := make([][]string, 0)
176 |     for _, clusterHostsString := range hosts {
177 |         clusterHosts := strings.Split(clusterHostsString, ",")
178 |         allClusterHosts = append(allClusterHosts, clusterHosts)
179 |     }
180 |     return allClusterHosts
181 | }
182 | 
183 | // makeFolder makes a folder with name "{cmd}-{time now}" in the directory.
184 | // Ex. ("ingest", "usr/home") -> creates the directory usr/home/ingest-2019-07-10T15:52:44-05:00.
185 | func makeFolder(cmdType, dir string) (string, error) {
186 |     timestamp := time.Now().Format(time.RFC3339)
187 |     folderName := cmdType + "-" + timestamp
188 |     path := filepath.Join(dir, folderName)
189 | 
190 |     if err := os.MkdirAll(path, 0777); err != nil {
191 |         return "", errors.Wrapf(err, "error mkdir for %v", path)
192 |     }
193 |     return path, nil
194 | }
195 | 
196 | // TimeDuration wraps time.Duration to encode to JSON.
197 | type TimeDuration struct {
198 |     Duration time.Duration
199 | }
200 | 
201 | // UnmarshalJSON deserializes json to TimeDuration.
202 | func (d *TimeDuration) UnmarshalJSON(b []byte) (err error) {
203 |     d.Duration, err = time.ParseDuration(strings.Trim(string(b), `"`))
204 |     return
205 | }
206 | 
207 | // MarshalJSON serializes TimeDuration to json.
208 | func (d *TimeDuration) MarshalJSON() (b []byte, err error) {
209 |     return []byte(fmt.Sprintf(`"%v"`, d.Duration)), nil
210 | }
211 | 
212 | // Benchmark contains the information related to an ingest or query benchmark.
213 | type Benchmark struct {
214 |     Type        string       `json:"type"`
215 |     Time        TimeDuration `json:"time"`
216 |     ThreadCount int          `json:"threadcount"`
217 |     Query       *Query       `json:"query,omitempty"`
218 | }
219 | 
220 | // NewBenchmark creates an empty benchmark.
221 | func NewBenchmark() *Benchmark {
222 |     return &Benchmark{}
223 | }
224 | 
--------------------------------------------------------------------------------
/bench/bench.go:
--------------------------------------------------------------------------------
1 | package bench
2 | 
3 | import (
4 |     "context"
5 |     "fmt"
6 |     "math/rand"
7 |     "time"
8 | 
9 |     "github.com/pilosa/go-pilosa"
10 | )
11 | 
12 | // Benchmark is the interface implemented by benchmark components. Benchmarks should Marshal
13 | // to valid JSON so that their configuration may be recorded with their results.
14 | type Benchmark interface {
15 |     Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error)
16 | }
17 | // HostSetup holds the hosts and client options used to connect to a cluster.
18 | type HostSetup struct {
19 |     Hosts         []string
20 |     ClientOptions []pilosa.ClientOption
21 | }
22 | 
23 | // Result holds the output from the run of a benchmark - the Benchmark's Run()
24 | // method may set Stats, Responses, and Extra, and the RunBenchmark helper
25 | // function will set the Duration, AgentNum, PilosaVersion, and Configuration.
26 | // Either may set Error if there is an error. The structure of Result assumes
27 | // that most benchmarks will run multiple queries and track statistics about how
28 | // long each one takes. The Extra field is for benchmarks which either do not
29 | // fit this model, or want to return additional information not covered by Stats
30 | // and Responses.
31 | type Result struct {
32 |     Stats         *Stats                  `json:"stats"`
33 |     Responses     []*pilosa.QueryResponse `json:"responses"`
34 |     Extra         map[string]interface{}  `json:"extra"`
35 |     AgentNum      int                     `json:"agentnum"`
36 |     PilosaVersion string                  `json:"pilosa-version"`
37 |     Configuration interface{}             `json:"configuration"`
38 | 
39 |     // Error exists so that errors can be correctly marshalled to JSON. It is set using Result.err.Error()
40 |     Error string `json:"error,omitempty"`
41 | }
42 | 
43 | // NewResult initializes and returns a Result.
44 | func NewResult() *Result {
45 |     return &Result{
46 |         Stats:     NewStats(),
47 |         Extra:     make(map[string]interface{}),
48 |         Responses: make([]*pilosa.QueryResponse, 0),
49 |     }
50 | }
51 | 
52 | // Add adds the duration to the Result's Stats object. If resp is non-nil, it
53 | // also adds it to the slice of responses.
54 | func (r *Result) Add(d time.Duration, resp *pilosa.QueryResponse) {
55 |     r.Stats.Add(d)
56 |     if resp != nil {
57 |         r.Responses = append(r.Responses, resp)
58 |     }
59 | }
60 | 
61 | // ensureSchema ensures that a given index and field exist.
62 | func ensureSchema(client *pilosa.Client, indexName, fieldName string, opts ...interface{}) (index *pilosa.Index, field *pilosa.Field, err error) {
63 |     var indexOpts []pilosa.IndexOption
64 |     var fieldOpts []pilosa.FieldOption
65 |     for _, opt := range opts {
66 |         switch opt := opt.(type) {
67 |         case pilosa.IndexOption:
68 |             indexOpts = append(indexOpts, opt)
69 |         case pilosa.FieldOption:
70 |             fieldOpts = append(fieldOpts, opt)
71 |         }
72 |     }
73 | 
74 |     schema, err := client.Schema()
75 |     if err != nil {
76 |         return nil, nil, fmt.Errorf("cannot read schema: %v", err)
77 |     }
78 | 
79 |     index = schema.Index(indexName, indexOpts...)
80 |     if err := client.EnsureIndex(index); err != nil {
81 |         return nil, nil, fmt.Errorf("cannot ensure index: %v", err)
82 |     }
83 |     if fieldName != "" {
84 |         field = index.Field(fieldName, fieldOpts...)
85 |         if err := client.EnsureField(field); err != nil {
86 |             return nil, nil, fmt.Errorf("cannot ensure field: %v", err)
87 |         }
88 |     }
89 |     if err := client.SyncSchema(schema); err != nil {
90 |         return nil, nil, fmt.Errorf("cannot sync schema: %v", err)
91 |     }
92 |     return index, field, nil
93 | }
94 | 
95 | // PrettyDuration is a wrapper type to force human-readable JSON output.
96 | type PrettyDuration time.Duration
97 | 
98 | // MarshalJSON returns a nicely formatted duration, instead of it just being
99 | // treated like an int.
100 | func (d PrettyDuration) MarshalJSON() ([]byte, error) {
101 |     s := time.Duration(d).String()
102 |     return []byte("\"" + s + "\""), nil
103 | }
104 | 
105 | // Prettify recursively replaces elements of ugly types with their pretty wrappers.
106 | func Prettify(m map[string]interface{}) map[string]interface{} {
107 |     newmap := make(map[string]interface{})
108 |     for k, v := range m {
109 |         switch v.(type) {
110 |         case map[string]interface{}:
111 |             newmap[k] = Prettify(v.(map[string]interface{}))
112 |         case []time.Duration:
113 |             newslice := make([]PrettyDuration, len(v.([]time.Duration)))
114 |             slice := v.([]time.Duration)
115 |             for n, e := range slice {
116 |                 newslice[n] = PrettyDuration(e)
117 |             }
118 |             newmap[k] = newslice
119 |         case time.Duration:
120 |             newmap[k] = PrettyDuration(v.(time.Duration))
121 |         default:
122 |             if interv, ok := v.([]map[string]interface{}); ok {
123 |                 for i, iv := range interv {
124 |                     interv[i] = Prettify(iv)
125 |                 }
126 |             }
127 |             newmap[k] = v
128 |         }
129 |     }
130 |     return newmap
131 | }
132 | 
133 | // NewQueryGenerator returns a new QueryGenerator.
134 | func NewQueryGenerator(index *pilosa.Index, field *pilosa.Field, seed int64) *QueryGenerator {
135 |     return &QueryGenerator{
136 |         index: index,
137 |         field: field,
138 |         rand:  rand.New(rand.NewSource(seed)),
139 |     }
140 | }
141 | 
142 | // QueryGenerator holds the configuration and state for randomly generating queries.
143 | type QueryGenerator struct {
144 |     index *pilosa.Index
145 |     field *pilosa.Field
146 |     rand  *rand.Rand
147 | }
148 | 
149 | // Random returns a randomly generated query.
150 | func (g *QueryGenerator) Random(maxN, depth, maxargs int, idmin, idmax uint64) pilosa.PQLQuery {
151 |     val := g.rand.Intn(5)
152 |     switch val {
153 |     case 0:
154 |         return g.RandomTopN(maxN, depth, maxargs, idmin, idmax)
155 |     default:
156 |         return g.RandomBitmapCall(depth, maxargs, idmin, idmax)
157 |     }
158 | }
159 | 
160 | // RandomRangeQuery returns a randomly generated sum or range query.
161 | func (g *QueryGenerator) RandomRangeQuery(depth, maxargs int, idmin, idmax uint64) pilosa.PQLQuery {
162 |     switch g.rand.Intn(5) {
163 |     case 1:
164 |         return g.RandomSum(depth, maxargs, idmin, idmax)
165 |     default:
166 |         return g.RandomRange(maxargs, idmin, idmax)
167 |     }
168 | }
169 | // RandomRange returns either a single range call or a random combination (difference, intersect, or union) of numArg range calls.
170 | func (g *QueryGenerator) RandomRange(numArg int, idmin, idmax uint64) *pilosa.PQLRowQuery {
171 |     choose := g.rand.Intn(4)
172 |     if choose == 0 {
173 |         return g.RangeCall(idmin, idmax)
174 |     }
175 |     a := make([]*pilosa.PQLRowQuery, numArg)
176 |     for i := 0; i < numArg; i++ {
177 |         a[i] = g.RangeCall(idmin, idmax)
178 |     }
179 | 
180 |     switch choose {
181 |     case 1:
182 |         return g.index.Difference(a...)
183 |     case 2:
184 |         return g.index.Intersect(a...)
185 |     case 3:
186 |         return g.index.Union(a...)
187 |     default:
188 |         panic("unreachable")
189 |     }
190 | }
191 | // RangeCall returns a random comparison (GT, LT, GTE, LTE, or Equals) query against the field.
192 | func (g *QueryGenerator) RangeCall(idmin, idmax uint64) *pilosa.PQLRowQuery {
193 |     const operationN = 5
194 |     switch g.rand.Intn(operationN) {
195 |     case 0:
196 |         return g.field.GT(g.rand.Intn(int(idmax - idmin)))
197 |     case 1:
198 |         return g.field.LT(g.rand.Intn(int(idmax - idmin)))
199 |     case 2:
200 |         return g.field.GTE(g.rand.Intn(int(idmax - idmin)))
201 |     case 3:
202 |         return g.field.LTE(g.rand.Intn(int(idmax - idmin)))
203 |     case 4:
204 |         return g.field.Equals(g.rand.Intn(int(idmax - idmin)))
205 |     default:
206 |         panic("unreachable")
207 |     }
208 | }
209 | 
210 | // RandomSum returns a randomly generated sum query.
211 | func (g *QueryGenerator) RandomSum(depth, maxargs int, idmin, idmax uint64) pilosa.PQLQuery {
212 |     switch g.rand.Intn(4) {
213 |     case 0:
214 |         return g.field.Sum(g.RandomBitmapCall(depth, maxargs, idmin, idmax))
215 |     default:
216 |         return g.field.Sum(g.RandomRange(maxargs, idmin, idmax))
217 |     }
218 | }
219 | 
220 | // RandomTopN returns a randomly generated TopN query.
221 | func (g *QueryGenerator) RandomTopN(maxN, depth, maxargs int, idmin, idmax uint64) *pilosa.PQLRowQuery {
222 |     return g.field.RowTopN(uint64(g.rand.Intn(maxN-1)+1), g.RandomBitmapCall(depth, maxargs, idmin, idmax))
223 | }
224 | 
225 | // RandomBitmapCall returns a randomly generated query which returns a bitmap.
226 | func (g *QueryGenerator) RandomBitmapCall(depth, maxargs int, idmin, idmax uint64) *pilosa.PQLRowQuery {
227 |     if depth <= 1 {
228 |         return g.field.Row(uint64(g.rand.Int63n(int64(idmax)-int64(idmin)) + int64(idmin)))
229 |     }
230 |     choose := g.rand.Intn(4)
231 |     if choose == 0 {
232 |         return g.RandomBitmapCall(1, 0, idmin, idmax)
233 |     }
234 | 
235 |     numargs := 2
236 |     if maxargs > 2 {
237 |         numargs = g.rand.Intn(maxargs-2) + 2
238 |     }
239 |     a := make([]*pilosa.PQLRowQuery, numargs)
240 |     for i := 0; i < numargs; i++ {
241 |         a[i] = g.RandomBitmapCall(depth-1, maxargs, idmin, idmax)
242 |     }
243 | 
244 |     switch choose {
245 |     case 1:
246 |         return g.index.Difference(a...)
247 |     case 2:
248 |         return g.index.Intersect(a...)
249 |     case 3:
250 |         return g.index.Union(a...)
251 |     default:
252 |         panic("unreachable")
253 |     }
254 | }
255 | 
256 | // Stats object helps track timing stats.
257 | type Stats struct {
258 |     sumSquareDelta float64
259 | 
260 |     Min     time.Duration   `json:"min"`
261 |     Max     time.Duration   `json:"max"`
262 |     Mean    time.Duration   `json:"mean"`
263 |     Total   time.Duration   `json:"total-time"`
264 |     Num     int64           `json:"num"`
265 |     All     []time.Duration `json:"all"`
266 |     SaveAll bool            `json:"-"`
267 | }
268 | 
269 | // NewStats gets a Stats object.
270 | func NewStats() *Stats {
271 |     return &Stats{
272 |         Min: 1<<63 - 1,
273 |         All: make([]time.Duration, 0),
274 |     }
275 | }
276 | 
277 | // Add adds a new time to the stats object.
278 | func (s *Stats) Add(td time.Duration) {
279 |     if s.SaveAll {
280 |         s.All = append(s.All, td)
281 |     }
282 |     s.Num++
283 |     s.Total += td
284 |     if td < s.Min {
285 |         s.Min = td
286 |     }
287 |     if td > s.Max {
288 |         s.Max = td
289 |     }
290 | 
291 |     // online variance calculation
292 |     // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
293 |     delta := td - s.Mean
294 |     s.Mean += delta / time.Duration(s.Num)
295 |     s.sumSquareDelta += float64(delta * (td - s.Mean))
296 | }
297 | // Combine merges the stats from another Stats object into s.
298 | func (s *Stats) Combine(other *Stats) {
299 |     if other.Min < s.Min {
300 |         s.Min = other.Min
301 |     }
302 |     if other.Max > s.Max {
303 |         s.Max = other.Max
304 |     }
305 |     s.Total += other.Total
306 |     s.Num += other.Num
307 |     s.Mean = s.Total / time.Duration(s.Num)
308 |     s.All = append(s.All, other.All...)
309 | }
310 | 
311 | // NumStats object helps track stats. This and Stats (which was
312 | // originally made specifically for time) should probably be unified.
313 | type NumStats struct {
314 |     sumSquareDelta float64
315 | 
316 |     // NumZero counts the number of values that have been added which
317 |     // are zero. This is a cheap, simple replacement for more
318 |     // sophisticated tracking of the distribution of the data that
319 |     // lets us know if (e.g.) we have a bunch of queries doing
320 |     // nothing because we're querying empty rows or something.
321 |     NumZero int64   `json:"num-zero"`
322 |     Min     int64   `json:"min"`
323 |     Max     int64   `json:"max"`
324 |     Mean    int64   `json:"mean"`
325 |     Total   int64   `json:"total"`
326 |     Num     int64   `json:"num"`
327 |     All     []int64 `json:"all"`
328 |     SaveAll bool    `json:"-"`
329 | }
330 | 
331 | // NewNumStats gets a NumStats object.
332 | func NewNumStats() *NumStats {
333 |     return &NumStats{
334 |         Min: 1<<63 - 1,
335 |         All: make([]int64, 0),
336 |     }
337 | }
338 | 
339 | // Add adds a new value to the stats object.
340 | func (s *NumStats) Add(td int64) {
341 |     if s.SaveAll {
342 |         s.All = append(s.All, td)
343 |     }
344 |     if td == 0 {
345 |         s.NumZero++
346 |     }
347 |     s.Num++
348 |     s.Total += td
349 |     if td < s.Min {
350 |         s.Min = td
351 |     }
352 |     if td > s.Max {
353 |         s.Max = td
354 |     }
355 | 
356 |     // online variance calculation
357 |     // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
358 |     delta := td - s.Mean
359 |     s.Mean += delta / int64(s.Num)
360 |     s.sumSquareDelta += float64(delta * (td - s.Mean))
361 | }
362 | // Combine merges the stats from another NumStats object into s.
363 | func (s *NumStats) Combine(other *NumStats) {
364 |     if other.Min < s.Min {
365 |         s.Min = other.Min
366 |     }
367 |     if other.Max > s.Max {
368 |         s.Max = other.Max
369 |     }
370 |     s.NumZero += other.NumZero
371 |     s.Total += other.Total
372 |     s.Num += other.Num
373 |     s.Mean = s.Total / s.Num
374 |     s.All = append(s.All, other.All...)
375 | }
376 | 
--------------------------------------------------------------------------------
/imagine/README.md:
--------------------------------------------------------------------------------
1 | # Imagine that you had a database with...
2 | 
3 | This tool intends to provide a way to populate a Pilosa database with
4 | predictable contents in a reasonably efficient fashion, without needing
5 | enormous static data files. Indexes and fields within them can be specified
6 | in a TOML file.
7 | 
8 | ## Invocation
9 | 
10 | The `imagine` utility takes command line options, followed by one or more
11 | spec files, which are TOML files containing specs.
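For example, a small spec sketch might look like the following (the index and field names here are hypothetical; the individual keys are documented under "Spec files" below):

```toml
# A hypothetical minimal spec: one index, one field, one workload.
densityscale = 2097152
version = "1.0"

[indexes.example]
columns = 10000
fields = [
    { name = "things", type = "set", max = 8, density = 0.5 },
]

[[workloads]]
name = "populate"
threadCount = 1
tasks = [
    { index = "example", field = "things" },
]
```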
12 | 
13 | What `imagine` does with the spec files is controlled by the following behavior options:
14 | 
15 | * `--describe` describe the specs
16 | * `--verify string` index structure validation: create/error/purge/update/none
17 | * `--generate` generate data as specified by workloads
18 | * `--delete` delete specified fields
19 | 
20 | Invoked without behavior options, or with only `--describe`, `imagine` will
21 | describe the indexes and workloads from its spec files, and terminate. If one
22 | or more of verify, generate, or delete is provided, it will do those in order.
23 | 
24 | The following verification options exist:
25 | 
26 | * `create`: Attempts to create all specified indexes and fields, errors out
27 |   if any already existed.
28 | * `error`: Verify that indexes and fields exist, error out if they don't.
29 | * `purge`: Delete all existing indexes and fields, then try to create them.
30 |   Error out if either part of this fails.
31 | * `update`: Try to create any missing indexes or fields. Error out if this fails.
32 | * `none`: Do no verification. (Workloads will still check for index/field
33 |   existence.)
34 | 
35 | The default for `--verify` is determined by other parameters; if `--delete` is
36 | present, and `--generate` is not, the default verification is "none" (there's
37 | no point in verifying that things exist right before deleting them), otherwise
38 | the default verification is "error".
39 | 
40 | The following options change how `imagine` goes about its work:
41 | 
42 | * `--column-scale int` scale number of columns provided by specs
43 | * `--cpu-profile string` record CPU profile to file
44 | * `--dry-run` dry-run; describe what would be done
45 | * `--hosts string` comma-separated list of "host:port" pairs of the Pilosa cluster (default "localhost:10101")
46 | * `--mem-profile string` record allocation profile to file
47 | * `--prefix string` prefix to use on index names
48 | * `--row-scale int` scale number of rows provided by specs
49 | * `--thread-count int` number of threads to use for import, overrides value in config file (default 1)
50 | * `--time` report on time elapsed for operations
51 | 
52 | ## Spec files
53 | 
54 | The following global settings exist for each spec:
55 | 
56 | * densityscale: A density scale factor that determines the precision
57 |   used for density computations. Density scale should be a power of two.
58 |   Higher density scales will take longer to compute and process. (The
59 |   operation is O(log2(N)).)
60 | * prefix: A preferred prefix to use for index names. If absent,
61 |   `imaginary` is used. The `--prefix` command line option overrides
62 |   this.
63 | * version: The string "1.0". The intent is that future versions of the
64 |   tool will attempt to ensure that a given spec produces identical results.
65 |   If a later version would change the results from a spec, it should do so
66 |   only when a different string is specified here. However, *this
67 |   guarantee is not yet in force*. The software is still in an immature
68 |   state, and may change output significantly during development.
69 | * seed: A default PRNG seed, used for indexes/fields that don't specify
70 |   their own.
71 | 
72 | A spec can specify two other kinds of things, indexes and workloads.
73 | Indexes describe the data that will go in a Pilosa index, such as the index's
74 | name, size (in columns), and number of fields. Workloads describe specific
75 | patterns of creating and inserting data in fields.
76 | 
77 | When multiple specs are provided, they are combined. Indexes and fields are
78 | merged; any conflicts between them are an error, and `imagine` will report
79 | such errors and then stop. Workloads are concatenated, with specs processed
80 | in command-line order.
81 | 
82 | ### Indexes
83 | 
84 | Indexes are defined in a top-level map, using the index name as the key. Each
85 | index would typically be written as `[indexes.indexname]`. Each index has
86 | settings, plus field entries under the index. Fields are a mapping of names
87 | to field specifications.
88 | 
89 | * name: The index's name. (This will be prefixed later.)
90 | * description: A longer description of the index's purpose within a set.
91 | * columns: The number of columns.
92 | * seed: A default PRNG seed to use for fields that don't specify their own.
93 | 
94 | ### Fields
95 | 
96 | Fields can be of several kinds, specified as "type". Defined types:
97 | * `set`: The default "set" field type, where rows correspond to specific
98 |   values.
99 | * `mutex`: The "mutex" field type, which is like a set, only it enforces
100 |   that only one row is set per column.
101 | * `int`: The binary-representation field type, usable for range queries.
102 | * `time`: The "time" field type, which is a set with additional optional
103 |   timestamp information.
104 | 
105 | All fields share some common parameters:
106 | 
107 | * `zipfV`, `zipfS`: the V/S values used for a Zipf distribution of values.
108 | * `min`, `max`: Minimum and maximum values. For int fields, this is the value
109 |   range; for set/mutex fields, it's the range of rows that will be
110 |   potentially generated.
111 | * `sourceIndex`: An index to use for values; the value range will be the
112 |   source index's column range. If the source index has 100,000 columns, this
113 |   is equivalent to "min: 0, max: 99999".
114 | * `density`: The field's base density of bits set. For a set, this density
115 |   applies to each row independently; for a mutex or int field, it
116 |   determines how many columns should have a value set.
117 | * `valueRule`: "linear" or "zipf". Exact interpretation varies by field type,
118 |   but "linear" indicates that all rows should have the same density of
119 |   values, while "zipf" indicates that they should follow a Zipf distribution.
120 | 
121 | #### Set/Mutex Fields
122 | 
123 | Set and mutex fields can also configure a cache type:
124 | 
125 | * `cache`: Cache type, one of "lru" or "none".
126 | 
127 | ##### Set/Time Fields
128 | 
129 | Set (and time) fields can be generated either in row-major order (generate
130 | one row at a time for all columns) or column-major order (generate all rows for
131 | each column).
132 | 
133 | * `dimensionOrder`: string, one of "row" or "column". Default is "row".
134 | * `quantum`: string, one of "Y", "YM", "YMD", or "YMDH". Valid only for
135 |   time fields. Default is "YMDH".
136 | 
137 | Zipf parameters: The first row will have bits set based on the base density
138 | provided. Following rows will follow the Zipf distribution's probabilities.
139 | For instance, with v=2, s=2, the k=0 probability is proportional to
140 | `(2+0)**(-2)` (1/4), and the k=1 probability is proportional to
141 | `(2+1)**(-2)` (1/9). Thus, the probability of a bit being set in the k=1 row is
142 | 4/9 of the base density.
143 | 
144 | The final set of bits does not depend on whether values were computed in
145 | row-major or column-major order. (This guarantee is slightly weaker than other guarantees.)
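As an illustration of the arithmetic above, here is a sketch of a field spec (with hypothetical names, not one of the bundled samples) that pairs a base density with a Zipf value rule:

```toml
# Hypothetical sketch: with valueRule = "zipf", row k's density is
# proportional to (v+k)^(-s). With density = 0.36, zipfV = 2, zipfS = 2:
#   row 0: 0.36 (the base density)
#   row 1: 0.36 * (1/9)/(1/4) = 0.16
densityscale = 2097152
version = "1.0"
[indexes.users]
columns = 100000
fields = [
    { name = "zipf-example", type = "set", max = 10, density = 0.36, valueRule = "zipf", zipfV = 2.0, zipfS = 2.0 },
]
```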
146 | 
147 | ##### Mutex Fields
148 | 
149 | Zipf parameters: This just follows the behavior of the Zipf generator in
150 | `math/rand`. A single value is determined for each column, determining which
151 | bit is set.
152 | 
153 | #### Int Fields
154 | 
155 | By default, every member of an int field is set to a random value within the
156 | range.
157 | 
158 | Zipf parameters: This follows the behavior of the Zipf generator in `math/rand`,
159 | with an offset of the minimum value. For instance, a field with min/max of
160 | 10/20 behaves exactly like a field with a min/max of 0/10, with 10 added to
161 | each value.
162 | 
163 | ### Workloads
164 | 
165 | A workload describes a named series of steps, which apply to indexes
166 | and fields previously described. Workloads don't have to be in the same
167 | spec files as the indexes and fields they refer to. Workloads are defined
168 | in a top-level array, usually using `[[workloads]]` to refer to them.
169 | Workloads are sequential. They have the following attributes:
170 | 
171 | * `name`: The name of the workload.
172 | * `description`: A description of the workload.
173 | * `threadCount`: Number of importer threads to use in imports.
174 | * `batchSize`: The default size of import batches (number of records before
175 |   the client transmits records to the server).
176 | 
177 | Each workload also has an array of tasks, which are all executed in parallel.
178 | 
179 | #### Tasks
180 | 
181 | Each task outlines a specific set of data to populate in a given field.
182 | 
183 | * `index`, `field`: the index and field names to identify the field to be
184 |   populated. The index name should match the name in the spec, not including
185 |   any prefixes.
186 | * `seed`: the random number seed to use when populating this field. Defaults
187 |   to the seed for the field's parent index.
188 | * `columns`: the number of columns to populate. Default: populate the
189 |   entire field, using the index's columns.
190 | * `columnOffset`: column to start with. The special value "append" means
191 |   to create new columns starting immediately after the highest column
192 |   previously created.
193 | * `columnOrder`: "linear", "stride", "zipf", or "permute" (default linear).
194 |   Indicates the order in which to generate column values.
195 | * `stride`: The stride to use with a columnOrder of "stride".
196 | * `rowOrder`: "linear" or "permute" (default linear). Determines the order
197 |   in which row values are computed, for set fields, or whether to permute
198 |   generated values, for mutex or int fields.
199 | * `batchSize`: Size of import batches (overrides, but defaults to, the
200 |   workload's batchSize).
201 | * `stamp`: Controls timestamp behavior. One of "none", "random", "increasing".
202 | * `stampRange`: A duration over which to spread timestamps when generating
203 |   them.
204 | * `stampStart`: A specific time to start timestamps at. Defaults to current
205 |   time minus stamp range.
206 | * `zipfV`, `zipfS`: V and S values for a zipf distribution of columns.
207 | * `zipfRange`: The range to use for the zipf distribution (defaults to
208 |   `columns`).
209 | 
210 | As a special case, when `columnOffset` is "append" and `columnOrder` is "zipf",
211 | values are randomly generated using a zipf distribution over [0,`zipfRange`).
212 | These values are then subtracted from the *next* column number -- the lowest
213 | column number not currently known to `imagine`, to produce a range which might
214 | indicate updates to existing columns, or might indicate a new column. A value
215 | is generated for each column. Note that this will pick values the same way
216 | mutex or int fields do, rather than generating all the values for each column,
217 | and the same column may be generated more than once. This behavior attempts
218 | to simulate likely behavior for event streams.
219 | 
220 | The "zipf" `columnOrder` is not supported except with `columnOffset` of
221 | "append", and the Zipf parameters are not defined for any other column order.
222 | 
223 | ## Data Generation
224 | 
225 | Reproducible data generation means being able to generate the same bits every
226 | time. To this end, we use a seekable PRNG -- you can specify an offset into
227 | its stream and get the same bits every time. See the related package
228 | `apophenia` for details.
229 | 
230 | Set values are computed using `apophenia.Weighted`, with `seed` equal to the
231 | row number, and `id` equal to the column number.
232 | 
233 | Mutex/Int: Mutex and int fields both generate a single value in their range.
234 | Linear values are computed using `row` 0, `iter` 0, and are computed as
235 | `min + U % (max - min)`. (For a mutex, the minimum value is always 0.) Zipf
236 | values are computed using iterated values for `row` 0 as inputs to another
237 | algorithm which treats them as [0,1) range values. If RowOrder is set to
238 | `permute`, the permutation is computed using permutation row 2.
239 | 
240 | Permuted column values are generated by requesting a permutation generator for
241 | row 0 with the given seed. Permuted row values for sets are generated using
242 | a permutation generator for row 1.
243 | 
--------------------------------------------------------------------------------
/dx/compare.go:
--------------------------------------------------------------------------------
1 | package dx
2 | 
3 | import (
4 |     "encoding/json"
5 |     "fmt"
6 |     "io"
7 |     "log"
8 |     "os"
9 |     "reflect"
10 |     "text/tabwriter"
11 |     "time"
12 | 
13 |     "github.com/pkg/errors"
14 |     "github.com/spf13/cobra"
15 | )
16 | 
17 | // NewCompareCommand initializes a new compare command for dx.
18 | func NewCompareCommand(m *Main) *cobra.Command {
19 |     compareCmd := &cobra.Command{
20 |         Use:   "compare",
21 |         Short: "compare two dx results",
22 |         Long:  `Compare two result files generated by "dx ingest" or "dx query".`,
23 |         PreRun: func(cmd *cobra.Command, args []string) {
24 |             if err := validateComparisonArgs(args); err != nil {
25 |                 if m.Verbose {
26 |                     fmt.Printf("%+v\n", err)
27 |                 } else {
28 |                     fmt.Printf("%v\n", err)
29 |                 }
30 |                 os.Exit(1)
31 |             }
32 |         },
33 |         Run: func(cmd *cobra.Command, args []string) {
34 |             if err := ExecuteComparison(args[0], args[1]); err != nil {
35 |                 if m.Verbose {
36 |                     fmt.Printf("%+v\n", err)
37 |                 } else {
38 |                     fmt.Printf("%v\n", err)
39 |                 }
40 |                 os.Exit(1)
41 |             }
42 |         },
43 |     }
44 | 
45 |     return compareCmd
46 | }
47 | 
48 | // validateComparisonArgs validates that exactly two args were passed to the compare command
49 | // and that both are valid filenames.
50 | func validateComparisonArgs(args []string) error {
51 |     if len(args) != 2 {
52 |         return errors.New("need exactly two files to compare")
53 |     }
54 |     fileExists, err := checkFileExists(args[0])
55 |     if err != nil {
56 |         return errors.Wrapf(err, "error verifying file %s exists", args[0])
57 |     }
58 |     if !fileExists {
59 |         return errors.Errorf("%s does not exist or is not a file", args[0])
60 |     }
61 |     fileExists, err = checkFileExists(args[1])
62 |     if err != nil {
63 |         return errors.Wrapf(err, "error verifying file %s exists", args[1])
64 |     }
65 |     if !fileExists {
66 |         return errors.Errorf("%s does not exist or is not a file", args[1])
67 |     }
68 |     return nil
69 | }
70 | 
71 | // Comparison contains the information produced by comparing two runs. RunTime is the total time it took for a run to complete.
72 | // TotalTime is the sum of the individual times of each operation, which may have been running in separate goroutines.
73 | type Comparison struct {
74 |     Type           string
75 |     RunTime1       time.Duration
76 |     RunTime2       time.Duration
77 |     RunTimeDelta   float64
78 |     TotalTime1     time.Duration
79 |     TotalTime2     time.Duration
80 |     TotalTimeDelta float64
81 |     ThreadCount1   int
82 |     ThreadCount2   int
83 |     Accuracy       float64
84 |     Size           int64
85 | }
86 | 
87 | // ExecuteComparison executes a comparison on the two files.
88 | func ExecuteComparison(file1, file2 string) error {
89 |     benchChan1 := make(chan *Benchmark)
90 |     benchChan2 := make(chan *Benchmark)
91 |     cmdTypeChan1 := make(chan string)
92 |     cmdTypeChan2 := make(chan string)
93 | 
94 |     go readResults(file1, benchChan1, cmdTypeChan1)
95 |     go readResults(file2, benchChan2, cmdTypeChan2)
96 | 
97 |     cmdType1 := <-cmdTypeChan1
98 |     cmdType2 := <-cmdTypeChan2
99 | 
100 |     if cmdType1 != cmdType2 {
101 |         return errors.Errorf("result file types don't match: %v and %v", cmdType1, cmdType2)
102 |     }
103 | 
104 |     switch cmdType1 {
105 |     case cmdIngest:
106 | 
107 |         b1 := <-benchChan1
108 |         b2 := <-benchChan2
109 | 
110 |         // compare ingest
111 |         comparison, err := compareIngest(b1, b2)
112 |         if err != nil {
113 |             return errors.Wrap(err, "error comparing ingest")
114 |         }
115 |         // print results
116 |         if err := printIngestResults(comparison); err != nil {
117 |             return errors.Wrap(err, "error printing ingest results")
118 |         }
119 |         return nil
120 |     case cmdQuery:
121 |         // consolidate benches
122 |         benches1 := make([]*Benchmark, 0)
123 |         benches2 := make([]*Benchmark, 0)
124 |         for b := range benchChan1 {
125 |             benches1 = append(benches1, b)
126 |         }
127 |         for b := range benchChan2 {
128 |             benches2 = append(benches2, b)
129 |         }
130 | 
131 |         // compare queries
132 |         comparison, err := compareQueries(benches1, benches2)
133 |         if err != nil {
134 |             return errors.Wrap(err, "error comparing queries")
135 |         }
136 | 
137 |         // print results
138 |         if err := printQueryResults(comparison); err != nil {
139 |             return errors.Wrap(err, "error printing query results")
140 |         }
141 |         return nil
142 |     // even though there is cmdTotal, it must never be at the start of a file, so that is an error.
143 |     default:
144 |         return errors.Errorf("invalid command type: %v", cmdType1)
145 |     }
146 | }
147 | 
148 | // compareQueries returns the total time of all the individual queries, as well as the total time of the run and additional analysis.
149 | func compareQueries(benches1, benches2 []*Benchmark) (*Comparison, error) {
150 |     var runTime1, runTime2 time.Duration
151 | 
152 |     // queryMap only contains valid queries from benches1
153 |     queryMap := make(map[int64]*Query)
154 |     for _, b1 := range benches1 {
155 |         if b1.Type == cmdTotal {
156 |             runTime1 = b1.Time.Duration
157 |             continue
158 |         }
159 |         if isValidQuery(b1.Query) {
160 |             queryMap[b1.Query.ID] = b1.Query
161 |         } else {
162 |             queryT := b1.Query.Type.String()
163 |             log.Printf("invalid query from first file: ID: %v, %s %v from index: %s field: %s\n",
164 |                 b1.Query.ID, queryT, b1.Query.Rows, b1.Query.IndexName, b1.Query.FieldName)
165 |         }
166 |     }
167 | 
168 |     // validQueries is the number of queries that successfully ran on both clusters.
169 |     // This is not equivalent to the number of queries with correct results.
170 |     var validQueries int64
171 |     var numCorrect int64
172 |     var totalTime1, totalTime2 time.Duration
173 | 
174 |     for _, b2 := range benches2 {
175 |         if b2.Type == cmdTotal {
176 |             runTime2 = b2.Time.Duration
177 |             continue
178 |         }
179 | 
180 |         query2 := b2.Query
181 |         // here we assume that the same IDs mean the same queries.
182 |         // if query1 is found, it must already be a valid query.
183 |         if query1, found := queryMap[query2.ID]; found {
184 |             if isValidQuery(query2) {
185 |                 if queryResultsEqual(query1, query2) {
186 |                     numCorrect++
187 |                 } else {
188 |                     // valid query2 but unequal results
189 |                     queryT := query2.Type.String()
190 |                     log.Printf("unequal results: ID: %v, %s %v from index: %s field: %s. Got results %v and %v, and result counts %v and %v\n",
191 |                         query1.ID, queryT, query1.Rows, query1.IndexName, query1.FieldName, query1.Result, query2.Result, query1.ResultCount, query2.ResultCount)
192 |                 }
193 |             } else {
194 |                 // invalid query2
195 |                 queryT := query2.Type.String()
196 |                 log.Printf("invalid query from second file: ID: %v, %s %v from index: %s field: %s\n",
197 |                     query2.ID, queryT, query2.Rows, query2.IndexName, query2.FieldName)
198 |             }
199 | 
200 |             // regardless of validity or equality of results, add time for valid queries.
201 |             totalTime1 += query1.Time.Duration
202 |             totalTime2 += query2.Time.Duration
203 |             validQueries++
204 |         } else {
205 |             // first result does not contain this query
206 |             queryT := query2.Type.String()
207 |             log.Printf("query found in second file but not in first: ID: %v, %s %v from index: %s field: %s\n",
208 |                 query2.ID, queryT, query2.Rows, query2.IndexName, query2.FieldName)
209 |         }
210 |     }
211 | 
212 |     totalTimeDelta, err := compareTime(totalTime1, totalTime2)
213 |     if err != nil {
214 |         return nil, errors.Wrap(err, "error comparing total time")
215 |     }
216 |     runTimeDelta, err := compareTime(runTime1, runTime2)
217 |     if err != nil {
218 |         return nil, errors.Wrap(err, "error comparing run time")
219 |     }
220 |     accuracy := float64(numCorrect) / float64(validQueries)
221 | 
222 |     threadCount1 := benches1[0].ThreadCount
223 |     threadCount2 := benches2[0].ThreadCount
224 | 
225 |     return &Comparison{
226 |         Type:           cmdQuery,
227 |         RunTime1:       runTime1,
228 |         RunTime2:       runTime2,
229 |         RunTimeDelta:   runTimeDelta,
230 |         TotalTime1:     totalTime1,
231 |         TotalTime2:     totalTime2,
232 |         TotalTimeDelta: totalTimeDelta,
233 |         ThreadCount1:   threadCount1,
234 |         ThreadCount2:   threadCount2,
235 |         Accuracy:       accuracy,
236 |         Size:           validQueries,
237 |     }, nil
238 | }
239 | 
240 | func compareIngest(b1, b2 *Benchmark) (*Comparison, error) {
241 |     // analyze time
242 |     timeDelta, err := compareTime(b1.Time.Duration, b2.Time.Duration)
243 |     if err != nil {
244 |         return nil, errors.Wrap(err, "error comparing time")
245 |     }
246 | 
247 |     return &Comparison{
248 |         Type:         cmdIngest,
249 |         RunTime1:     b1.Time.Duration,
250 |         RunTime2:     b2.Time.Duration,
251 |         RunTimeDelta: timeDelta,
252 |         ThreadCount1: b1.ThreadCount,
253 |         ThreadCount2: b2.ThreadCount,
254 |     }, nil
255 | }
256 | 
257 | // compareTime takes two durations and returns the relative delta, (time2-time1)/time1.
258 | func compareTime(time1, time2 time.Duration) (float64, error) {
259 |     if time1 == 0 {
260 |         return 0, errors.New("time1 is zero")
261 |     }
262 |     if time2 == 0 {
263 |         return 0, errors.New("time2 is zero")
264 |     }
265 |     timeDelta := float64(time2-time1) / float64(time1)
266 |     return timeDelta, nil
267 | }
268 | 
269 | // queryResultsEqual compares the results of two valid queries. If the queries have
270 | // different result types (i.e. Result and ResultCount), we count the number of results
271 | // and compare the two counts. Results are prioritized over resultCounts.
272 | func queryResultsEqual(query1, query2 *Query) bool {
273 |     // one of query1.Result and query1.ResultCount is not nil
274 |     if query1.Result == nil {
275 |         if query2.Result == nil {
276 |             return *query1.ResultCount == *query2.ResultCount
277 |         }
278 |         return *query1.ResultCount == int64(len(query2.Result.Columns))
279 |     }
280 |     // else, query1.Result is not nil
281 |     if query2.Result == nil {
282 |         return int64(len(query1.Result.Columns)) == *query2.ResultCount
283 |     }
284 |     return reflect.DeepEqual(query1.Result, query2.Result)
285 | }
286 | 
287 | // isValidQuery checks if a query is valid. A valid query has at least one of
288 | // result or resultCount as a non-nil value.
289 | func isValidQuery(query *Query) bool {
290 |     if query == nil {
291 |         return false
292 |     }
293 |     if query.Result == nil && query.ResultCount == nil {
294 |         return false
295 |     }
296 |     return true
297 | }
298 | 
299 | // readResults streams the decoded benchmarks from file into benchChan. It also checks
300 | // the bench type of the first decoded benchmark and sends it to cmdTypeChan. The function
301 | // closes both channels when it is done reading; on any error it exits via log.Fatalf.
302 | func readResults(file string, benchChan chan *Benchmark, cmdTypeChan chan string) {
303 |     fileReader, err := os.Open(file)
304 |     if err != nil {
305 |         log.Fatalf("error opening file %v: %v", file, err)
306 |     }
307 | 
308 |     decoder := json.NewDecoder(fileReader)
309 | 
310 |     // check first benchmark for its type
311 |     bench := NewBenchmark()
312 |     if err := decoder.Decode(&bench); err == io.EOF {
313 |         log.Fatalf("empty file %v: %v", file, err)
314 |     } else if err != nil {
315 |         log.Fatalf("error decoding json from %v: %v", file, err)
316 |     }
317 | 
318 |     cmdTypeChan <- bench.Type
319 |     close(cmdTypeChan)
320 |     benchChan <- bench
321 | 
322 |     // keep reading until EOF or error
323 |     for {
324 |         bench := NewBenchmark()
325 |         if err := decoder.Decode(&bench); err == io.EOF {
326 |             break
327 |         } else if err != nil {
328 |             log.Fatalf("error decoding json from %v: %v", file, err)
329 |         }
330 |         benchChan <- bench
331 |     }
332 |     close(benchChan)
333 | }
334 | 
335 | // printIngestResults prints the results of dx ingest.
336 | func printIngestResults(c *Comparison) error {
337 |     w := new(tabwriter.Writer)
338 |     w.Init(os.Stdout, 10, 5, 5, ' ', tabwriter.AlignRight)
339 | 
340 |     // print in percentage
341 |     delta := c.RunTimeDelta * 100
342 | 
343 |     fmt.Fprintf(w, "ingest\t\tfirst-threads%v\tsecond-threads%v\tdelta\t\n", c.ThreadCount1, c.ThreadCount2)
344 |     fmt.Fprintf(w, "\t\t%v\t%v\t%.1f%%\t\n", c.RunTime1, c.RunTime2, delta)
345 |     fmt.Fprintln(w)
346 |     if err := w.Flush(); err != nil {
347 |         return errors.Wrap(err, "could not flush writer")
348 |     }
349 |     return nil
350 | }
351 | 
352 | // printQueryResults prints the results of dx query.
353 | func printQueryResults(c *Comparison) error {
354 |     w := new(tabwriter.Writer)
355 |     w.Init(os.Stdout, 10, 5, 5, ' ', tabwriter.AlignRight)
356 |     fmt.Fprintf(w, "queries\taccuracy\tfirst-threads%v\tsecond-threads%v\tdelta\t\n", c.ThreadCount1, c.ThreadCount2)
357 | 
358 |     // print in percentages
359 |     accuracy := c.Accuracy * 100
360 |     runTimeDelta := c.RunTimeDelta * 100
361 |     totalTimeDelta := c.TotalTimeDelta * 100
362 | 
363 |     // average ms/op
364 |     ave1 := (float64(c.TotalTime1) / float64(c.Size)) / float64(1000000)
365 |     ave2 := (float64(c.TotalTime2) / float64(c.Size)) / float64(1000000)
366 |     fmt.Fprintf(w, "%v\t%.1f%%\t%.3f ms/op\t%.3f ms/op\t%.1f%%\t\n", c.Size, accuracy, ave1, ave2, totalTimeDelta)
367 |     fmt.Fprintf(w, "%v\t%v\t%v\t%v\t%.1f%%\t\n", "TOTAL", "", c.RunTime1, c.RunTime2, runTimeDelta)
368 |     fmt.Fprintln(w)
369 |     if err := w.Flush(); err != nil {
370 |         return errors.Wrap(err, "could not flush writer")
371 |     }
372 |     return nil
373 | }
374 | 
375 | // checkFileExists checks whether a file exists at path.
376 | func checkFileExists(path string) (bool, error) {
377 |     fileInfo, err := os.Stat(path)
378 |     if os.IsNotExist(err) {
379 |         return false, nil
380 |     } else if err != nil {
381 |         return false, errors.Wrap(err, "error statting path")
382 |     }
383 |     fileMode := fileInfo.Mode()
384 |     return fileMode.IsRegular(), nil
385 | }
386 | 
--------------------------------------------------------------------------------
/dx/testdata/query/0:
--------------------------------------------------------------------------------
1 | {"type":"query","time":"1.966745ms","threadcount":1,"query":{"id":0,"query":1,"index":"dx-users","field":"numbers","rows":[21,51],"time":"1.966745ms","resultcount":82}}
2 | {"type":"query","time":"601.715µs","threadcount":1,"query":{"id":1,"query":0,"index":"ibench","field":"fbench","rows":[0,0],"time":"601.715µs","resultcount":0}}
3 | {"type":"query","time":"1.433244ms","threadcount":1,"query":{"id":2,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.433244ms","resultcount":473}}
4 | {"type":"query","time":"544.12µs","threadcount":1,"query":{"id":3,"query":2,"index":"imaginary-index","field":"field","rows":[0,0],"time":"544.12µs","resultcount":0}}
5 | {"type":"query","time":"750.829µs","threadcount":1,"query":{"id":4,"query":3,"index":"dx","field":"server2","rows":[0,0],"time":"750.829µs","resultcount":0}}
6 | {"type":"query","time":"750.931µs","threadcount":1,"query":{"id":5,"query":3,"index":"imaginary-users","field":"numbers","rows":[0,0],"time":"750.931µs","resultcount":0}}
7 | {"type":"query","time":"1.584166ms","threadcount":1,"query":{"id":6,"query":1,"index":"ibench","field":"fbench","rows":[0,0],"time":"1.584166ms","resultcount":0}}
8 | {"type":"query","time":"654.381µs","threadcount":1,"query":{"id":7,"query":2,"index":"dx","field":"candidate","rows":[0,0],"time":"654.381µs","resultcount":0}}
9 | {"type":"query","time":"1.312452ms","threadcount":1,"query":{"id":8,"query":1,"index":"ibench","field":"fbench","rows":[0,0],"time":"1.312452ms","resultcount":0}}
10 | {"type":"query","time":"505.567µs","threadcount":1,"query":{"id":9,"query":1,"index":"dx-users","field":"numbers","rows":[62,110],"time":"505.567µs","resultcount":81}}
11 | {"type":"query","time":"1.286691ms","threadcount":1,"query":{"id":10,"query":0,"index":"dx","field":"candidate","rows":[0,0],"time":"1.286691ms","resultcount":0}}
12 | {"type":"query","time":"464.001µs","threadcount":1,"query":{"id":11,"query":3,"index":"dx","field":"server2","rows":[0,0],"time":"464.001µs","resultcount":0}}
13 | {"type":"query","time":"988.405µs","threadcount":1,"query":{"id":12,"query":2,"index":"bla","field":"aint","rows":[0,0],"time":"988.405µs","resultcount":0}}
14 | {"type":"query","time":"636.468µs","threadcount":1,"query":{"id":13,"query":2,"index":"bla","field":"aint","rows":[0,0],"time":"636.468µs","resultcount":0}}
15 | {"type":"query","time":"1.15774ms","threadcount":1,"query":{"id":14,"query":3,"index":"imaginary-users","field":"numbers","rows":[0,0],"time":"1.15774ms","resultcount":0}}
16 | {"type":"query","time":"1.545607ms","threadcount":1,"query":{"id":15,"query":0,"index":"dx-index","field":"field","rows":[301,19],"time":"1.545607ms","resultcount":473}}
17 | {"type":"query","time":"640.937µs","threadcount":1,"query":{"id":16,"query":1,"index":"dx","field":"candidate","rows":[0,0],"time":"640.937µs","resultcount":0}}
18 | {"type":"query","time":"1.092996ms","threadcount":1,"query":{"id":17,"query":2,"index":"dx-index","field":"field","rows":[55,41],"time":"1.092996ms","resultcount":0}}
19 | {"type":"query","time":"445.509µs","threadcount":1,"query":{"id":18,"query":0,"index":"dx-users","field":"numbers","rows":[31,18],"time":"445.509µs","resultcount":63}} 20 | {"type":"query","time":"1.690497ms","threadcount":1,"query":{"id":19,"query":2,"index":"bla","field":"aint","rows":[0,0],"time":"1.690497ms","resultcount":0}} 21 | {"type":"query","time":"453.54µs","threadcount":1,"query":{"id":20,"query":3,"index":"imaginary-index","field":"field","rows":[0,0],"time":"453.54µs","resultcount":0}} 22 | {"type":"query","time":"1.271531ms","threadcount":1,"query":{"id":21,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.271531ms","resultcount":473}} 23 | {"type":"query","time":"427.067µs","threadcount":1,"query":{"id":22,"query":0,"index":"imaginary-users","field":"numbers","rows":[1,0],"time":"427.067µs","resultcount":7}} 24 | {"type":"query","time":"1.053598ms","threadcount":1,"query":{"id":23,"query":2,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.053598ms","resultcount":0}} 25 | {"type":"query","time":"558.563µs","threadcount":1,"query":{"id":24,"query":2,"index":"ibench","field":"fbench","rows":[0,0],"time":"558.563µs","resultcount":0}} 26 | {"type":"query","time":"1.349004ms","threadcount":1,"query":{"id":25,"query":1,"index":"bla","field":"aint","rows":[0,0],"time":"1.349004ms","resultcount":0}} 27 | {"type":"query","time":"500.181µs","threadcount":1,"query":{"id":26,"query":3,"index":"dx-users","field":"numbers","rows":[111,188],"time":"500.181µs","resultcount":7}} 28 | {"type":"query","time":"1.002989ms","threadcount":1,"query":{"id":27,"query":3,"index":"dx","field":"candidate","rows":[0,0],"time":"1.002989ms","resultcount":0}} 29 | {"type":"query","time":"830.771µs","threadcount":1,"query":{"id":28,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"830.771µs","resultcount":473}} 30 | {"type":"query","time":"414.337µs","threadcount":1,"query":{"id":29,"query":0,"index":"ibench","field":"fbench","rows":[0,0],"time":"414.337µs","resultcount":0}} 31 | {"type":"query","time":"1.06754ms","threadcount":1,"query":{"id":30,"query":1,"index":"ibench","field":"fbench","rows":[0,0],"time":"1.06754ms","resultcount":0}} 32 | {"type":"query","time":"446.4µs","threadcount":1,"query":{"id":31,"query":2,"index":"dx","field":"candidate","rows":[0,0],"time":"446.4µs","resultcount":0}} 33 | {"type":"query","time":"1.03081ms","threadcount":1,"query":{"id":32,"query":2,"index":"ibench","field":"fbench","rows":[0,0],"time":"1.03081ms","resultcount":0}} 34 | {"type":"query","time":"701.229µs","threadcount":1,"query":{"id":33,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"701.229µs","resultcount":473}} 35 | {"type":"query","time":"1.341858ms","threadcount":1,"query":{"id":34,"query":0,"index":"dx-users","field":"numbers","rows":[44,165],"time":"1.341858ms","resultcount":62}} 36 | {"type":"query","time":"515.69µs","threadcount":1,"query":{"id":35,"query":3,"index":"dx","field":"server1","rows":[0,0],"time":"515.69µs","resultcount":0}} 37 | {"type":"query","time":"1.107886ms","threadcount":1,"query":{"id":36,"query":0,"index":"dx","field":"candidate","rows":[0,0],"time":"1.107886ms","resultcount":0}} 38 | {"type":"query","time":"1.041797ms","threadcount":1,"query":{"id":37,"query":2,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.041797ms","resultcount":0}} 39 | 
{"type":"query","time":"467.045µs","threadcount":1,"query":{"id":38,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"467.045µs","resultcount":473}} 40 | {"type":"query","time":"969.646µs","threadcount":1,"query":{"id":39,"query":0,"index":"dx-index","field":"field","rows":[450,56],"time":"969.646µs","resultcount":473}} 41 | {"type":"query","time":"456.993µs","threadcount":1,"query":{"id":40,"query":2,"index":"imaginary-index","field":"field","rows":[0,0],"time":"456.993µs","resultcount":0}} 42 | {"type":"query","time":"890.36µs","threadcount":1,"query":{"id":41,"query":0,"index":"ibench","field":"fbench","rows":[0,0],"time":"890.36µs","resultcount":0}} 43 | {"type":"query","time":"361.448µs","threadcount":1,"query":{"id":42,"query":2,"index":"imaginary-index","field":"field","rows":[0,0],"time":"361.448µs","resultcount":0}} 44 | {"type":"query","time":"818.009µs","threadcount":1,"query":{"id":43,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"818.009µs","resultcount":473}} 45 | {"type":"query","time":"393.876µs","threadcount":1,"query":{"id":44,"query":1,"index":"imaginary-users","field":"numbers","rows":[1,0],"time":"393.876µs","resultcount":9}} 46 | {"type":"query","time":"1.012226ms","threadcount":1,"query":{"id":45,"query":0,"index":"dx-index","field":"field","rows":[967,276],"time":"1.012226ms","resultcount":473}} 47 | {"type":"query","time":"443.125µs","threadcount":1,"query":{"id":46,"query":1,"index":"dx-index","field":"field","rows":[700,576],"time":"443.125µs","resultcount":473}} 48 | {"type":"query","time":"506.99µs","threadcount":1,"query":{"id":47,"query":0,"index":"imaginary-users","field":"numbers","rows":[1,1],"time":"506.99µs","resultcount":8}} 49 | {"type":"query","time":"1.650675ms","threadcount":1,"query":{"id":48,"query":0,"index":"bla","field":"aint","rows":[0,0],"time":"1.650675ms","resultcount":0}} 50 | {"type":"query","time":"771.629µs","threadcount":1,"query":{"id":49,"query":1,"index":"dx-users","field":"numbers","rows":[59,80],"time":"771.629µs","resultcount":81}} 51 | {"type":"query","time":"561.013µs","threadcount":1,"query":{"id":50,"query":3,"index":"dx-users","field":"numbers","rows":[62,66],"time":"561.013µs","resultcount":14}} 52 | {"type":"query","time":"1.412485ms","threadcount":1,"query":{"id":51,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.412485ms","resultcount":473}} 53 | {"type":"query","time":"630.564µs","threadcount":1,"query":{"id":52,"query":1,"index":"bla","field":"aint","rows":[0,0],"time":"630.564µs","resultcount":0}} 54 | {"type":"query","time":"1.375873ms","threadcount":1,"query":{"id":53,"query":0,"index":"imaginary-users","field":"numbers","rows":[0,1],"time":"1.375873ms","resultcount":7}} 55 | {"type":"query","time":"1.260144ms","threadcount":1,"query":{"id":54,"query":2,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.260144ms","resultcount":0}} 56 | {"type":"query","time":"630.756µs","threadcount":1,"query":{"id":55,"query":0,"index":"ibench","field":"fbench","rows":[0,0],"time":"630.756µs","resultcount":0}} 57 | {"type":"query","time":"1.652645ms","threadcount":1,"query":{"id":56,"query":1,"index":"dx-users","field":"numbers","rows":[103,189],"time":"1.652645ms","resultcount":77}} 58 | {"type":"query","time":"466.839µs","threadcount":1,"query":{"id":57,"query":0,"index":"dx-index","field":"field","rows":[199,98],"time":"466.839µs","resultcount":473}} 59 | 
{"type":"query","time":"1.352384ms","threadcount":1,"query":{"id":58,"query":3,"index":"imaginary-users","field":"numbers","rows":[1,1],"time":"1.352384ms","resultcount":0}} 60 | {"type":"query","time":"485.024µs","threadcount":1,"query":{"id":59,"query":3,"index":"dx","field":"candidate","rows":[0,0],"time":"485.024µs","resultcount":0}} 61 | {"type":"query","time":"1.328906ms","threadcount":1,"query":{"id":60,"query":2,"index":"dx-users","field":"numbers","rows":[52,156],"time":"1.328906ms","resultcount":25}} 62 | {"type":"query","time":"452.404µs","threadcount":1,"query":{"id":61,"query":1,"index":"imaginary-index","field":"field","rows":[0,0],"time":"452.404µs","resultcount":473}} 63 | {"type":"query","time":"1.16612ms","threadcount":1,"query":{"id":62,"query":3,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.16612ms","resultcount":0}} 64 | {"type":"query","time":"420.155µs","threadcount":1,"query":{"id":63,"query":1,"index":"bla","field":"aint","rows":[0,0],"time":"420.155µs","resultcount":0}} 65 | {"type":"query","time":"991.668µs","threadcount":1,"query":{"id":64,"query":3,"index":"imaginary-index","field":"field","rows":[0,0],"time":"991.668µs","resultcount":0}} 66 | {"type":"query","time":"1.155675ms","threadcount":1,"query":{"id":65,"query":0,"index":"dx","field":"server2","rows":[0,0],"time":"1.155675ms","resultcount":0}} 67 | {"type":"query","time":"479.89µs","threadcount":1,"query":{"id":66,"query":1,"index":"dx-index","field":"field","rows":[509,100],"time":"479.89µs","resultcount":473}} 68 | {"type":"query","time":"1.05993ms","threadcount":1,"query":{"id":67,"query":0,"index":"dx-users","field":"numbers","rows":[178,7],"time":"1.05993ms","resultcount":62}} 69 | {"type":"query","time":"422.199µs","threadcount":1,"query":{"id":68,"query":1,"index":"ibench","field":"fbench","rows":[0,0],"time":"422.199µs","resultcount":0}} 70 | {"type":"query","time":"930.148µs","threadcount":1,"query":{"id":69,"query":1,"index":"ibench","field":"fbench","rows":[0,0],"time":"930.148µs","resultcount":0}} 71 | {"type":"query","time":"689.416µs","threadcount":1,"query":{"id":70,"query":0,"index":"dx-users","field":"numbers","rows":[96,139],"time":"689.416µs","resultcount":62}} 72 | {"type":"query","time":"1.525221ms","threadcount":1,"query":{"id":71,"query":3,"index":"ibench","field":"fbench","rows":[0,0],"time":"1.525221ms","resultcount":0}} 73 | {"type":"query","time":"505.411µs","threadcount":1,"query":{"id":72,"query":0,"index":"dx","field":"server1","rows":[0,0],"time":"505.411µs","resultcount":0}} 74 | {"type":"query","time":"1.175815ms","threadcount":1,"query":{"id":73,"query":2,"index":"bla","field":"aint","rows":[0,0],"time":"1.175815ms","resultcount":0}} 75 | {"type":"query","time":"1.031795ms","threadcount":1,"query":{"id":74,"query":1,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.031795ms","resultcount":473}} 76 | {"type":"query","time":"493.309µs","threadcount":1,"query":{"id":75,"query":1,"index":"dx","field":"server1","rows":[0,0],"time":"493.309µs","resultcount":0}} 77 | {"type":"query","time":"1.144053ms","threadcount":1,"query":{"id":76,"query":3,"index":"imaginary-users","field":"numbers","rows":[1,0],"time":"1.144053ms","resultcount":1}} 78 | {"type":"query","time":"461.322µs","threadcount":1,"query":{"id":77,"query":3,"index":"imaginary-index","field":"field","rows":[0,0],"time":"461.322µs","resultcount":0}} 79 | 
{"type":"query","time":"1.027492ms","threadcount":1,"query":{"id":78,"query":0,"index":"dx","field":"primary","rows":[0,0],"time":"1.027492ms","resultcount":0}} 80 | {"type":"query","time":"403.309µs","threadcount":1,"query":{"id":79,"query":1,"index":"dx-index","field":"field","rows":[808,764],"time":"403.309µs","resultcount":473}} 81 | {"type":"query","time":"1.307595ms","threadcount":1,"query":{"id":80,"query":3,"index":"dx-users","field":"numbers","rows":[129,147],"time":"1.307595ms","resultcount":10}} 82 | {"type":"query","time":"415.849µs","threadcount":1,"query":{"id":81,"query":3,"index":"dx","field":"server2","rows":[0,0],"time":"415.849µs","resultcount":0}} 83 | {"type":"query","time":"1.073093ms","threadcount":1,"query":{"id":82,"query":3,"index":"bla","field":"aint","rows":[0,0],"time":"1.073093ms","resultcount":0}} 84 | {"type":"query","time":"579.685µs","threadcount":1,"query":{"id":83,"query":3,"index":"dx-users","field":"numbers","rows":[124,127],"time":"579.685µs","resultcount":8}} 85 | {"type":"query","time":"897.954µs","threadcount":1,"query":{"id":84,"query":1,"index":"imaginary-users","field":"numbers","rows":[1,1],"time":"897.954µs","resultcount":8}} 86 | {"type":"query","time":"483.96µs","threadcount":1,"query":{"id":85,"query":3,"index":"dx-users","field":"numbers","rows":[168,187],"time":"483.96µs","resultcount":8}} 87 | {"type":"query","time":"1.277825ms","threadcount":1,"query":{"id":86,"query":3,"index":"bla","field":"aint","rows":[0,0],"time":"1.277825ms","resultcount":0}} 88 | {"type":"query","time":"1.072135ms","threadcount":1,"query":{"id":87,"query":2,"index":"dx-users","field":"numbers","rows":[15,8],"time":"1.072135ms","resultcount":16}} 89 | {"type":"query","time":"409.217µs","threadcount":1,"query":{"id":88,"query":1,"index":"bla","field":"aint","rows":[0,0],"time":"409.217µs","resultcount":0}} 90 | {"type":"query","time":"959.141µs","threadcount":1,"query":{"id":89,"query":2,"index":"imaginary-users","field":"numbers","rows":[1,0],"time":"959.141µs","resultcount":2}} 91 | {"type":"query","time":"441.65µs","threadcount":1,"query":{"id":90,"query":1,"index":"dx","field":"server2","rows":[0,0],"time":"441.65µs","resultcount":0}} 92 | {"type":"query","time":"1.233297ms","threadcount":1,"query":{"id":91,"query":3,"index":"dx","field":"server2","rows":[0,0],"time":"1.233297ms","resultcount":0}} 93 | {"type":"query","time":"469.64µs","threadcount":1,"query":{"id":92,"query":2,"index":"dx-index","field":"field","rows":[86,262],"time":"469.64µs","resultcount":0}} 94 | {"type":"query","time":"1.0249ms","threadcount":1,"query":{"id":93,"query":0,"index":"bla","field":"aint","rows":[0,0],"time":"1.0249ms","resultcount":0}} 95 | {"type":"query","time":"392.983µs","threadcount":1,"query":{"id":94,"query":1,"index":"bla","field":"aint","rows":[0,0],"time":"392.983µs","resultcount":0}} 96 | {"type":"query","time":"896.985µs","threadcount":1,"query":{"id":95,"query":0,"index":"ibench","field":"fbench","rows":[0,0],"time":"896.985µs","resultcount":0}} 97 | {"type":"query","time":"429.046µs","threadcount":1,"query":{"id":96,"query":2,"index":"dx-users","field":"numbers","rows":[49,43],"time":"429.046µs","resultcount":14}} 98 | {"type":"query","time":"896.324µs","threadcount":1,"query":{"id":97,"query":3,"index":"imaginary-users","field":"numbers","rows":[1,1],"time":"896.324µs","resultcount":0}} 99 | {"type":"query","time":"510.944µs","threadcount":1,"query":{"id":98,"query":3,"index":"ibench","field":"fbench","rows":[0,0],"time":"510.944µs","resultcount":0}} 100 | 
{"type":"query","time":"1.035103ms","threadcount":1,"query":{"id":99,"query":0,"index":"bla","field":"aint","rows":[0,0],"time":"1.035103ms","resultcount":0}} 101 | {"type":"total","time":"169.797748ms","threadcount":1,"query":{"id":-1,"query":0,"index":"","field":"","rows":null,"time":"169.797748ms"}} 102 | --------------------------------------------------------------------------------