├── dx ├── .gitignore ├── testdata │ ├── ingest │ │ ├── 0 │ │ └── 1 │ ├── spec │ │ └── spec.toml │ └── query │ │ └── 0 ├── query_test.go ├── compare_test.go ├── cmd_test.go ├── ingest.go ├── README.md ├── main.go └── compare.go ├── imagine ├── .gitignore ├── samples │ ├── long-tail-import.toml │ ├── small-mutex-import.toml │ ├── small-mutex.toml │ ├── parallel-import.toml │ ├── low-cardinality.toml │ ├── games.toml │ ├── long-tail.toml │ ├── age.toml │ ├── parallel.toml │ ├── sports.toml │ ├── time.toml │ ├── README.md │ ├── bsi.toml │ └── students.toml ├── sample_fast.toml ├── sample.toml ├── enums_stamptype.go ├── enums_densitytype.go ├── enums_cachetype.go ├── enums_fieldtype.go ├── enums_valueorder.go ├── enums_timequantum.go ├── enums_verifytype.go ├── enums_dimensionorder.go ├── sample.md ├── generators_test.go └── README.md ├── version.go ├── .gitignore ├── cmd ├── imagine │ └── main.go ├── dx │ └── main.go └── pi │ ├── query.go │ ├── slicewidth.go │ ├── basic_query.go │ ├── diagonal.go │ ├── range_query.go │ ├── random_query.go │ ├── replay.go │ ├── tps.go │ ├── import.go │ ├── import_range.go │ ├── random_set.go │ ├── bench.go │ ├── zipf.go │ └── main.go ├── apophenia ├── weighted_test.go ├── int128_test.go ├── zipf_test.go ├── weighted.go ├── permute_test.go ├── README.md ├── zipf.go ├── apophenia.go ├── int128.go └── permute.go ├── bench ├── query.go ├── doc.go ├── slicewidth.go ├── diagonal.go ├── range_query.go ├── random_query.go ├── bench_test.go ├── basic_query.go ├── random_set.go ├── import.go ├── import_range.go ├── zipf.go ├── tps.go └── bench.go ├── README.md ├── LICENSE ├── go.mod └── Makefile /dx/.gitignore: -------------------------------------------------------------------------------- 1 | dx 2 | -------------------------------------------------------------------------------- /imagine/.gitignore: -------------------------------------------------------------------------------- 1 | /imagine 2 | -------------------------------------------------------------------------------- /dx/testdata/ingest/0: -------------------------------------------------------------------------------- 1 | {"type":"ingest","time":"6.918462ms","threadcount":1} -------------------------------------------------------------------------------- /dx/testdata/ingest/1: -------------------------------------------------------------------------------- 1 | {"type":"ingest","time":"6.481175ms","threadcount":1} -------------------------------------------------------------------------------- /version.go: -------------------------------------------------------------------------------- 1 | package tools 2 | 3 | var Version = "v0.0.0" 4 | var BuildTime = "not set" 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vendor/ 2 | /.DS_Store 3 | 4 | /influx/etc/.DS_Store 5 | 6 | /influx/.DS_Store 7 | 8 | /influx/data/.DS_Store 9 | -------------------------------------------------------------------------------- /imagine/samples/long-tail-import.toml: -------------------------------------------------------------------------------- 1 | version = "1.0" 2 | [[workloads]] 3 | name = "initial import" 4 | tasks = [ 5 | { index = "users", field = "long-tail" } 6 | ] 7 | -------------------------------------------------------------------------------- /cmd/imagine/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | 
"github.com/pilosa/tools/imagine" 5 | ) 6 | 7 | func main() { 8 | imagine.NewConfig().Execute() 9 | } 10 | -------------------------------------------------------------------------------- /imagine/samples/small-mutex-import.toml: -------------------------------------------------------------------------------- 1 | version = "1.0" 2 | [[workloads]] 3 | name = "initial import" 4 | tasks = [ 5 | { index = "users", field = "small-mutex", seed = 1 } 6 | ] 7 | -------------------------------------------------------------------------------- /imagine/samples/small-mutex.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.users] 4 | columns = 1000000 5 | fields = [ 6 | { name = "small-mutex", type = "mutex", max = 3, density = 0.99, }, 7 | ] 8 | -------------------------------------------------------------------------------- /imagine/samples/parallel-import.toml: -------------------------------------------------------------------------------- 1 | version = "1.0" 2 | [[workloads]] 3 | name = "initial import" 4 | tasks = [ 5 | { index = "users", field = "long-tail" }, 6 | { index = "users", field = "small-mutex", seed = 1 }, 7 | ] 8 | -------------------------------------------------------------------------------- /imagine/samples/low-cardinality.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.users] 4 | columns = 100000 5 | fields = [ 6 | { name = "flags", type = "set", max = 200, density = 0.99, valueRule = "zipf", zipfV = 1.0, zipfS = 4.0, }, 7 | ] 8 | -------------------------------------------------------------------------------- /cmd/dx/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/pilosa/tools/dx" 8 | ) 9 | 10 | func main() { 11 | if err := dx.NewRootCmd().Execute(); err != nil { 12 | fmt.Printf("%+v", err) 13 | os.Exit(1) 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /dx/testdata/spec/spec.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.index] 4 | columns = 20 5 | fields = [ 6 | { name = "field", type = "set", min = 0, max = 5, chance = 0.1, density = 1.0, }, 7 | ] 8 | [[workloads]] 9 | name = "sample" 10 | threadCount = 1 11 | tasks = [ 12 | { index = "index", field = "field", columnOrder = "stride", stride = 3 }, 13 | ] -------------------------------------------------------------------------------- /imagine/samples/games.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.players] 4 | columns = 5000000000 5 | fields = [ 6 | { name = "timey", type = "set", max = 10, density = 0.03, }, 7 | ] 8 | [[workloads]] 9 | name = "ingest" 10 | threadCount = 1 11 | tasks = [ 12 | { index = "players", field = "timey", columnOrder = "permute", columns = 500 }, 13 | ] 14 | -------------------------------------------------------------------------------- /imagine/samples/long-tail.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.users] 4 | columns = 10000 5 | fields = [ 6 | { name = "long-tail", type = "set", max = 1000, density = 0.00001, valueRule = "zipf", 
zipfV = 9999.0, zipfS = 1.001, }, 7 | { name = "long-tail", type = "set", chance = 0.01, density = 0.001, valueRule = "zipf", zipfV = 9999.0, zipfS = 1.001, }, 8 | ] 9 | -------------------------------------------------------------------------------- /imagine/sample_fast.toml: -------------------------------------------------------------------------------- 1 | version = "1.0" 2 | [indexes.users] 3 | columns = 1000000000 4 | fields = [ 5 | {name = "numbers", type = "set", min=0, max=10000, zipfA=1.0, fastSparse = true, density = 0.1 }, 6 | ] 7 | [[workloads]] 8 | name = "sample" 9 | threadCount = 1 10 | tasks = [ 11 | { index = "users", field = "numbers", columnOrder = "stride", stride = 3, dimensionOrder="row" }, 12 | ] 13 | -------------------------------------------------------------------------------- /imagine/samples/age.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.students_ts] 4 | columns = 10000000 5 | fields = [ 6 | { name = "age", type = "int", max = 100, min=14, density = 0.99, valueRule = "zipf", zipfV = 3.0, zipfS = 1.1 }, 7 | ] 8 | [[workloads]] 9 | name = "ingest" 10 | threadCount = 6 11 | batchSize = 1048576 12 | useRoaring = true 13 | tasks = [{ index = "students_ts", field = "age", seed = 1 }] 14 | -------------------------------------------------------------------------------- /imagine/samples/parallel.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.users] 4 | columns = 1000000 5 | fields = [ 6 | { name = "long-tail", type = "set", max = 100, density = 0.001, valueRule = "zipf", zipfV = 9999.0, zipfS = 1.001, }, 7 | { name = "long-tail", type = "set", chance = 0.01, density = 0.1, valueRule = "zipf", zipfV = 9999.0, zipfS = 1.001, }, 8 | { name = "small-mutex", type = "mutex", max = 3, density = 0.99, }, 9 | ] 10 | -------------------------------------------------------------------------------- /imagine/samples/sports.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.users] 4 | columns = 100000000 5 | fields = [ 6 | { name = "timey", type = "time", max = 100, density = 0.10, valueRule = "zipf", quantum = "YMDH" }, 7 | ] 8 | [[workloads]] 9 | name = "ingest" 10 | threadCount = 4 11 | batchSize = 1048576 12 | useRoaring = true 13 | tasks = [ 14 | { index = "users", field = "timey", stamp = "increasing", stampRange = "7560h", columns = 100000000 }, 15 | ] 16 | -------------------------------------------------------------------------------- /imagine/samples/time.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.testidx] 4 | columns = 10000000 5 | fields = [ 6 | { name = "timestamp", type = "time", max=1, density = 1.0, valueRule="zipf", quantum="YMDH" }, 7 | ] 8 | [[workloads]] 9 | name = "ingest" 10 | threadCount = 6 11 | batchSize = 1048576 12 | useRoaring = true 13 | tasks = [ 14 | { index = "testidx", field = "timestamp", stamp = "increasing", stampStart = "2019-01-01T00:00:00Z", stampRange = "240h" }, 15 | ] 16 | -------------------------------------------------------------------------------- /imagine/sample.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.users] 4 | columns = 100 5 | 
fields = [ 6 | # age = { type = "int", min = 10, max = 120 } 7 | # { name = "income", type = "int", min = 0, max = 640000, density = 1.0 }, 8 | { name = "numbers", type = "set", max = 2, density = 0.01, }, 9 | { name = "numbers", type = "set", chance = 0.05, density = 1.0, }, 10 | ] 11 | [[workloads]] 12 | name = "sample" 13 | threadCount = 1 14 | tasks = [ 15 | # { index = "users", field = "income", }, 16 | { index = "users", field = "numbers", columnOrder = "stride", stride = 3 }, 17 | ] 18 | # tasks = [ 19 | # { index = "users", field = "income", columnOrder = "permute" }, 20 | # ] 21 | -------------------------------------------------------------------------------- /apophenia/weighted_test.go: -------------------------------------------------------------------------------- 1 | package apophenia 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func Benchmark_WeightedDistribution(b *testing.B) { 9 | src := NewSequence(0) 10 | w, err := NewWeighted(src) 11 | if err != nil { 12 | b.Fatalf("couldn't make weighted: %v", err) 13 | } 14 | scales := []uint64{3, 6, 12, 18, 24, 63} 15 | for _, scale := range scales { 16 | off := OffsetFor(SequenceWeighted, 0, 0, 0) 17 | scaled := uint64(1 << scale) 18 | b.Run(fmt.Sprintf("Scale%d", scale), func(b *testing.B) { 19 | for i := 0; i < b.N; i++ { 20 | w.Bits(off, 1, scaled) 21 | w.Bits(off, scaled/2, scaled) 22 | w.Bits(off, scaled-1, scaled) 23 | } 24 | }) 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /imagine/samples/README.md: -------------------------------------------------------------------------------- 1 | I'd like to see something like each of the following 6 cases with a single in 2 | column order import followed by random updates. 3 | 4 | - high cardinality (10 million?), long tail of values with very few bits set. 5 | Some columns have only a few (or zero) bits set, some have thousands. 6 | in column order import followed by random updates. 7 | 8 | - medium/low cardinality set field (hundreds). zipfian distribution among 9 | values. 95% of columns have 1 value. 4% have more, 1% have none. (roughish numbers) 10 | 11 | - low cardinality mutex field (3), even distribution, 99% of columns have values. 12 | 13 | - 16 bit int field 99% of columns have a value. zipfian distribution within the 16 bit range 14 | - 32 bit int field 99% of columns have a value. zipfian distribution within the 32 bit range 15 | - 64 bit int field 99% of columns have a value. simulated high precision 16 | timestamp - each update increases slightly from the previous number. 17 | -------------------------------------------------------------------------------- /bench/query.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "os" 7 | "time" 8 | 9 | "github.com/pilosa/go-pilosa" 10 | ) 11 | 12 | type QueryBenchmark struct { 13 | Name string `json:"name"` 14 | Query string `json:"query"` 15 | Index string `json:"index"` 16 | Iterations int `json:"iterations"` 17 | 18 | Logger *log.Logger `json:"-"` 19 | } 20 | 21 | func NewQueryBenchmark() *QueryBenchmark { 22 | return &QueryBenchmark{ 23 | Name: "query", 24 | Logger: log.New(os.Stderr, "", log.LstdFlags), 25 | } 26 | } 27 | 28 | // Run runs the benchmark. 
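// It initializes the schema for b.Index, then issues b.Query against the index
// b.Iterations times, timing each call and recording each duration and
// response in the returned Result.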
29 | func (b *QueryBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 30 | result := NewResult() 31 | result.AgentNum = agentNum 32 | result.Configuration = b 33 | 34 | // Initialize schema. 35 | index, _, err := ensureSchema(client, b.Index, "") 36 | if err != nil { 37 | return result, err 38 | } 39 | 40 | for n := 0; n < b.Iterations; n++ { 41 | start := time.Now() 42 | resp, err := client.Query(index.RawQuery(b.Query)) 43 | result.Add(time.Since(start), resp) 44 | if err != nil { 45 | return result, err 46 | } 47 | } 48 | return result, nil 49 | } 50 | -------------------------------------------------------------------------------- /dx/query_test.go: -------------------------------------------------------------------------------- 1 | package dx 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestGenerateRandomRows(t *testing.T) { 8 | tests := []struct{ min, max, numRows int64 }{ 9 | {min: 4, max: 4, numRows: 3}, 10 | {min: 3, max: 9, numRows: 2}, 11 | {min: 5, max: 6, numRows: 4}, 12 | } 13 | for _, f := range tests { 14 | rows, err := generateRandomRows(f.min, f.max, f.numRows) 15 | if err != nil { 16 | t.Fatalf("generating rows for min: %v, max: %v, err: %v", f.min, f.max, err) 17 | } 18 | if int64(len(rows)) != f.numRows { 19 | t.Fatalf("expected %v rows, got %v", f.numRows, rows) 20 | } 21 | for _, rowNum := range rows { 22 | if !(f.min <= rowNum && rowNum <= f.max) { 23 | t.Fatalf("row num %v is not in range [%v, %v]", rowNum, f.min, f.max) 24 | } 25 | } 26 | } 27 | } 28 | 29 | func TestIndexSpec_RandomIndexField(t *testing.T) { 30 | fs := newFieldSpec() 31 | fs["field0"] = pair{min: 12, max: 13} 32 | is := newIndexSpec() 33 | is["index0"] = fs 34 | 35 | indexName, fieldName, err := is.randomIndexField() 36 | if err != nil { 37 | t.Fatalf("unexpected error: %v", err) 38 | } 39 | if indexName != "index0" { 40 | t.Fatalf("expected index name: %v, got %v", "index0", indexName) 41 | } 42 | if fieldName != "field0" { 43 | t.Fatalf("expected field name: %v, got %v", "field0", fieldName) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /imagine/samples/bsi.toml: -------------------------------------------------------------------------------- 1 | # int ingest/update for 16/32/63-bit values. 
2 | # value generation misbehaves if the actual range (max - min) exceeds 2^63
3 | densityscale = 2097152
4 | version = "1.0"
5 | [indexes.inttest]
6 | columns = 1000000
7 | fields = [
8 | { name = "int16", type = "int", max = 65535, density = 0.99, valueRule = "zipf", zipfV = 3.0, zipfS = 1.1 },
9 | { name = "int32", type = "int", max = 4294967295, density = 0.99, valueRule = "zipf", zipfV = 3.0, zipfS = 1.1 },
10 | { name = "int63", type = "int", max = 9223372036854775807, density = 0.99, valueRule = "zipf", zipfV = 3.0, zipfS = 1.1 },
11 | ]
12 | [[workloads]]
13 | name = "Int initial import"
14 | tasks = [{ index = "inttest", field = "int16", seed = 1 }]
15 | 
16 | # TODO: move to another workload
17 | #tasks = [{ index = "inttest", field = "int32", seed = 2 }]
18 | 
19 | # TODO: move to another workload
20 | #tasks = [{ index = "inttest", field = "int63", seed = 3 }]
21 | 
22 | [[workloads]]
23 | name = "Int updates"
24 | tasks = [{ index = "inttest", field = "int16", seed = 4, columns = 1000, columnOrder = "permute" }]
25 | 
26 | 
27 | # TODO: move to another workload
28 | #tasks = [{ index = "inttest", field = "int32", seed = 5, columns = 1000, columnOrder = "permute" }]
29 | 
30 | # TODO: move to another workload
31 | #tasks = [{ index = "inttest", field = "int63", seed = 6, columns = 1000, columnOrder = "permute" }]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Pilosa Tools
2 | ===========================================
3 | 
4 | This repo contains the `pi` tool which can:
5 | 
6 | - run a variety of predefined benchmarks (that can be configured in various ways)
7 | - run combinations of predefined benchmarks
8 | - run benchmarks from multiple "agents" simultaneously
9 | - store, locally, the results of complex combinations of benchmarks running on multiple agents.
10 | 
11 | The `pi` tool contains several subcommands which are described in more detail below. To get help for pi, or any subcommand of pi (or any subcommand of any subcommand of pi, etc.), just append `--help` at the command line: e.g. `pi --help` or `pi bench --help` or `pi bench import --help`.
12 | 
13 | 
14 | ## bench
15 | 
16 | The bench command has a set of subcommands, one for each available benchmark. All of them take a `--hosts` argument which specifies the Pilosa cluster, and a `--agent-num` argument. The agent num argument is mostly used by `pi spawn`, and we discuss it in more detail in that section.
17 | 
18 | Example:
19 | 
20 | ```
21 | pi bench import --hosts=one.example.com:10101,two.example.com:10101,three.example.com:10101 --iterations=100000 --max-column-id=10000 --max-row-id=1000
22 | ```
23 | 
24 | The above would import 100,000 random bits into the three-node Pilosa cluster specified. All bits would have column ID between 0 and 10,000, and row ID between 0 and 1000.
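
Each benchmark documents how (or whether) it uses `--agent-num` to keep concurrent agents from doing overlapping work. For example, `diagonal-set-bits` offsets its minimum row and column IDs by the number of iterations, so agents set disjoint runs of bits along the diagonal, while `random-query` mixes the agent number into its random seed. A sketch of launching two non-overlapping agents by hand (normally `pi spawn` manages agent numbers):

```
pi bench diagonal-set-bits --hosts=one.example.com:10101 --iterations=100 --agent-num=0
pi bench diagonal-set-bits --hosts=one.example.com:10101 --iterations=100 --agent-num=1
```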
25 | 26 | -------------------------------------------------------------------------------- /cmd/pi/query.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // NewQueryCommand subcommands 12 | func NewQueryCommand() *cobra.Command { 13 | b := bench.NewQueryBenchmark() 14 | cmd := &cobra.Command{ 15 | Use: "query", 16 | Short: "Runs the given PQL query against pilosa and records the results along with the duration.", 17 | Long: `Runs the given PQL query against pilosa and records the results along with the duration. 18 | Agent num has no effect`, 19 | RunE: func(cmd *cobra.Command, args []string) error { 20 | flags := cmd.Flags() 21 | b.Logger = NewLoggerFromFlags(flags) 22 | client, err := NewClientFromFlags(flags) 23 | if err != nil { 24 | return err 25 | } 26 | agentNum, err := flags.GetInt("agent-num") 27 | if err != nil { 28 | return err 29 | } 30 | result, err := b.Run(context.Background(), client, agentNum) 31 | if err != nil { 32 | result.Error = err.Error() 33 | } 34 | return PrintResults(cmd, result, os.Stdout) 35 | }, 36 | } 37 | 38 | flags := cmd.Flags() 39 | flags.IntVar(&b.Iterations, "iterations", 1, "Number of times to repeat the query.") 40 | flags.StringVar(&b.Query, "query", "Count(Row(fbench=1))", "PQL query to perform.") 41 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index to use.") 42 | 43 | return cmd 44 | } 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Pilosa Corp. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
12 | -------------------------------------------------------------------------------- /cmd/pi/slicewidth.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewSliceWidthCommand() *cobra.Command { 12 | b := bench.NewSliceWidthBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "slice-width", 15 | Short: "Imports a given density of data uniformly over a configurable number of slices.", 16 | Long: `Imports a given density of data uniformly over a configurable number of slices based on bit density and slice count`, 17 | RunE: func(cmd *cobra.Command, args []string) error { 18 | flags := cmd.Flags() 19 | b.Logger = NewLoggerFromFlags(flags) 20 | client, err := NewClientFromFlags(flags) 21 | if err != nil { 22 | return err 23 | } 24 | agentNum, err := flags.GetInt("agent-num") 25 | if err != nil { 26 | return err 27 | } 28 | result, err := b.Run(context.Background(), client, agentNum) 29 | if err != nil { 30 | result.Error = err.Error() 31 | } 32 | return PrintResults(cmd, result, os.Stdout) 33 | }, 34 | } 35 | 36 | flags := cmd.Flags() 37 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index to use.") 38 | flags.StringVar(&b.Field, "field", defaultField, "Field to import into.") 39 | flags.Float64Var(&b.BitDensity, "bit-density", 0.1, "data density.") 40 | flags.Int64Var(&b.SliceWidth, "slice-width", 1048576, "slice width, default to 2^20") 41 | flags.Int64Var(&b.SliceCount, "slice-count", 1, "slice count") 42 | 43 | return cmd 44 | } 45 | -------------------------------------------------------------------------------- /cmd/pi/basic_query.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewBasicQueryCommand() *cobra.Command { 12 | b := bench.NewBasicQueryBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "basic-query", 15 | Short: "Runs the given PQL query against pilosa multiple times with different arguments.", 16 | Long: `Runs the given PQL query against pilosa multiple times with different arguments. 17 | 18 | Agent num has no effect.`, 19 | RunE: func(cmd *cobra.Command, args []string) error { 20 | flags := cmd.Flags() 21 | b.Logger = NewLoggerFromFlags(flags) 22 | client, err := NewClientFromFlags(flags) 23 | if err != nil { 24 | return err 25 | } 26 | agentNum, err := flags.GetInt("agent-num") 27 | if err != nil { 28 | return err 29 | } 30 | result, err := b.Run(context.Background(), client, agentNum) 31 | if err != nil { 32 | result.Error = err.Error() 33 | } 34 | return PrintResults(cmd, result, os.Stdout) 35 | }, 36 | } 37 | 38 | flags := cmd.Flags() 39 | flags.IntVar(&b.Iterations, "iterations", 1, "Number of queries to make.") 40 | flags.IntVar(&b.NumArgs, "num-args", 2, "Number of rows to put in each query (i.e. 
number of rows to intersect)")
41 | 	flags.StringVar(&b.Query, "query", "Intersect", "query to perform (Intersect, Union, Difference, Xor)")
42 | 	flags.StringVar(&b.Field, "field", defaultField, "Field to query.")
43 | 	flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index to use.")
44 | 
45 | 	return cmd
46 | }
47 | 
--------------------------------------------------------------------------------
/bench/doc.go:
--------------------------------------------------------------------------------
1 | // Package bench contains benchmarks and common utilities useful to benchmarks.
2 | //
3 | // In order to write new benchmarks, one must satisfy the Benchmark interface in
4 | // bench.go. In order to use the benchmark from pi, it needs a new file under
5 | // tools/cmd which defines a cobra.Cmd - look at an existing benchmark's file to
6 | // see what needs to be done.
7 | //
8 | // When writing a new benchmark, there are a few things to keep in mind other
9 | // than just implementing the interface:
10 | //
11 | // 1. The benchmark should modify its own configuration in its Init method based
12 | // on the agentNum it is given. How it modifies is specific to the benchmark,
13 | // but the idea is that it should make sense to call the benchmark with the same
14 | // configuration, but multiple different agent numbers, and it should do useful
15 | // work each time (i.e. not just setting the same bits, or running the same
16 | // queries).
17 | //
18 | // 2. The Init method should do everything that needs to be done to get the
19 | // benchmark to a runnable state - all code in Run should be the stuff that we
20 | // actually want to time.
21 | //
22 | // 3. The Run method does not need to report the total runtime - that is collected
23 | // by calling code.
24 | //
25 | // Files:
26 | //
27 | // 1. client.go contains pilosa client code which is shared by many benchmarks.
28 | //
29 | // 2. errgroup.go contains the ErrGroup implementation copied from golang.org/x/
30 | // so as not to pull in a bunch of useless deps.
31 | //
32 | // 3. stats.go contains useful code for gathering stats about a series of timed
33 | // operations.
34 | package bench
--------------------------------------------------------------------------------
/bench/slicewidth.go:
--------------------------------------------------------------------------------
1 | package bench
2 | 
3 | import (
4 | 	"context"
5 | 	"log"
6 | 	"os"
7 | 
8 | 	"github.com/pilosa/go-pilosa"
9 | )
10 | 
11 | // SliceWidthBenchmark imports data at a given bit density across a configurable
12 | // number of slices, to compare the effect of slice width on query time.
13 | type SliceWidthBenchmark struct {
14 | 	Name string `json:"name"`
15 | 	Index string `json:"index"`
16 | 	Field string `json:"field"`
17 | 	BitDensity float64 `json:"bit-density"`
18 | 	SliceWidth int64 `json:"slice-width"`
19 | 	SliceCount int64 `json:"slice-count"`
20 | 
21 | 	Logger *log.Logger `json:"-"`
22 | }
23 | 
24 | // NewSliceWidthBenchmark creates a slice width benchmark.
25 | func NewSliceWidthBenchmark() *SliceWidthBenchmark {
26 | 	return &SliceWidthBenchmark{
27 | 		Name: "slice-width",
28 | 		Logger: log.New(os.Stderr, "", log.LstdFlags),
29 | 	}
30 | }
31 | 
32 | // Run runs the benchmark to import data.
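// It does so by deriving an ImportBenchmark from the slice parameters: the
// column space is SliceWidth*SliceCount, and the number of bits set is
// BitDensity*numColumns*numRows (numRows is fixed at 1000), distributed
// uniformly over that space.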
33 | func (b *SliceWidthBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 34 | numColumns := b.SliceWidth * b.SliceCount 35 | numRows := int64(1000) 36 | 37 | importBenchmark := NewImportBenchmark() 38 | importBenchmark.MaxRowID = numRows 39 | importBenchmark.MinColumnID = 0 40 | importBenchmark.MaxColumnID = numColumns 41 | importBenchmark.Iterations = int64(float64(numColumns)*b.BitDensity) * numRows 42 | importBenchmark.Index = b.Index 43 | importBenchmark.Field = b.Field 44 | importBenchmark.Distribution = "uniform" 45 | importBenchmark.BufferSize = 1000000 46 | 47 | result, err := importBenchmark.Run(ctx, client, agentNum) 48 | result.Configuration = b 49 | return result, err 50 | } 51 | -------------------------------------------------------------------------------- /bench/diagonal.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "os" 7 | "time" 8 | 9 | "github.com/pilosa/go-pilosa" 10 | ) 11 | 12 | // DiagonalSetBitsBenchmark sets bits with increasing column id and row id. 13 | type DiagonalSetBitsBenchmark struct { 14 | Name string `json:"name"` 15 | MinRowID int `json:"min-row-id"` 16 | MinColumnID int `json:"min-column-id"` 17 | Iterations int `json:"iterations"` 18 | Index string `json:"index"` 19 | Field string `json:"field"` 20 | 21 | Logger *log.Logger `json:"-"` 22 | } 23 | 24 | // NewDiagonalSetBitsBenchmark returns a new instance of DiagonalSetBitsBenchmark. 25 | func NewDiagonalSetBitsBenchmark() *DiagonalSetBitsBenchmark { 26 | return &DiagonalSetBitsBenchmark{ 27 | Name: "diagonal-set-bits", 28 | Logger: log.New(os.Stderr, "", log.LstdFlags), 29 | } 30 | } 31 | 32 | // Run runs the benchmark. 33 | func (b *DiagonalSetBitsBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 34 | result := NewResult() 35 | result.AgentNum = agentNum 36 | result.Configuration = b 37 | 38 | // Initialize schema. 
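// ensureSchema is a helper shared by the benchmarks (the shared client code
// lives in client.go); it is assumed to create the index and field if they
// do not already exist.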
39 | _, field, err := ensureSchema(client, b.Index, b.Field) 40 | if err != nil { 41 | return result, err 42 | } 43 | 44 | minRowID := b.MinRowID + (agentNum * b.Iterations) 45 | minColumnID := b.MinColumnID + (agentNum * b.Iterations) 46 | 47 | for n := 0; n < b.Iterations; n++ { 48 | start := time.Now() 49 | _, err := client.Query(field.Set(minRowID+n, minColumnID+n)) 50 | result.Add(time.Since(start), nil) 51 | if err != nil { 52 | return result, err 53 | } 54 | } 55 | return result, nil 56 | } 57 | -------------------------------------------------------------------------------- /imagine/samples/students.toml: -------------------------------------------------------------------------------- 1 | densityscale = 2097152 2 | version = "1.0" 3 | [indexes.students_ts] 4 | columns = 10000 5 | fields = [ 6 | { name = "gender", type = "mutex", max = 3, density = 0.9, valueRule = "linear", cache = "lru" }, 7 | { name = "school", type = "set", max = 400, density = 0.10, valueRule = "zipf" , cache = "lru" }, 8 | { name = "timestamp", type = "time", max=1, density = 1.0, valueRule="zipf", quantum="YMDH" }, 9 | #{ name = "client_mac", type="set", max=10000000, density=0.001, valueRule="zipf"}, 10 | { name = "zone", type="set", max=300, density=0.1, valueRule="zipf", cache = "lru" }, 11 | { name = "age", type="int", max=100, min=14, density=0.99, valueRule="zipf", zipfV = 3.0, zipfS = 1.1 }, 12 | { name = "on_campus", type="mutex", max=2, density=0.9, valueRule="zipf", cache = "lru", zipfS = 1.1, zipfV = 3.0 }, 13 | { name = "athlete", type="mutex", max=2, density=1.0, valueRule="linear", cache = "lru" }, 14 | { name = "gpa", type="int", max=400, min=0, density=0.99, valueRule="zipf", zipfV = 3.0, zipfS = 1.1 }, 15 | ] 16 | [[workloads]] 17 | name = "ingest" 18 | threadCount = 6 19 | batchSize = 1048576 20 | useRoaring = true 21 | tasks = [ 22 | { index = "students_ts", field = "gender"}, 23 | { index = "students_ts", field = "school"}, 24 | { index = "students_ts", field = "timestamp", stamp = "increasing", stampStart = "2019-01-01T00:00:00Z", stampRange = "240h" }, 25 | #{ index = "students_ts", field = "client_mac"}, 26 | { index = "students_ts", field = "zone"}, 27 | { index = "students_ts", field = "age", seed = 1}, 28 | { index = "students_ts", field = "on_campus"}, 29 | { index = "students_ts", field = "gpa", seed = 2}, 30 | ] 31 | -------------------------------------------------------------------------------- /cmd/pi/diagonal.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewDiagonalSetBitsCommand() *cobra.Command { 12 | b := bench.NewDiagonalSetBitsBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "diagonal-set-bits", 15 | Short: "Sets bits with increasing column id and row id.", 16 | Long: `Sets bits with increasing column id and row id. 
17 | 18 | Agent num offsets both the min column id and min row id by the number of 19 | iterations, so that only bits on the main diagonal are set, and agents don't 20 | overlap at all.`, 21 | RunE: func(cmd *cobra.Command, args []string) error { 22 | flags := cmd.Flags() 23 | b.Logger = NewLoggerFromFlags(flags) 24 | client, err := NewClientFromFlags(flags) 25 | if err != nil { 26 | return err 27 | } 28 | agentNum, err := flags.GetInt("agent-num") 29 | if err != nil { 30 | return err 31 | } 32 | result, err := b.Run(context.Background(), client, agentNum) 33 | if err != nil { 34 | result.Error = err.Error() 35 | } 36 | return PrintResults(cmd, result, os.Stdout) 37 | }, 38 | } 39 | 40 | flags := cmd.Flags() 41 | flags.IntVar(&b.MinRowID, "min-row-id", 0, "Rows being set will all be greater than this.") 42 | flags.IntVar(&b.MinColumnID, "min-column-id", 0, "Columns being set will all be greater than this.") 43 | flags.IntVar(&b.Iterations, "iterations", 100, "Number of bits to set.") 44 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index in which to set bits.") 45 | flags.StringVar(&b.Field, "field", defaultField, "Pilosa field in which to set bits.") 46 | 47 | return cmd 48 | } 49 | -------------------------------------------------------------------------------- /cmd/pi/range_query.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewRangeQueryCommand() *cobra.Command { 12 | b := bench.NewRangeQueryBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "range-query", 15 | Short: "Constructs and performs range queries.", 16 | Long: `Constructs and performs range queries. 
17 | Agent num modifies random seed.`, 18 | RunE: func(cmd *cobra.Command, args []string) error { 19 | flags := cmd.Flags() 20 | b.Logger = NewLoggerFromFlags(flags) 21 | client, err := NewClientFromFlags(flags) 22 | if err != nil { 23 | return err 24 | } 25 | agentNum, err := flags.GetInt("agent-num") 26 | if err != nil { 27 | return err 28 | } 29 | result, err := b.Run(context.Background(), client, agentNum) 30 | if err != nil { 31 | result.Error = err.Error() 32 | } 33 | return PrintResults(cmd, result, os.Stdout) 34 | }, 35 | } 36 | 37 | flags := cmd.Flags() 38 | flags.IntVar(&b.MaxDepth, "max-depth", 2, "Maximum nesting of queries.") 39 | flags.IntVar(&b.MaxArgs, "max-args", 2, "Maximum number of arguments per query.") 40 | flags.Int64Var(&b.MinRange, "min-range", 0, "Minimum range to include in queries.") 41 | flags.Int64Var(&b.MaxRange, "max-range", 100, "Maximum range to include in queries.") 42 | flags.Int64Var(&b.Seed, "seed", 1, "random seed") 43 | flags.IntVar(&b.Iterations, "iterations", 100, "Number queries to perform.") 44 | flags.StringVar(&b.Field, "field", defaultField, "Field to query.") 45 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index to use.") 46 | flags.StringVar(&b.QueryType, "type", "sum", "Query type for range, default to sum") 47 | return cmd 48 | } 49 | -------------------------------------------------------------------------------- /cmd/pi/random_query.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewRandomQueryCommand() *cobra.Command { 12 | b := bench.NewRandomQueryBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "random-query", 15 | Short: "Constructs and performs random queries.", 16 | Long: `Constructs and performs random queries. 
17 | Agent num modifies random seed.`, 18 | RunE: func(cmd *cobra.Command, args []string) error { 19 | flags := cmd.Flags() 20 | b.Logger = NewLoggerFromFlags(flags) 21 | client, err := NewClientFromFlags(flags) 22 | if err != nil { 23 | return err 24 | } 25 | agentNum, err := flags.GetInt("agent-num") 26 | if err != nil { 27 | return err 28 | } 29 | result, err := b.Run(context.Background(), client, agentNum) 30 | if err != nil { 31 | result.Error = err.Error() 32 | } 33 | return PrintResults(cmd, result, os.Stdout) 34 | }, 35 | } 36 | 37 | flags := cmd.Flags() 38 | flags.IntVar(&b.MaxDepth, "max-depth", 4, "Maximum nesting of queries.") 39 | flags.IntVar(&b.MaxArgs, "max-args", 4, "Maximum number of arguments per query.") 40 | flags.IntVar(&b.MaxN, "max-n", 100, "Maximum value of N for TopN queries.") 41 | flags.Int64Var(&b.MinRowID, "min-row-id", 0, "Minimum row id to include in queries.") 42 | flags.Int64Var(&b.MaxRowID, "max-row-id", 100000, "Maximum row id to include in queries.") 43 | flags.Int64Var(&b.Seed, "seed", 1, "random seed") 44 | flags.IntVar(&b.Iterations, "iterations", 100, "Number queries to perform.") 45 | flags.StringVar(&b.Field, "field", defaultField, "Field to query.") 46 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index to use.") 47 | 48 | return cmd 49 | } 50 | -------------------------------------------------------------------------------- /bench/range_query.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "os" 7 | "time" 8 | 9 | "github.com/pilosa/go-pilosa" 10 | ) 11 | 12 | // RangeQueryBenchmark runs Range query randomly. 13 | type RangeQueryBenchmark struct { 14 | Name string `json:"name"` 15 | MaxDepth int `json:"max-depth"` 16 | MaxArgs int `json:"max-args"` 17 | MaxN int `json:"max-n"` 18 | MinRange int64 `json:"min-range"` 19 | MaxRange int64 `json:"max-range"` 20 | Iterations int `json:"iterations"` 21 | Seed int64 `json:"seed"` 22 | Frame string `json:"frame"` 23 | Index string `json:"index"` 24 | Field string `json:"field"` 25 | QueryType string `json:"type"` 26 | 27 | Logger *log.Logger `json:"-"` 28 | } 29 | 30 | // NewRangeQueryBenchmark returns a new instance of RangeQueryBenchmark. 31 | func NewRangeQueryBenchmark() *RangeQueryBenchmark { 32 | return &RangeQueryBenchmark{ 33 | Name: "range-query", 34 | Logger: log.New(os.Stderr, "", log.LstdFlags), 35 | } 36 | } 37 | 38 | // Run runs the benchmark. 39 | func (b *RangeQueryBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 40 | result := NewResult() 41 | result.AgentNum = agentNum 42 | result.Configuration = b 43 | 44 | // Initialize schema. 
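// Note that the query generator below is seeded with b.Seed alone; unlike
// RandomQueryBenchmark, the agent number is not mixed into the seed here.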
45 | index, field, err := ensureSchema(client, b.Index, b.Field) 46 | if err != nil { 47 | return result, err 48 | } 49 | 50 | g := NewQueryGenerator(index, field, b.Seed) 51 | for n := 0; n < b.Iterations; n++ { 52 | start := time.Now() 53 | _, err := client.Query(g.RandomRangeQuery(b.MaxDepth, b.MaxArgs, uint64(b.MinRange), uint64(b.MaxRange))) 54 | result.Add(time.Since(start), nil) 55 | if err != nil { 56 | return result, err 57 | } 58 | } 59 | return result, nil 60 | } 61 | -------------------------------------------------------------------------------- /bench/random_query.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "os" 7 | "time" 8 | 9 | "github.com/pilosa/go-pilosa" 10 | ) 11 | 12 | var _ Benchmark = (*BasicQueryBenchmark)(nil) 13 | 14 | // RandomQueryBenchmark queries randomly and deterministically based on a seed. 15 | type RandomQueryBenchmark struct { 16 | Name string `json:"name"` 17 | MaxDepth int `json:"max-depth"` 18 | MaxArgs int `json:"max-args"` 19 | MaxN int `json:"max-n"` 20 | MinRowID int64 `json:"min-row-id"` 21 | MaxRowID int64 `json:"max-row-id"` 22 | Iterations int `json:"iterations"` 23 | Seed int64 `json:"seed"` 24 | Index string `json:"index"` 25 | Field string `json:"field"` 26 | 27 | Logger *log.Logger `json:"-"` 28 | } 29 | 30 | // NewRandomQueryBenchmark returns a new instance of RandomQueryBenchmark. 31 | func NewRandomQueryBenchmark() *RandomQueryBenchmark { 32 | return &RandomQueryBenchmark{ 33 | Name: "random-query", 34 | Logger: log.New(os.Stderr, "", log.LstdFlags), 35 | } 36 | } 37 | 38 | // Run runs the RandomQuery benchmark 39 | func (b *RandomQueryBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 40 | result := NewResult() 41 | result.AgentNum = agentNum 42 | result.Configuration = b 43 | 44 | // Initialize schema. 45 | index, field, err := ensureSchema(client, b.Index, b.Field) 46 | if err != nil { 47 | return result, err 48 | } 49 | 50 | g := NewQueryGenerator(index, field, b.Seed+int64(agentNum)) 51 | for n := 0; n < b.Iterations; n++ { 52 | start := time.Now() 53 | _, err := client.Query(g.Random(b.MaxN, b.MaxDepth, b.MaxArgs, uint64(b.MinRowID), uint64(b.MaxRowID-b.MinRowID))) 54 | result.Add(time.Since(start), nil) 55 | if err != nil { 56 | return result, err 57 | } 58 | } 59 | return result, nil 60 | } 61 | -------------------------------------------------------------------------------- /cmd/pi/replay.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | 8 | "github.com/jaffee/commandeer/cobrafy" 9 | "github.com/pilosa/go-pilosa" 10 | "github.com/pkg/errors" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | func NewReplayCommand() *cobra.Command { 15 | com, err := cobrafy.Command(newReplayCommand()) 16 | if err != nil { 17 | panic(fmt.Sprintf("Couldn't create cobra command: %v", err)) 18 | } 19 | com.Use = "replay" 20 | com.Short = "Replay recorded Pilosa imports." 21 | com.Long = `Replay recorded Pilosa imports. 22 | 23 | The go-pilosa client contains an option which allows it to record all 24 | imports it runs to a file (or other io.Writer). This tool takes a 25 | Pilosa cluster and a filename containing such recorded data and 26 | imports the data into that cluster. The cluster must already have the 27 | schema set up to support the recorded import data. 
28 | ` 29 | 30 | return com 31 | } 32 | 33 | func newReplayCommand() *ReplayCommand { 34 | return &ReplayCommand{ 35 | File: "replay.gopilosa", 36 | Hosts: []string{"localhost:10101"}, 37 | Concurrency: 8, 38 | } 39 | } 40 | 41 | type ReplayCommand struct { 42 | File string `help:"File to read from."` 43 | Hosts []string `help:"Pilosa hosts (comma separated)."` 44 | Concurrency int `help:"Number of goroutines importing data." short:"n"` 45 | } 46 | 47 | func (r *ReplayCommand) Run() error { 48 | client, err := pilosa.NewClient(r.Hosts) 49 | if err != nil { 50 | return errors.Wrap(err, "creating Pilosa client") 51 | } 52 | f, err := os.Open(r.File) 53 | if err != nil { 54 | return errors.Wrap(err, "opening replay file") 55 | } 56 | 57 | start := time.Now() 58 | err = client.ExperimentalReplayImport(f, r.Concurrency) 59 | if err != nil { 60 | return errors.Wrap(err, "") 61 | } 62 | fmt.Printf("Done: %v", time.Since(start)) 63 | return nil 64 | } 65 | -------------------------------------------------------------------------------- /apophenia/int128_test.go: -------------------------------------------------------------------------------- 1 | package apophenia 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func Test_Int128Rotate(t *testing.T) { 8 | cases := []struct { 9 | in Uint128 10 | bits uint64 11 | outL, outR Uint128 12 | }{ 13 | {in: Uint128{Lo: 0x1}, bits: 1, outR: Uint128{Lo: 0x0, Hi: 1 << 63}, outL: Uint128{Lo: 0x2, Hi: 0}}, 14 | {in: Uint128{Lo: 0x11}, bits: 4, outR: Uint128{Lo: 1, Hi: 1 << 60}, outL: Uint128{Lo: 0x110, Hi: 0}}, 15 | {in: Uint128{Lo: 0x11}, bits: 65, outR: Uint128{Lo: 1 << 63, Hi: 8}, outL: Uint128{Lo: 0, Hi: 0x22}}, 16 | } 17 | for _, c := range cases { 18 | u := c.in 19 | u.RotateRight(c.bits) 20 | if u != c.outR { 21 | t.Fatalf("rotate %s right by %d: expected %s, got %s", 22 | c.in, c.bits, c.outR, u) 23 | } 24 | u = c.in 25 | u.RotateLeft(c.bits) 26 | if u != c.outL { 27 | t.Fatalf("rotate %s left by %d: expected %s, got %s", 28 | c.in, c.bits, c.outL, u) 29 | } 30 | } 31 | } 32 | 33 | func Test_Int128Shift(t *testing.T) { 34 | cases := []struct { 35 | in Uint128 36 | bits uint64 37 | outL, outR Uint128 38 | }{ 39 | {in: Uint128{Lo: 0x1}, bits: 1, outR: Uint128{Lo: 0x0, Hi: 0}, outL: Uint128{Lo: 0x2, Hi: 0}}, 40 | {in: Uint128{Lo: 0x11}, bits: 4, outR: Uint128{Lo: 1, Hi: 0}, outL: Uint128{Lo: 0x110, Hi: 0}}, 41 | {in: Uint128{Lo: 0x11, Hi: 0x3}, bits: 65, outR: Uint128{Lo: 1, Hi: 0}, outL: Uint128{Lo: 0, Hi: 0x22}}, 42 | {in: Uint128{Lo: 0, Hi: 0x11}, bits: 68, outR: Uint128{Lo: 1, Hi: 0}, outL: Uint128{Lo: 0, Hi: 0}}, 43 | } 44 | for _, c := range cases { 45 | u := c.in 46 | u.ShiftRight(c.bits) 47 | if u != c.outR { 48 | t.Fatalf("shift %s right by %d: expected %s, got %s", 49 | c.in, c.bits, c.outR, u) 50 | } 51 | u = c.in 52 | u.ShiftLeft(c.bits) 53 | if u != c.outL { 54 | t.Fatalf("shift %s left by %d: expected %s, got %s", 55 | c.in, c.bits, c.outL, u) 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /bench/bench_test.go: -------------------------------------------------------------------------------- 1 | package bench_test 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "os" 8 | "testing" 9 | "time" 10 | 11 | "github.com/pilosa/tools/bench" 12 | ) 13 | 14 | func prettyEncode(data map[string]interface{}) string { 15 | pretty := bench.Prettify(data) 16 | jsonString := new(bytes.Buffer) 17 | enc := json.NewEncoder(jsonString) 18 | enc.SetIndent("", " ") 19 | err := enc.Encode(pretty) 20 | 
if err != nil { 21 | fmt.Fprintln(os.Stderr, err) 22 | } 23 | 24 | return jsonString.String() 25 | } 26 | 27 | func TestPrettifyString(t *testing.T) { 28 | res := make(map[string]interface{}, 1) 29 | res["0"] = "foobar" 30 | pretty := prettyEncode(res) 31 | 32 | expected := ` 33 | { 34 | "0": "foobar" 35 | } 36 | `[1:] 37 | 38 | if pretty != expected { 39 | t.Fatalf("Pretty string doesn't match") 40 | } 41 | } 42 | 43 | func TestPrettifyInt(t *testing.T) { 44 | res := make(map[string]interface{}, 1) 45 | res["0"] = 234567 46 | pretty := prettyEncode(res) 47 | 48 | expected := ` 49 | { 50 | "0": 234567 51 | } 52 | `[1:] 53 | 54 | if pretty != expected { 55 | t.Fatalf("Pretty int doesn't match") 56 | } 57 | } 58 | 59 | func TestPrettifyDuration(t *testing.T) { 60 | res := make(map[string]interface{}, 1) 61 | res["0"] = time.Duration(234567) 62 | pretty := prettyEncode(res) 63 | 64 | expected := ` 65 | { 66 | "0": "234.567µs" 67 | } 68 | `[1:] 69 | 70 | if pretty != expected { 71 | t.Fatalf("Pretty duration doesn't match") 72 | } 73 | } 74 | 75 | func TestPrettifyDurationSlice(t *testing.T) { 76 | res := make(map[string]interface{}, 1) 77 | res["0"] = []time.Duration{123, 234567, 34567890} 78 | pretty := prettyEncode(res) 79 | 80 | expected := ` 81 | { 82 | "0": [ 83 | "123ns", 84 | "234.567µs", 85 | "34.56789ms" 86 | ] 87 | } 88 | `[1:] 89 | 90 | if pretty != expected { 91 | t.Fatalf("Pretty duration slice doesn't match") 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/pilosa/tools 2 | 3 | go 1.12 4 | 5 | require ( 6 | cloud.google.com/go v0.43.0 // indirect 7 | github.com/BurntSushi/toml v0.3.1 8 | github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect 9 | github.com/go-kit/kit v0.9.0 // indirect 10 | github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6 // indirect 11 | github.com/gorilla/handlers v1.4.1 // indirect 12 | github.com/gorilla/mux v1.7.3 // indirect 13 | github.com/grpc-ecosystem/grpc-gateway v1.9.4 // indirect 14 | github.com/jaffee/commandeer v0.1.0 15 | github.com/kr/pty v1.1.8 // indirect 16 | github.com/miekg/dns v1.1.15 // indirect 17 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect 18 | github.com/pilosa/go-pilosa v1.3.1-0.20190715210601-8606626b90d6 19 | github.com/pilosa/pilosa v1.3.1 20 | github.com/pkg/errors v0.8.1 21 | github.com/prometheus/procfs v0.0.3 // indirect 22 | github.com/rogpeppe/fastuuid v1.2.0 // indirect 23 | github.com/spf13/cobra v0.0.5 24 | github.com/spf13/pflag v1.0.3 25 | github.com/spf13/viper v1.4.0 26 | github.com/ugorji/go v1.1.7 // indirect 27 | golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 // indirect 28 | golang.org/x/exp v0.0.0-20190718202018-cfdd5522f6f6 // indirect 29 | golang.org/x/image v0.0.0-20190703141733-d6a02ce849c9 // indirect 30 | golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028 // indirect 31 | golang.org/x/net v0.0.0-20190628185345-da137c7871d7 // indirect 32 | golang.org/x/sync v0.0.0-20190423024810-112230192c58 33 | golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 // indirect 34 | golang.org/x/tools v0.0.0-20190719005602-e377ae9d6386 // indirect 35 | google.golang.org/grpc v1.22.0 // indirect 36 | ) 37 | 38 | replace github.com/pilosa/pilosa => github.com/pilosa/pilosa v1.2.1-0.20190715194839-bd00f1bfe2b2 39 | 40 | replace github.com/pilosa/go-pilosa => github.com/pilosa/go-pilosa 
v1.3.1-0.20190715210601-8606626b90d6 41 | -------------------------------------------------------------------------------- /imagine/enums_stamptype.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=stampType -trimprefix=stampType -text -transform=kebab -output enums_stamptype.go"; DO NOT EDIT. 2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _stampTypeName = "noneincreasingrandom" 11 | 12 | var _stampTypeIndex = [...]uint8{0, 4, 14, 20} 13 | 14 | func (i stampType) String() string { 15 | if i < 0 || i >= stampType(len(_stampTypeIndex)-1) { 16 | return fmt.Sprintf("stampType(%d)", i) 17 | } 18 | return _stampTypeName[_stampTypeIndex[i]:_stampTypeIndex[i+1]] 19 | } 20 | 21 | var _stampTypeValues = []stampType{0, 1, 2} 22 | 23 | var _stampTypeNameToValueMap = map[string]stampType{ 24 | _stampTypeName[0:4]: 0, 25 | _stampTypeName[4:14]: 1, 26 | _stampTypeName[14:20]: 2, 27 | } 28 | 29 | // stampTypeString retrieves an enum value from the enum constants string name. 30 | // Throws an error if the param is not part of the enum. 31 | func stampTypeString(s string) (stampType, error) { 32 | if val, ok := _stampTypeNameToValueMap[s]; ok { 33 | return val, nil 34 | } 35 | return 0, fmt.Errorf("%s does not belong to stampType values", s) 36 | } 37 | 38 | // stampTypeValues returns all values of the enum 39 | func stampTypeValues() []stampType { 40 | return _stampTypeValues 41 | } 42 | 43 | // IsAstampType returns "true" if the value is listed in the enum definition. "false" otherwise 44 | func (i stampType) IsAstampType() bool { 45 | for _, v := range _stampTypeValues { 46 | if i == v { 47 | return true 48 | } 49 | } 50 | return false 51 | } 52 | 53 | // MarshalText implements the encoding.TextMarshaler interface for stampType 54 | func (i stampType) MarshalText() ([]byte, error) { 55 | return []byte(i.String()), nil 56 | } 57 | 58 | // UnmarshalText implements the encoding.TextUnmarshaler interface for stampType 59 | func (i *stampType) UnmarshalText(text []byte) error { 60 | var err error 61 | *i, err = stampTypeString(string(text)) 62 | return err 63 | } 64 | -------------------------------------------------------------------------------- /imagine/enums_densitytype.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=densityType -trimprefix=densityType -text -transform=kebab -output enums_densitytype.go"; DO NOT EDIT. 2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _densityTypeName = "linearzipf" 11 | 12 | var _densityTypeIndex = [...]uint8{0, 6, 10} 13 | 14 | func (i densityType) String() string { 15 | if i < 0 || i >= densityType(len(_densityTypeIndex)-1) { 16 | return fmt.Sprintf("densityType(%d)", i) 17 | } 18 | return _densityTypeName[_densityTypeIndex[i]:_densityTypeIndex[i+1]] 19 | } 20 | 21 | var _densityTypeValues = []densityType{0, 1} 22 | 23 | var _densityTypeNameToValueMap = map[string]densityType{ 24 | _densityTypeName[0:6]: 0, 25 | _densityTypeName[6:10]: 1, 26 | } 27 | 28 | // densityTypeString retrieves an enum value from the enum constants string name. 29 | // Throws an error if the param is not part of the enum. 
30 | func densityTypeString(s string) (densityType, error) { 31 | if val, ok := _densityTypeNameToValueMap[s]; ok { 32 | return val, nil 33 | } 34 | return 0, fmt.Errorf("%s does not belong to densityType values", s) 35 | } 36 | 37 | // densityTypeValues returns all values of the enum 38 | func densityTypeValues() []densityType { 39 | return _densityTypeValues 40 | } 41 | 42 | // IsAdensityType returns "true" if the value is listed in the enum definition. "false" otherwise 43 | func (i densityType) IsAdensityType() bool { 44 | for _, v := range _densityTypeValues { 45 | if i == v { 46 | return true 47 | } 48 | } 49 | return false 50 | } 51 | 52 | // MarshalText implements the encoding.TextMarshaler interface for densityType 53 | func (i densityType) MarshalText() ([]byte, error) { 54 | return []byte(i.String()), nil 55 | } 56 | 57 | // UnmarshalText implements the encoding.TextUnmarshaler interface for densityType 58 | func (i *densityType) UnmarshalText(text []byte) error { 59 | var err error 60 | *i, err = densityTypeString(string(text)) 61 | return err 62 | } 63 | -------------------------------------------------------------------------------- /imagine/enums_cachetype.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=cacheType -trimprefix=cacheType -text -transform=kebab -output enums_cachetype.go"; DO NOT EDIT. 2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _cacheTypeName = "defaultnonelruranked" 11 | 12 | var _cacheTypeIndex = [...]uint8{0, 7, 11, 14, 20} 13 | 14 | func (i cacheType) String() string { 15 | if i < 0 || i >= cacheType(len(_cacheTypeIndex)-1) { 16 | return fmt.Sprintf("cacheType(%d)", i) 17 | } 18 | return _cacheTypeName[_cacheTypeIndex[i]:_cacheTypeIndex[i+1]] 19 | } 20 | 21 | var _cacheTypeValues = []cacheType{0, 1, 2, 3} 22 | 23 | var _cacheTypeNameToValueMap = map[string]cacheType{ 24 | _cacheTypeName[0:7]: 0, 25 | _cacheTypeName[7:11]: 1, 26 | _cacheTypeName[11:14]: 2, 27 | _cacheTypeName[14:20]: 3, 28 | } 29 | 30 | // cacheTypeString retrieves an enum value from the enum constants string name. 31 | // Throws an error if the param is not part of the enum. 32 | func cacheTypeString(s string) (cacheType, error) { 33 | if val, ok := _cacheTypeNameToValueMap[s]; ok { 34 | return val, nil 35 | } 36 | return 0, fmt.Errorf("%s does not belong to cacheType values", s) 37 | } 38 | 39 | // cacheTypeValues returns all values of the enum 40 | func cacheTypeValues() []cacheType { 41 | return _cacheTypeValues 42 | } 43 | 44 | // IsAcacheType returns "true" if the value is listed in the enum definition. 
"false" otherwise 45 | func (i cacheType) IsAcacheType() bool { 46 | for _, v := range _cacheTypeValues { 47 | if i == v { 48 | return true 49 | } 50 | } 51 | return false 52 | } 53 | 54 | // MarshalText implements the encoding.TextMarshaler interface for cacheType 55 | func (i cacheType) MarshalText() ([]byte, error) { 56 | return []byte(i.String()), nil 57 | } 58 | 59 | // UnmarshalText implements the encoding.TextUnmarshaler interface for cacheType 60 | func (i *cacheType) UnmarshalText(text []byte) error { 61 | var err error 62 | *i, err = cacheTypeString(string(text)) 63 | return err 64 | } 65 | -------------------------------------------------------------------------------- /cmd/pi/tps.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/jaffee/commandeer/cobrafy" 8 | "github.com/pilosa/tools/bench" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | // NewQueryCommand subcommands 13 | func NewTPSCommand() *cobra.Command { 14 | b := bench.NewTPSBenchmark() 15 | com, err := cobrafy.Command(b) 16 | if err != nil { 17 | panic(err) 18 | } 19 | com.Use = b.Name 20 | com.Short = "Run TPS benchmark." 21 | com.Long = `Run TPS benchmark. 22 | 23 | This benchmark spawns goroutines, each of which queries 24 | Pilosa times serially. The idea is to get an 25 | understanding of what kind of query throughput various Pilosa 26 | configurations can handle. 27 | 28 | For this to be useful, you must already have an index in Pilosa with 29 | at least 1 field which has some data in it. I recommend the "imagine" 30 | tool (in this repository) for generating fake data with semi-realistic 31 | characteristics. 32 | 33 | For each query, TPS chooses randomly from the enabled query types 34 | (intersect, union, difference, xor), and then chooses two random 35 | fields, and two random rows within each field to perform the given 36 | operation on. It wraps each query in a Count() to make the result size 37 | consistent. 38 | 39 | Currently, row IDs are always chosen randomly between min and max. If 40 | no index is given, one is chosen at random, and if no fields are 41 | given, all the fields in the index are used. 42 | 43 | ` 44 | 45 | com.RunE = func(cmd *cobra.Command, args []string) error { 46 | flags := cmd.Flags() 47 | b.Logger = NewLoggerFromFlags(flags) 48 | client, err := NewClientFromFlags(flags) 49 | if err != nil { 50 | return err 51 | } 52 | agentNum, err := flags.GetInt("agent-num") 53 | if err != nil { 54 | return err 55 | } 56 | result, err := b.Run(context.Background(), client, agentNum) 57 | if err != nil { 58 | result.Error = err.Error() 59 | } 60 | return PrintResults(cmd, result, os.Stdout) 61 | } 62 | return com 63 | } 64 | -------------------------------------------------------------------------------- /cmd/pi/import.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewImportCommand() *cobra.Command { 12 | b := bench.NewImportBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "import", 15 | Short: "Import random data into Pilosa quickly.", 16 | Long: `import generates random data which can be controlled by command line flags and streams it into Pilosa's /import endpoint. 
Agent num has no effect`, 17 | RunE: func(cmd *cobra.Command, args []string) error { 18 | flags := cmd.Flags() 19 | b.Logger = NewLoggerFromFlags(flags) 20 | client, err := NewClientFromFlags(flags) 21 | if err != nil { 22 | return err 23 | } 24 | agentNum, err := flags.GetInt("agent-num") 25 | if err != nil { 26 | return err 27 | } 28 | result, err := b.Run(context.Background(), client, agentNum) 29 | if err != nil { 30 | result.Error = err.Error() 31 | } 32 | return PrintResults(cmd, result, os.Stdout) 33 | }, 34 | } 35 | 36 | flags := cmd.Flags() 37 | flags.Int64Var(&b.MinRowID, "min-row-id", 0, "Minimum row id of set bits.") 38 | flags.Int64Var(&b.MinColumnID, "min-column-id", 0, "Minimum column id of set bits.") 39 | flags.Int64Var(&b.MaxRowID, "max-row-id", 1000, "Maximum row id of set bits.") 40 | flags.Int64Var(&b.MaxColumnID, "max-column-id", 1000, "Maximum column id of set bits.") 41 | flags.Int64Var(&b.Iterations, "iterations", 100000, "Number of bits to set") 42 | flags.Int64Var(&b.Seed, "seed", 0, "Random seed.") 43 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index in which to set bits.") 44 | flags.StringVar(&b.Field, "field", defaultField, "Pilosa field in which to set bits.") 45 | flags.StringVar(&b.Distribution, "distribution", "exponential", "Random distribution for deltas between set bits (exponential or uniform).") 46 | flags.IntVar(&b.BufferSize, "buffer-size", 10000000, "Number of set bits to buffer in importer before POSTing to Pilosa.") 47 | 48 | return cmd 49 | } 50 | -------------------------------------------------------------------------------- /imagine/enums_fieldtype.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=fieldType -trimprefix=fieldType -transform=kebab -text -output enums_fieldtype.go"; DO NOT EDIT. 2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _fieldTypeName = "undefintsetmutextime" 11 | 12 | var _fieldTypeIndex = [...]uint8{0, 5, 8, 11, 16, 20} 13 | 14 | func (i fieldType) String() string { 15 | if i < 0 || i >= fieldType(len(_fieldTypeIndex)-1) { 16 | return fmt.Sprintf("fieldType(%d)", i) 17 | } 18 | return _fieldTypeName[_fieldTypeIndex[i]:_fieldTypeIndex[i+1]] 19 | } 20 | 21 | var _fieldTypeValues = []fieldType{0, 1, 2, 3, 4} 22 | 23 | var _fieldTypeNameToValueMap = map[string]fieldType{ 24 | _fieldTypeName[0:5]: 0, 25 | _fieldTypeName[5:8]: 1, 26 | _fieldTypeName[8:11]: 2, 27 | _fieldTypeName[11:16]: 3, 28 | _fieldTypeName[16:20]: 4, 29 | } 30 | 31 | // fieldTypeString retrieves an enum value from the enum constants string name. 32 | // Throws an error if the param is not part of the enum. 33 | func fieldTypeString(s string) (fieldType, error) { 34 | if val, ok := _fieldTypeNameToValueMap[s]; ok { 35 | return val, nil 36 | } 37 | return 0, fmt.Errorf("%s does not belong to fieldType values", s) 38 | } 39 | 40 | // fieldTypeValues returns all values of the enum 41 | func fieldTypeValues() []fieldType { 42 | return _fieldTypeValues 43 | } 44 | 45 | // IsAfieldType returns "true" if the value is listed in the enum definition. 
"false" otherwise 46 | func (i fieldType) IsAfieldType() bool { 47 | for _, v := range _fieldTypeValues { 48 | if i == v { 49 | return true 50 | } 51 | } 52 | return false 53 | } 54 | 55 | // MarshalText implements the encoding.TextMarshaler interface for fieldType 56 | func (i fieldType) MarshalText() ([]byte, error) { 57 | return []byte(i.String()), nil 58 | } 59 | 60 | // UnmarshalText implements the encoding.TextUnmarshaler interface for fieldType 61 | func (i *fieldType) UnmarshalText(text []byte) error { 62 | var err error 63 | *i, err = fieldTypeString(string(text)) 64 | return err 65 | } 66 | -------------------------------------------------------------------------------- /cmd/pi/import_range.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewImportRangeCommand() *cobra.Command { 12 | b := bench.NewImportRangeBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "import-range", 15 | Short: "Import random field data into Pilosa.", 16 | Long: `import-range generates random data which can be controlled by command line flags and streams it into Pilosa's /import endpoint. Agent num has no effect`, 17 | RunE: func(cmd *cobra.Command, args []string) error { 18 | flags := cmd.Flags() 19 | b.Logger = NewLoggerFromFlags(flags) 20 | client, err := NewClientFromFlags(flags) 21 | if err != nil { 22 | return err 23 | } 24 | agentNum, err := flags.GetInt("agent-num") 25 | if err != nil { 26 | return err 27 | } 28 | result, err := b.Run(context.Background(), client, agentNum) 29 | if err != nil { 30 | result.Error = err.Error() 31 | } 32 | return PrintResults(cmd, result, os.Stdout) 33 | }, 34 | } 35 | 36 | flags := cmd.Flags() 37 | flags.Int64Var(&b.MinValue, "min-value", 0, "Minimum row id of set bits.") 38 | flags.Int64Var(&b.MinColumnID, "min-column-id", 0, "Minimum column id of set bits.") 39 | flags.Int64Var(&b.MaxValue, "max-value", 1000, "Maximum row id of set bits.") 40 | flags.Int64Var(&b.MaxColumnID, "max-column-id", 1000, "Maximum column id of set bits.") 41 | flags.Int64Var(&b.Iterations, "iterations", 1000, "Number of bits to set") 42 | flags.Int64Var(&b.Seed, "seed", 0, "Random seed.") 43 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index in which to set bits.") 44 | flags.StringVar(&b.Field, "field", defaultField, "Pilosa field in which to set bits.") 45 | flags.StringVar(&b.Distribution, "distribution", "uniform", "Random distribution for deltas between set bits (exponential or uniform).") 46 | flags.IntVar(&b.BufferSize, "buffer-size", 10000000, "Number of set bits to buffer in importer before POSTing to Pilosa.") 47 | 48 | return cmd 49 | } 50 | -------------------------------------------------------------------------------- /imagine/enums_valueorder.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=valueOrder -trimprefix=valueOrder -text -transform=kebab -output enums_valueorder.go"; DO NOT EDIT. 
2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _valueOrderName = "linearstridepermutezipf" 11 | 12 | var _valueOrderIndex = [...]uint8{0, 6, 12, 19, 23} 13 | 14 | func (i valueOrder) String() string { 15 | if i < 0 || i >= valueOrder(len(_valueOrderIndex)-1) { 16 | return fmt.Sprintf("valueOrder(%d)", i) 17 | } 18 | return _valueOrderName[_valueOrderIndex[i]:_valueOrderIndex[i+1]] 19 | } 20 | 21 | var _valueOrderValues = []valueOrder{0, 1, 2, 3} 22 | 23 | var _valueOrderNameToValueMap = map[string]valueOrder{ 24 | _valueOrderName[0:6]: 0, 25 | _valueOrderName[6:12]: 1, 26 | _valueOrderName[12:19]: 2, 27 | _valueOrderName[19:23]: 3, 28 | } 29 | 30 | // valueOrderString retrieves an enum value from the enum constants string name. 31 | // Throws an error if the param is not part of the enum. 32 | func valueOrderString(s string) (valueOrder, error) { 33 | if val, ok := _valueOrderNameToValueMap[s]; ok { 34 | return val, nil 35 | } 36 | return 0, fmt.Errorf("%s does not belong to valueOrder values", s) 37 | } 38 | 39 | // valueOrderValues returns all values of the enum 40 | func valueOrderValues() []valueOrder { 41 | return _valueOrderValues 42 | } 43 | 44 | // IsAvalueOrder returns "true" if the value is listed in the enum definition. "false" otherwise 45 | func (i valueOrder) IsAvalueOrder() bool { 46 | for _, v := range _valueOrderValues { 47 | if i == v { 48 | return true 49 | } 50 | } 51 | return false 52 | } 53 | 54 | // MarshalText implements the encoding.TextMarshaler interface for valueOrder 55 | func (i valueOrder) MarshalText() ([]byte, error) { 56 | return []byte(i.String()), nil 57 | } 58 | 59 | // UnmarshalText implements the encoding.TextUnmarshaler interface for valueOrder 60 | func (i *valueOrder) UnmarshalText(text []byte) error { 61 | var err error 62 | *i, err = valueOrderString(string(text)) 63 | return err 64 | } 65 | -------------------------------------------------------------------------------- /imagine/enums_timequantum.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=timeQuantum -trimprefix=timeQuantum -text -transform=caps -output enums_timequantum.go"; DO NOT EDIT. 2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _timeQuantumName = "YYMYMDYMDH" 11 | 12 | var _timeQuantumIndex = [...]uint8{0, 1, 3, 6, 10} 13 | 14 | func (i timeQuantum) String() string { 15 | if i < 0 || i >= timeQuantum(len(_timeQuantumIndex)-1) { 16 | return fmt.Sprintf("timeQuantum(%d)", i) 17 | } 18 | return _timeQuantumName[_timeQuantumIndex[i]:_timeQuantumIndex[i+1]] 19 | } 20 | 21 | var _timeQuantumValues = []timeQuantum{0, 1, 2, 3} 22 | 23 | var _timeQuantumNameToValueMap = map[string]timeQuantum{ 24 | _timeQuantumName[0:1]: 0, 25 | _timeQuantumName[1:3]: 1, 26 | _timeQuantumName[3:6]: 2, 27 | _timeQuantumName[6:10]: 3, 28 | } 29 | 30 | // timeQuantumString retrieves an enum value from the enum constants string name. 31 | // Throws an error if the param is not part of the enum. 32 | func timeQuantumString(s string) (timeQuantum, error) { 33 | if val, ok := _timeQuantumNameToValueMap[s]; ok { 34 | return val, nil 35 | } 36 | return 0, fmt.Errorf("%s does not belong to timeQuantum values", s) 37 | } 38 | 39 | // timeQuantumValues returns all values of the enum 40 | func timeQuantumValues() []timeQuantum { 41 | return _timeQuantumValues 42 | } 43 | 44 | // IsAtimeQuantum returns "true" if the value is listed in the enum definition. 
"false" otherwise 45 | func (i timeQuantum) IsAtimeQuantum() bool { 46 | for _, v := range _timeQuantumValues { 47 | if i == v { 48 | return true 49 | } 50 | } 51 | return false 52 | } 53 | 54 | // MarshalText implements the encoding.TextMarshaler interface for timeQuantum 55 | func (i timeQuantum) MarshalText() ([]byte, error) { 56 | return []byte(i.String()), nil 57 | } 58 | 59 | // UnmarshalText implements the encoding.TextUnmarshaler interface for timeQuantum 60 | func (i *timeQuantum) UnmarshalText(text []byte) error { 61 | var err error 62 | *i, err = timeQuantumString(string(text)) 63 | return err 64 | } 65 | -------------------------------------------------------------------------------- /imagine/enums_verifytype.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=verifyType -trimprefix=verifyType -text -transform=kebab -output enums_verifytype.go"; DO NOT EDIT. 2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _verifyTypeName = "errornonepurgeupdatecreate" 11 | 12 | var _verifyTypeIndex = [...]uint8{0, 5, 9, 14, 20, 26} 13 | 14 | func (i verifyType) String() string { 15 | if i < 0 || i >= verifyType(len(_verifyTypeIndex)-1) { 16 | return fmt.Sprintf("verifyType(%d)", i) 17 | } 18 | return _verifyTypeName[_verifyTypeIndex[i]:_verifyTypeIndex[i+1]] 19 | } 20 | 21 | var _verifyTypeValues = []verifyType{0, 1, 2, 3, 4} 22 | 23 | var _verifyTypeNameToValueMap = map[string]verifyType{ 24 | _verifyTypeName[0:5]: 0, 25 | _verifyTypeName[5:9]: 1, 26 | _verifyTypeName[9:14]: 2, 27 | _verifyTypeName[14:20]: 3, 28 | _verifyTypeName[20:26]: 4, 29 | } 30 | 31 | // verifyTypeString retrieves an enum value from the enum constants string name. 32 | // Throws an error if the param is not part of the enum. 33 | func verifyTypeString(s string) (verifyType, error) { 34 | if val, ok := _verifyTypeNameToValueMap[s]; ok { 35 | return val, nil 36 | } 37 | return 0, fmt.Errorf("%s does not belong to verifyType values", s) 38 | } 39 | 40 | // verifyTypeValues returns all values of the enum 41 | func verifyTypeValues() []verifyType { 42 | return _verifyTypeValues 43 | } 44 | 45 | // IsAverifyType returns "true" if the value is listed in the enum definition. "false" otherwise 46 | func (i verifyType) IsAverifyType() bool { 47 | for _, v := range _verifyTypeValues { 48 | if i == v { 49 | return true 50 | } 51 | } 52 | return false 53 | } 54 | 55 | // MarshalText implements the encoding.TextMarshaler interface for verifyType 56 | func (i verifyType) MarshalText() ([]byte, error) { 57 | return []byte(i.String()), nil 58 | } 59 | 60 | // UnmarshalText implements the encoding.TextUnmarshaler interface for verifyType 61 | func (i *verifyType) UnmarshalText(text []byte) error { 62 | var err error 63 | *i, err = verifyTypeString(string(text)) 64 | return err 65 | } 66 | -------------------------------------------------------------------------------- /imagine/enums_dimensionorder.go: -------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=dimensionOrder -trimprefix=dimensionOrder -text -transform=kebab -output enums_dimensionorder.go"; DO NOT EDIT. 
2 | 3 | // 4 | package imagine 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | const _dimensionOrderName = "rowcolumn" 11 | 12 | var _dimensionOrderIndex = [...]uint8{0, 3, 9} 13 | 14 | func (i dimensionOrder) String() string { 15 | if i < 0 || i >= dimensionOrder(len(_dimensionOrderIndex)-1) { 16 | return fmt.Sprintf("dimensionOrder(%d)", i) 17 | } 18 | return _dimensionOrderName[_dimensionOrderIndex[i]:_dimensionOrderIndex[i+1]] 19 | } 20 | 21 | var _dimensionOrderValues = []dimensionOrder{0, 1} 22 | 23 | var _dimensionOrderNameToValueMap = map[string]dimensionOrder{ 24 | _dimensionOrderName[0:3]: 0, 25 | _dimensionOrderName[3:9]: 1, 26 | } 27 | 28 | // dimensionOrderString retrieves an enum value from the enum constants string name. 29 | // Throws an error if the param is not part of the enum. 30 | func dimensionOrderString(s string) (dimensionOrder, error) { 31 | if val, ok := _dimensionOrderNameToValueMap[s]; ok { 32 | return val, nil 33 | } 34 | return 0, fmt.Errorf("%s does not belong to dimensionOrder values", s) 35 | } 36 | 37 | // dimensionOrderValues returns all values of the enum 38 | func dimensionOrderValues() []dimensionOrder { 39 | return _dimensionOrderValues 40 | } 41 | 42 | // IsAdimensionOrder returns "true" if the value is listed in the enum definition. "false" otherwise 43 | func (i dimensionOrder) IsAdimensionOrder() bool { 44 | for _, v := range _dimensionOrderValues { 45 | if i == v { 46 | return true 47 | } 48 | } 49 | return false 50 | } 51 | 52 | // MarshalText implements the encoding.TextMarshaler interface for dimensionOrder 53 | func (i dimensionOrder) MarshalText() ([]byte, error) { 54 | return []byte(i.String()), nil 55 | } 56 | 57 | // UnmarshalText implements the encoding.TextUnmarshaler interface for dimensionOrder 58 | func (i *dimensionOrder) UnmarshalText(text []byte) error { 59 | var err error 60 | *i, err = dimensionOrderString(string(text)) 61 | return err 62 | } 63 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: pi crossbuild install release test cover cover-pkg cover-viz enumer 2 | 3 | ENUMER := $(shell command -v enumer 2>/dev/null) 4 | VERSION := $(shell git describe --tags 2> /dev/null || echo unknown) 5 | IDENTIFIER := $(VERSION)-$(GOOS)-$(GOARCH) 6 | CLONE_URL=github.com/pilosa/tools 7 | PKGS := $(shell cd $(GOPATH)/src/$(CLONE_URL); go list ./... | grep -v vendor) 8 | BUILD_TIME=`date -u +%FT%T%z` 9 | LDFLAGS="-X github.com/pilosa/tools.Version=$(VERSION) -X github.com/pilosa/tools.BuildTime=$(BUILD_TIME)" 10 | export GO111MODULE=on 11 | 12 | default: test install 13 | 14 | test: 15 | go test ./... 
$(TESTFLAGS) 16 | 17 | cover: 18 | mkdir -p build/coverage 19 | echo "mode: set" > build/coverage/all.out 20 | for pkg in $(PKGS) ; do \ 21 | make cover-pkg PKG=$$pkg ; \ 22 | done 23 | 24 | cover-pkg: 25 | mkdir -p build/coverage 26 | touch build/coverage/$(subst /,-,$(PKG)).out 27 | go test -coverprofile=build/coverage/$(subst /,-,$(PKG)).out $(PKG) 28 | tail -n +2 build/coverage/$(subst /,-,$(PKG)).out >> build/coverage/all.out 29 | 30 | cover-viz: cover 31 | go tool cover -html=build/coverage/all.out 32 | 33 | crossbuild: 34 | mkdir -p build/pi-$(IDENTIFIER) 35 | make pi FLAGS="-o build/pi-$(IDENTIFIER)/pi" 36 | cp LICENSE README.md build/pi-$(IDENTIFIER) 37 | tar -cvz -C build -f build/pi-$(IDENTIFIER).tar.gz pi-$(IDENTIFIER)/ 38 | @echo "Created release build: build/pi-$(IDENTIFIER).tar.gz" 39 | 40 | release: 41 | make crossbuild GOOS=linux GOARCH=amd64 42 | make crossbuild GOOS=linux GOARCH=386 43 | make crossbuild GOOS=darwin GOARCH=amd64 44 | 45 | install: install-pi install-imagine install-dx 46 | 47 | install-dx: 48 | go install -ldflags $(LDFLAGS) $(FLAGS) $(CLONE_URL)/cmd/dx 49 | 50 | install-imagine: 51 | go install -ldflags $(LDFLAGS) $(FLAGS) $(CLONE_URL)/cmd/imagine 52 | 53 | install-pi: 54 | go install -ldflags $(LDFLAGS) $(FLAGS) $(CLONE_URL)/cmd/pi 55 | 56 | 57 | generate: enumer-install 58 | cd imagine && \ 59 | go generate 60 | 61 | 62 | enumer-install: 63 | $(if $(ENUMER),@echo "enumer already installed — skipping.", go get -u github.com/alvaroloes/enumer) 64 | -------------------------------------------------------------------------------- /bench/basic_query.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "os" 8 | "time" 9 | 10 | "github.com/pilosa/go-pilosa" 11 | ) 12 | 13 | var _ Benchmark = (*BasicQueryBenchmark)(nil) 14 | 15 | // BasicQueryBenchmark runs a query multiple times with increasing row ids. 16 | type BasicQueryBenchmark struct { 17 | Name string `json:"name"` 18 | MinRowID int64 `json:"min-row-id"` 19 | Iterations int `json:"iterations"` 20 | NumArgs int `json:"num-args"` 21 | Query string `json:"query"` 22 | Index string `json:"index"` 23 | Field string `json:"field"` 24 | 25 | Logger *log.Logger `json:"-"` 26 | } 27 | 28 | // NewBasicQueryBenchmark returns a new instance of BasicQueryBenchmark. 29 | func NewBasicQueryBenchmark() *BasicQueryBenchmark { 30 | return &BasicQueryBenchmark{ 31 | Name: "basic-query", 32 | Logger: log.New(os.Stderr, "", log.LstdFlags), 33 | } 34 | } 35 | 36 | // Run runs the benchmark. 37 | func (b *BasicQueryBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 38 | result := NewResult() 39 | result.AgentNum = agentNum 40 | result.Configuration = b 41 | 42 | // Initialize schema. 43 | index, field, err := ensureSchema(client, b.Index, b.Field) 44 | if err != nil { 45 | return result, err 46 | } 47 | 48 | // Determine minimum row id. 49 | minRowID := b.MinRowID + int64(agentNum*b.Iterations) 50 | 51 | var start time.Time 52 | for n := 0; n < b.Iterations; n++ { 53 | rows := make([]*pilosa.PQLRowQuery, b.NumArgs) 54 | for i := range rows { 55 | rows[i] = field.Row(minRowID + int64(n)) 56 | } 57 | 58 | var q *pilosa.PQLRowQuery 59 | switch b.Query { 60 | case "Intersect": 61 | q = index.Intersect(rows...) 62 | case "Union": 63 | q = index.Union(rows...) 64 | case "Difference": 65 | q = index.Difference(rows...) 66 | case "Xor": 67 | q = index.Xor(rows...)
68 | default: 69 | return result, fmt.Errorf("invalid query type: %q", b.Query) 70 | } 71 | 72 | start = time.Now() 73 | _, err := client.Query(q) 74 | result.Add(time.Since(start), nil) 75 | if err != nil { 76 | return result, err 77 | } 78 | } 79 | return result, nil 80 | } 81 | -------------------------------------------------------------------------------- /cmd/pi/random_set.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // NewRandomSetCommand returns the random-set benchmark subcommand. 12 | func NewRandomSetCommand() *cobra.Command { 13 | b := bench.NewRandomSetBenchmark() 14 | cmd := &cobra.Command{ 15 | Use: "random-set", 16 | Short: "Executes random sets.", 17 | Long: `Sets random values according to the parameters using PQL through the /query endpoint. 18 | If NumAttrs and NumAttrValues are greater than 0, then each SetBit query is 19 | followed by a SetRowAttrs query on the same row id. Each SetRowAttrs query sets 20 | a single attribute to an integer value chosen randomly. There will be num-attrs 21 | total possible attributes and num-attr-values total possible values. Agent num 22 | modifies random seed.`, 23 | 24 | RunE: func(cmd *cobra.Command, args []string) error { 25 | flags := cmd.Flags() 26 | b.Logger = NewLoggerFromFlags(flags) 27 | client, err := NewClientFromFlags(flags) 28 | if err != nil { 29 | return err 30 | } 31 | agentNum, err := flags.GetInt("agent-num") 32 | if err != nil { 33 | return err 34 | } 35 | result, err := b.Run(context.Background(), client, agentNum) 36 | if err != nil { 37 | result.Error = err.Error() 38 | } 39 | return PrintResults(cmd, result, os.Stdout) 40 | }, 41 | } 42 | 43 | flags := cmd.Flags() 44 | flags.Int64Var(&b.MinRowID, "min-row-id", 0, "Minimum row id for set.") 45 | flags.Int64Var(&b.MaxRowID, "max-row-id", 100000, "Maximum row id for set.") 46 | flags.Int64Var(&b.MinColumnID, "min-column-id", 0, "Minimum column id for set.") 47 | flags.Int64Var(&b.MaxColumnID, "max-column-id", 100000, "Maximum column id for set.") 48 | flags.Int64Var(&b.Seed, "seed", 1, "Random seed.") 49 | flags.IntVar(&b.Iterations, "iterations", 100, "Number of values to set.") 50 | flags.IntVar(&b.BatchSize, "batch-size", 1, "Number of values to set per batch.") 51 | flags.IntVar(&b.NumAttrs, "num-attrs", 0, "If > 0, alternate set with setrowattrs - this number of different attributes") 52 | flags.IntVar(&b.NumAttrValues, "num-attr-values", 0, "If > 0, alternate set with setrowattrs - this number of different attribute values") 53 | flags.StringVar(&b.Field, "field", defaultField, "Field to set in.") 54 | flags.StringVar(&b.Index, "index", defaultIndex, "Pilosa index to use.") 55 | 56 | return cmd 57 | } 58 | -------------------------------------------------------------------------------- /cmd/pi/bench.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "io" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | const ( 12 | defaultIndex = "ibench" 13 | defaultField = "fbench" 14 | defaultRangeField = "range-field" 15 | ) 16 | 17 | func NewBenchCommand() *cobra.Command { 18 | benchCmd := &cobra.Command{ 19 | Use: "bench", 20 | Short: "Runs benchmarks against a pilosa cluster.", 21 | Long: `Runs benchmarks against a pilosa cluster.
22 | 23 | See the various subcommands for specific benchmarks and their arguments. The 24 | various benchmarks should modulate their behavior based on what agent-num is 25 | given, so that multiple benchmarks with identical configurations but differing 26 | agent numbers will do interesting work. 27 | 28 | `, 29 | } 30 | 31 | flags := benchCmd.PersistentFlags() 32 | flags.StringSlice("hosts", []string{"localhost:10101"}, "Comma separated list of \"host:port\" pairs of the Pilosa cluster.") 33 | flags.Int("agent-num", 0, "A unique integer to associate with this invocation of 'bench' to distinguish it from others running concurrently.") 34 | flags.Bool("human", true, "Make output human friendly.") 35 | flags.Bool("tls.skip-verify", false, "Skip TLS certificate verification (not secure)") 36 | 37 | benchCmd.AddCommand(NewBasicQueryCommand()) 38 | benchCmd.AddCommand(NewDiagonalSetBitsCommand()) 39 | benchCmd.AddCommand(NewImportCommand()) 40 | benchCmd.AddCommand(NewImportRangeCommand()) 41 | benchCmd.AddCommand(NewQueryCommand()) 42 | benchCmd.AddCommand(NewRandomQueryCommand()) 43 | benchCmd.AddCommand(NewRandomSetCommand()) 44 | benchCmd.AddCommand(NewRangeQueryCommand()) 45 | benchCmd.AddCommand(NewSliceWidthCommand()) 46 | benchCmd.AddCommand(NewZipfCommand()) 47 | benchCmd.AddCommand(NewTPSCommand()) 48 | 49 | return benchCmd 50 | } 51 | 52 | // PrintResults encodes the output of a benchmark subcommand as json and writes 53 | // it to the given Writer. It takes the "human" flag into account when encoding 54 | // the json. 55 | func PrintResults(cmd *cobra.Command, result *bench.Result, out io.Writer) error { 56 | human, err := cmd.Flags().GetBool("human") 57 | if err != nil { 58 | return err 59 | } 60 | 61 | enc := json.NewEncoder(out) 62 | if human { 63 | enc.SetIndent("", " ") 64 | } 65 | if err := enc.Encode(result); err != nil { 66 | return err 67 | } 68 | return nil 69 | } 70 | -------------------------------------------------------------------------------- /cmd/pi/zipf.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/pilosa/tools/bench" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func NewZipfCommand() *cobra.Command { 12 | b := bench.NewZipfBenchmark() 13 | cmd := &cobra.Command{ 14 | Use: "zipf", 15 | Short: "zipf sets random bits according to the Zipf distribution.", 16 | Long: `Sets random bits according to the Zipf distribution. 17 | 18 | This is a power-law distribution controlled by two parameters. 19 | Exponent, in the range (1, inf), with a default value of 1.01, controls 20 | the "sharpness" of the distribution, with higher exponent being sharper. 21 | Ratio, in the range (0, 1), with a default value of 0.25, controls the 22 | maximum variation of the distribution, with higher ratio being more uniform.
23 | `, 24 | RunE: func(cmd *cobra.Command, args []string) error { 25 | flags := cmd.Flags() 26 | b.Logger = NewLoggerFromFlags(flags) 27 | client, err := NewClientFromFlags(flags) 28 | if err != nil { 29 | return err 30 | } 31 | agentNum, err := flags.GetInt("agent-num") 32 | if err != nil { 33 | return err 34 | } 35 | result, err := b.Run(context.Background(), client, agentNum) 36 | if err != nil { 37 | result.Error = err.Error() 38 | } 39 | return PrintResults(cmd, result, os.Stdout) 40 | }, 41 | } 42 | 43 | flags := cmd.Flags() 44 | flags.Int64Var(&b.MinRowID, "min-row-id", 0, "Rows being set will all be greater than this.") 45 | flags.Int64Var(&b.MaxRowID, "max-row-id", 100000, "Maximum row id for set bits.") 46 | flags.Int64Var(&b.MinColumnID, "min-column-id", 0, "Column id to start from.") 47 | flags.Int64Var(&b.MaxColumnID, "max-column-id", 100000, "Maximum column id for set bits.") 48 | flags.IntVar(&b.Iterations, "iterations", 100, "Number of bits to set.") 49 | flags.Int64Var(&b.Seed, "seed", 1, "Seed for RNG.") 50 | flags.StringVar(&b.Field, "field", "fbench", "Pilosa field in which to set bits.") 51 | flags.StringVar(&b.Index, "index", "ibench", "Pilosa index to use.") 52 | flags.Float64Var(&b.RowExponent, "row-exponent", 1.01, "Zipf exponent parameter for row IDs.") 53 | flags.Float64Var(&b.RowRatio, "row-ratio", 0.25, "Zipf probability ratio parameter for row IDs.") 54 | flags.Float64Var(&b.ColumnExponent, "column-exponent", 1.01, "Zipf exponent parameter for column IDs.") 55 | flags.Float64Var(&b.ColumnRatio, "column-ratio", 0.25, "Zipf probability ratio parameter for column IDs.") 56 | flags.StringVar(&b.Operation, "operation", "set", "Can be set or clear.") 57 | 58 | return cmd 59 | } 60 | -------------------------------------------------------------------------------- /bench/random_set.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "math/rand" 8 | "os" 9 | "time" 10 | 11 | "github.com/pilosa/go-pilosa" 12 | ) 13 | 14 | var _ Benchmark = (*RandomSetBenchmark)(nil) 15 | 16 | // RandomSetBenchmark sets bits randomly and deterministically based on a seed. 17 | type RandomSetBenchmark struct { 18 | Name string `json:"name"` 19 | MinRowID int64 `json:"min-row-id"` 20 | MaxRowID int64 `json:"max-row-id"` 21 | MinColumnID int64 `json:"min-column-id"` 22 | MaxColumnID int64 `json:"max-column-id"` 23 | Iterations int `json:"iterations"` 24 | BatchSize int `json:"batch-size"` 25 | Seed int64 `json:"seed"` 26 | NumAttrs int `json:"num-attrs"` 27 | NumAttrValues int `json:"num-attr-values"` 28 | Index string `json:"index"` 29 | Field string `json:"field"` 30 | 31 | Logger *log.Logger `json:"-"` 32 | } 33 | 34 | // NewRandomSetBenchmark returns a new instance of RandomSetBenchmark. 35 | func NewRandomSetBenchmark() *RandomSetBenchmark { 36 | return &RandomSetBenchmark{ 37 | Name: "random-set", 38 | Logger: log.New(os.Stderr, "", log.LstdFlags), 39 | } 40 | } 41 | 42 | // Run runs the benchmark. 43 | func (b *RandomSetBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 44 | result := NewResult() 45 | result.AgentNum = agentNum 46 | result.Configuration = b 47 | 48 | if b.BatchSize <= 0 { 49 | return result, fmt.Errorf("batch size must be greater than 0, currently: %d", b.BatchSize) 50 | } 51 | 52 | // Initialize schema. 
53 | index, field, err := ensureSchema(client, b.Index, b.Field) 54 | if err != nil { 55 | return result, err 56 | } 57 | 58 | rand := rand.New(rand.NewSource(b.Seed)) 59 | const letters = "abcdefghijklmnopqrstuvwxyz" 60 | for n := 0; n < b.Iterations; { 61 | var a []pilosa.PQLQuery 62 | for i := 0; i < b.BatchSize && n < b.Iterations; i, n = i+1, n+1 { 63 | rowID := rand.Int63n(b.MaxRowID - b.MinRowID) 64 | columnID := rand.Int63n(b.MaxColumnID - b.MinColumnID) 65 | 66 | a = append(a, field.Set(b.MinRowID+rowID, b.MinColumnID+columnID)) 67 | 68 | if b.NumAttrs > 0 && b.NumAttrValues > 0 { 69 | attri := rand.Intn(b.NumAttrs) 70 | key := fmt.Sprintf("%c%d", letters[attri%len(letters)], attri) 71 | val := rand.Intn(b.NumAttrValues) 72 | a = append(a, field.SetRowAttrs(b.MinRowID+rowID, map[string]interface{}{key: val})) 73 | } 74 | } 75 | 76 | start := time.Now() 77 | _, err := client.Query(index.BatchQuery(a...)) 78 | result.Add(time.Since(start), nil) 79 | if err != nil { 80 | return result, err 81 | } 82 | } 83 | return result, nil 84 | } 85 | -------------------------------------------------------------------------------- /imagine/sample.md: -------------------------------------------------------------------------------- 1 | # A sample schema 2 | 3 | This is loosely inspired by things like the Star Schema Benchmark, with the 4 | idea being to create a tool to let similar tests be run on different scales, 5 | against an arbitrary install. This schema is not yet fully implemented; 6 | what follows is some sketchy design notes. 7 | 8 | ## Indexes 9 | 10 | All indexes used by this tool have names starting with `imaginary-` (or 11 | another prefix specified with `--prefix`). The intent here is that it should be 12 | safe to delete these indexes after tests. 13 | 14 | ## Sample Data 15 | 16 | In the interests of visualization, we imagine a system which has data about 17 | various entities, called `users`, and things that might happen involving 18 | those users, called `events`. We also add another table, `supplemental`, 19 | which has different data from `users` but the same column space, to facilitate 20 | cross-index benchmarks. Benchmarks might well operate on only one of 21 | these indexes, but having three of them lets us do interesting comparisons. 22 | 23 | ### Users 24 | 25 | The main index is a table of information about users. Fields: 26 | age (int) 27 | income (int) 28 | favorite numbers (N rows, bits indicate "user likes number") 29 | least favorite numbers (N rows, bits indicate "user dislikes number") 30 | 31 | The fields are obviously made-up, but they let us vary broadly in cardinality 32 | without having to have a giant list of "ingredients one could have an allergy 33 | to" or something. We will probably want/need other fields also. 34 | 35 | We should probably have column keys, even if they're obviously-trivial (like 36 | "the column ID expressed as a string with a prefix"), because that's a 37 | use case we potentially care about. 38 | 39 | ### Events 40 | 41 | The second table, `events`, has an int field which contains column IDs from the 42 | user table. This allows comparisons to go either of two ways; you can perform 43 | queries on this table, then create a new row which has bits set for all the 44 | entries in the `userid` field for the returned columns, or you can perform 45 | queries on `users`, then select entries from this table which have a value in 46 | that set of columns present in their `userid` field, as sketched below.
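A sketch of that second direction, using the `pilosa` package from github.com/pilosa/go-pilosa (illustrative only: this schema is not implemented, and the int-field `Equals` helper and all names here are assumptions):

```go
// eventsForUsers selects events whose userid int field holds one of the
// column IDs returned by a prior query against the users index.
func eventsForUsers(client *pilosa.Client, events *pilosa.Index,
	userid *pilosa.Field, userCols []uint64) (*pilosa.QueryResponse, error) {
	queries := make([]*pilosa.PQLRowQuery, 0, len(userCols))
	for _, col := range userCols {
		// Equals(n) builds a Row(userid == n) range query on the int field.
		queries = append(queries, userid.Equals(int(col)))
	}
	// Union the per-column rows into a single row of matching events.
	return client.Query(events.Union(queries...))
}
```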
Events likewise have 47 | other fields: 48 | timestamp of event 49 | int fields 50 | count-type fields 51 | fields which encourage union/intersection queries 52 | 53 | ### Supplemental 54 | 55 | The third table, `supplemental`, contains additional details about users; the 56 | column space for this table, and the column space for `users`, are the same. 57 | This is useful in cases where there is a reason to have two indexes which 58 | refer to the same columns, and not to combine those indexes. The likely join 59 | use case is to perform queries on one, then use that in intersections or unions 60 | with queries on the other. Fields: 61 | ??? 62 | -------------------------------------------------------------------------------- /dx/compare_test.go: -------------------------------------------------------------------------------- 1 | package dx 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/pilosa/go-pilosa" 7 | ) 8 | 9 | func TestIsValidQuery(t *testing.T) { 10 | res := &pilosa.RowResult{} 11 | count := int64(50) 12 | 13 | tests := []struct { 14 | name string 15 | query *Query 16 | expected bool 17 | }{ 18 | { 19 | name: "nil", 20 | query: nil, 21 | expected: false, 22 | }, 23 | { 24 | name: "empty", 25 | query: &Query{}, 26 | expected: false, 27 | }, 28 | { 29 | name: "result-only", 30 | query: &Query{Result: res}, 31 | expected: true, 32 | }, 33 | { 34 | name: "count-only", 35 | query: &Query{ResultCount: &count}, 36 | expected: true, 37 | }, 38 | { 39 | name: "result-and-count", 40 | query: &Query{Result: res, ResultCount: &count}, 41 | expected: true, 42 | }, 43 | } 44 | 45 | for _, q := range tests { 46 | got := isValidQuery(q.query) 47 | if got != q.expected { 48 | t.Fatalf("test case %v: expected: %v, got %v", q.name, q.expected, got) 49 | } 50 | } 51 | } 52 | 53 | func TestQueryResultEqual(t *testing.T) { 54 | res0 := &pilosa.RowResult{Columns: []uint64{0, 2, 4, 6}} 55 | res1 := &pilosa.RowResult{Columns: []uint64{0, 2, 4, 6}} 56 | res2 := &pilosa.RowResult{Columns: []uint64{1, 2, 4, 6}} 57 | count3 := int64(3) 58 | count4 := int64(4) 59 | count4dup := int64(4) 60 | 61 | tests := []struct { 62 | name string 63 | query1 *Query 64 | query2 *Query 65 | expected bool 66 | }{ 67 | { 68 | name: "result-result-equal", 69 | query1: &Query{Result: res0}, 70 | query2: &Query{Result: res1}, 71 | expected: true, 72 | }, 73 | { 74 | name: "result-count-equal", 75 | query1: &Query{Result: res0}, 76 | query2: &Query{ResultCount: &count4}, 77 | expected: true, 78 | }, 79 | { 80 | name: "count-count-equal", 81 | query1: &Query{ResultCount: &count4}, 82 | query2: &Query{ResultCount: &count4dup}, 83 | expected: true, 84 | }, 85 | { 86 | name: "result-result-unequal", 87 | query1: &Query{Result: res0}, 88 | query2: &Query{Result: res2}, 89 | expected: false, 90 | }, 91 | { 92 | name: "result-count-unequal", 93 | query1: &Query{Result: res0}, 94 | query2: &Query{ResultCount: &count3}, 95 | expected: false, 96 | }, 97 | { 98 | name: "count-count-unequal", 99 | query1: &Query{ResultCount: &count4}, 100 | query2: &Query{ResultCount: &count3}, 101 | expected: false, 102 | }, 103 | { 104 | name: "default-to-results", 105 | query1: &Query{Result: res0, ResultCount: &count4}, 106 | query2: &Query{Result: res1, ResultCount: &count3}, 107 | expected: true, 108 | }, 109 | } 110 | 111 | for _, q := range tests { 112 | got := queryResultsEqual(q.query1, q.query2) 113 | if got != q.expected { 114 | t.Fatalf("test case: %v, expected %v, got: %v", q.name, q.expected, got) 115 | } 116 | } 117 | } 118 |
-------------------------------------------------------------------------------- /apophenia/zipf_test.go: -------------------------------------------------------------------------------- 1 | package apophenia 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "math/rand" 7 | "testing" 8 | ) 9 | 10 | type testCase struct { 11 | name string 12 | s, v float64 13 | m uint64 14 | } 15 | 16 | var testCases = []testCase{ 17 | {s: 1.01, v: 1, m: 100}, 18 | {s: 2, v: 1, m: 100}, 19 | {s: 1.01, v: 100, m: 1000}, 20 | {s: 2, v: 10000, m: 1000}, 21 | } 22 | 23 | func (tc testCase) Name() string { 24 | if tc.name != "" { 25 | return tc.name 26 | } 27 | return fmt.Sprintf("(zipf:s%f,v%f,m%d)", tc.s, tc.v, tc.m) 28 | } 29 | 30 | func runZipf(zf func() uint64, values []uint64, n uint64, t *testing.T) { 31 | for i := uint64(0); i < n; i++ { 32 | x := zf() 33 | if x < 0 || x >= uint64(len(values)) { 34 | t.Fatalf("got out-of-range value %d from zipf function", x) 35 | } 36 | values[x]++ 37 | } 38 | } 39 | 40 | type zipfTestCase struct { 41 | q, v float64 42 | seq Sequence 43 | exp string 44 | } 45 | 46 | func (z zipfTestCase) String() string { 47 | return fmt.Sprintf("q: %g, v: %g, seq: %t, expected error: %t", 48 | z.q, z.v, z.seq != nil, z.exp != "") 49 | } 50 | 51 | func Test_InvalidInputs(t *testing.T) { 52 | seq := NewSequence(0) 53 | testCases := []zipfTestCase{ 54 | {q: 1, v: 1.1, seq: seq, exp: "need q > 1 (got 1) and v >= 1 (got 1.1) for Zipf distribution"}, 55 | {q: 1.1, v: 0.99, seq: seq, exp: "need q > 1 (got 1.1) and v >= 1 (got 0.99) for Zipf distribution"}, 56 | {q: 1.1, v: 1.1, seq: nil, exp: "need a usable PRNG apophenia.Sequence"}, 57 | {q: math.NaN(), v: 1.1, seq: nil, exp: "q (NaN) and v (1.1) must not be NaN for Zipf distribution"}, 58 | {q: 1.01, v: 2, seq: seq, exp: ""}, 59 | } 60 | for _, c := range testCases { 61 | z, err := NewZipf(c.q, c.v, 20, 0, c.seq) 62 | if c.exp != "" { 63 | if err == nil { 64 | t.Errorf("case %v: expected error '%s', got no error", c, c.exp) 65 | } else if err.Error() != c.exp { 66 | t.Errorf("case %v: expected error '%s', got error '%s'", c, c.exp, err.Error()) 67 | } 68 | } else { 69 | if err != nil { 70 | t.Errorf("case %v: unexpected error %v", c, err) 71 | } else if z == nil { 72 | t.Errorf("case %v: nil Zipf despite no error", c) 73 | } 74 | } 75 | } 76 | } 77 | 78 | const runs = 1000000 79 | 80 | func Test_CompareWithMath(t *testing.T) { 81 | failed := false 82 | for idx, c := range testCases { 83 | stdlibValues := make([]uint64, c.m+1) 84 | zipfValues := make([]uint64, c.m+1) 85 | stdlibZipf := rand.NewZipf(rand.New(rand.NewSource(int64(idx))), c.s, c.v, c.m) 86 | seq := NewSequence(int64(idx)) 87 | zipfZipf, err := NewZipf(c.s, c.v, c.m, 0, seq) 88 | if err != nil { 89 | t.Fatalf("failed to create newZipf: %s", err) 90 | } 91 | runZipf(stdlibZipf.Uint64, stdlibValues, runs, t) 92 | runZipf(zipfZipf.Next, zipfValues, runs, t) 93 | for i := uint64(0); i < c.m; i++ { 94 | stdlibP := float64(stdlibValues[i]) / runs 95 | zipfP := float64(zipfValues[i]) / runs 96 | diff := math.Abs(stdlibP - zipfP) 97 | if diff > 0.001 { 98 | failed = true 99 | t.Logf("%s: stdlib %d, zipf %d, diff %f [s %f, v %f]", 100 | c.Name(), stdlibValues[i], zipfValues[i], diff, c.s, c.v) 101 | } 102 | } 103 | } 104 | if failed { 105 | t.Fail() 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /bench/import.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 
5 | "io" 6 | "log" 7 | "math/rand" 8 | "os" 9 | 10 | "github.com/pilosa/go-pilosa" 11 | ) 12 | 13 | var _ Benchmark = (*ImportBenchmark)(nil) 14 | 15 | type ImportBenchmark struct { 16 | Name string `json:"name"` 17 | MinRowID int64 `json:"min-row-id"` 18 | MinColumnID int64 `json:"min-column-id"` 19 | MaxRowID int64 `json:"max-row-id"` 20 | MaxColumnID int64 `json:"max-column-id"` 21 | Index string `json:"index"` 22 | Field string `json:"field"` 23 | Iterations int64 `json:"iterations"` 24 | Seed int64 `json:"seed"` 25 | Distribution string `json:"distribution"` 26 | BufferSize int `json:"-"` 27 | 28 | Logger *log.Logger `json:"-"` 29 | } 30 | 31 | // NewImportBenchmark returns a new instance of ImportBenchmark. 32 | func NewImportBenchmark() *ImportBenchmark { 33 | return &ImportBenchmark{ 34 | Name: "import", 35 | Logger: log.New(os.Stderr, "", log.LstdFlags), 36 | } 37 | } 38 | 39 | // Run runs the Import benchmark 40 | func (b *ImportBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 41 | result := NewResult() 42 | result.AgentNum = agentNum 43 | result.Configuration = b 44 | 45 | // Initialize schema. 46 | _, field, err := ensureSchema(client, b.Index, b.Field) 47 | if err != nil { 48 | return result, err 49 | } 50 | 51 | itr := b.RecordIterator(b.Seed + int64(agentNum)) 52 | err = client.ImportField(field, itr, pilosa.OptImportBatchSize(b.BufferSize)) 53 | result.Extra["actual-iterations"] = itr.actualIterations 54 | result.Extra["avgdelta"] = itr.avgdelta 55 | return result, err 56 | } 57 | 58 | func (b *ImportBenchmark) RecordIterator(seed int64) *RecordIterator { 59 | rand := rand.New(rand.NewSource(seed)) 60 | 61 | itr := NewRecordIterator() 62 | itr.maxbitnum = (b.MaxRowID - b.MinRowID + 1) * (b.MaxColumnID - b.MinColumnID + 1) 63 | itr.avgdelta = float64(itr.maxbitnum) / float64(b.Iterations) 64 | itr.minrow, itr.mincol, itr.maxrow, itr.maxcol = b.MinRowID, b.MinColumnID, b.MaxRowID, b.MaxColumnID 65 | 66 | if b.Distribution == "exponential" { 67 | itr.lambda = 1.0 / itr.avgdelta 68 | itr.fdelta = func(itr *RecordIterator) float64 { 69 | return rand.ExpFloat64() / itr.lambda 70 | } 71 | } else { // if b.Distribution == "uniform" { 72 | itr.fdelta = func(itr *RecordIterator) float64 { 73 | return rand.Float64() * itr.avgdelta * 2 74 | } 75 | } 76 | return itr 77 | } 78 | 79 | func NewRecordIterator() *RecordIterator { 80 | return &RecordIterator{} 81 | } 82 | 83 | type RecordIterator struct { 84 | actualIterations int64 85 | bitnum int64 86 | maxbitnum int64 87 | minrow int64 88 | maxrow int64 89 | mincol int64 90 | maxcol int64 91 | avgdelta float64 92 | lambda float64 93 | rand *rand.Rand 94 | fdelta func(z *RecordIterator) float64 95 | } 96 | 97 | func (itr *RecordIterator) NextRecord() (pilosa.Record, error) { 98 | delta := itr.fdelta(itr) 99 | if delta < 1.0 { 100 | delta = 1.0 101 | } 102 | itr.bitnum = int64(float64(itr.bitnum) + delta) 103 | if itr.bitnum > itr.maxbitnum { 104 | return pilosa.Column{}, io.EOF 105 | } 106 | itr.actualIterations++ 107 | return pilosa.Column{ 108 | RowID: uint64((itr.bitnum / (itr.maxcol - itr.mincol + 1)) + itr.minrow), 109 | ColumnID: uint64(itr.bitnum%(itr.maxcol-itr.mincol+1) + itr.mincol), 110 | }, nil 111 | } 112 | -------------------------------------------------------------------------------- /dx/cmd_test.go: -------------------------------------------------------------------------------- 1 | package dx 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "os" 7 | "path/filepath" 8 | "reflect" 9 | 
"testing" 10 | 11 | "github.com/pilosa/pilosa" 12 | "github.com/pilosa/pilosa/test" 13 | ) 14 | 15 | func SetupMain() (*Main, string) { 16 | path, err := ioutil.TempDir("", "dx-") 17 | if err != nil { 18 | panic(err) 19 | } 20 | m := NewMain() 21 | m.DataDir = path 22 | m.ThreadCount = 2 23 | m.NumQueries = 10 24 | m.SpecFiles = []string{filepath.Join("./testdata", "spec", "spec.toml")} 25 | 26 | return m, path 27 | } 28 | 29 | func SetupBits(holder *pilosa.Holder) { 30 | idx0, err := holder.CreateIndex("index0", pilosa.IndexOptions{}) 31 | if err != nil { 32 | panic(err) 33 | } 34 | idx1, err := holder.CreateIndex("index1", pilosa.IndexOptions{}) 35 | if err != nil { 36 | panic(err) 37 | } 38 | fld0, err := idx0.CreateField("field0") 39 | if err != nil { 40 | panic(err) 41 | } 42 | fld1, err := idx0.CreateField("field1") 43 | if err != nil { 44 | panic(err) 45 | } 46 | fld2, err := idx1.CreateField("field2") 47 | if err != nil { 48 | panic(err) 49 | } 50 | 51 | fld0.SetBit(0, 0, nil) 52 | fld0.SetBit(0, 1, nil) 53 | fld0.SetBit(0, 0, nil) 54 | fld0.SetBit(0, 2, nil) 55 | fld0.SetBit(1, 1, nil) 56 | fld0.SetBit(1, 12, nil) 57 | fld0.SetBit(2, 24, nil) 58 | fld1.SetBit(1, 2, nil) 59 | fld1.SetBit(1, 13, nil) 60 | fld1.SetBit(1, 65536, nil) 61 | fld1.SetBit(2, 12, nil) 62 | fld2.SetBit(3, 36, nil) 63 | } 64 | 65 | func TestIngest(t *testing.T) { 66 | m, path := SetupMain() 67 | defer os.RemoveAll(path) 68 | 69 | cluster := test.MustRunCluster(t, 3) 70 | defer cluster.Close() 71 | for _, cmd := range cluster { 72 | host := cmd.URL() 73 | m.Hosts = append(m.Hosts, host) 74 | } 75 | 76 | if err := ExecuteIngest(m); err != nil { 77 | t.Fatalf("executing ingest: %v", err) 78 | } 79 | 80 | index := "dx-index" 81 | q := "Row(field=%v)" 82 | expectedCols := []uint64{2, 5, 10} 83 | 84 | for i := 0; i < 5; i++ { 85 | query := fmt.Sprintf(q, i) 86 | response := cluster.Query(t, index, query) 87 | columns := response.Results[0].(*pilosa.Row).Columns() 88 | if !reflect.DeepEqual(columns, expectedCols) { 89 | t.Fatalf("row %v should have values %v, got %v", i, expectedCols, columns) 90 | } 91 | } 92 | 93 | for i := 5; i < 15; i++ { 94 | query := fmt.Sprintf(q, i) 95 | response := cluster.Query(t, index, query) 96 | columns := response.Results[0].(*pilosa.Row).Columns() 97 | if reflect.DeepEqual(columns, []uint64(nil)) { 98 | t.Fatalf("row %v should have no values, got %v", i, columns) 99 | } 100 | } 101 | } 102 | 103 | func TestQuery(t *testing.T) { 104 | m, path := SetupMain() 105 | defer os.RemoveAll(path) 106 | 107 | cluster := test.MustRunCluster(t, 1) 108 | defer cluster.Close() 109 | for _, cmd := range cluster { 110 | host := cmd.URL() 111 | m.Hosts = append(m.Hosts, host) 112 | } 113 | holder := cluster[0].Server.Holder() 114 | 115 | SetupBits(holder) 116 | 117 | if err := ExecuteQueries(m); err != nil { 118 | t.Fatalf("executing queries: %+v", err) 119 | } 120 | } 121 | 122 | func TestCompare(t *testing.T) { 123 | ingest0 := filepath.Join("./testdata", "ingest", "0") 124 | ingest1 := filepath.Join("./testdata", "ingest", "1") 125 | query0 := filepath.Join("./testdata", "query", "0") 126 | query1 := filepath.Join("./testdata", "query", "1") 127 | 128 | if err := ExecuteComparison(ingest0, ingest1); err != nil { 129 | t.Fatalf("comparing ingest: %v", err) 130 | } 131 | 132 | if err := ExecuteComparison(query0, query1); err != nil { 133 | t.Fatalf("comparing query: %v", err) 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /apophenia/weighted.go: 
-------------------------------------------------------------------------------- 1 | package apophenia 2 | 3 | import ( 4 | "errors" 5 | "math/bits" 6 | ) 7 | 8 | // Weighted provides a generator which produces weighted bits -- bits with 9 | // a specified probability of being set, as opposed to random values weighted 10 | // a given way. Bit density is specified as N/M, with M a (positive) 11 | // power of 2, and N an integer between 0 and M. 12 | // 13 | // The underlying generator can produce 2^128 128-bit values, but the 14 | // iterative process requires log2(densityScale) 128-bit values from the 15 | // source per 128-bit output, so the top 7 bits of the address are used 16 | // as an iteration counter. Thus, Weighted.Bit can produce 2^128 distinct 17 | // values, but if the offset provided to Weighted.Bits is over 2^121, there 18 | // will be overlap between the source values used for those bits, and 19 | // source values used (for different iterations) for other offsets. 20 | type Weighted struct { 21 | src Sequence 22 | // internal result cache 23 | lastValue Uint128 24 | lastOffset Uint128 25 | lastDensity, lastScale uint64 26 | } 27 | 28 | // NewWeighted yields a new Weighted using the given sequence as a source of 29 | // seekable pseudo-random bits. 30 | func NewWeighted(src Sequence) (*Weighted, error) { 31 | if src == nil { 32 | return nil, errors.New("new Weighted requires a non-nil source") 33 | } 34 | w := Weighted{src: src} 35 | return &w, nil 36 | } 37 | 38 | // Bit returns the single 0-or-1 bit at the specified offset. 39 | func (w *Weighted) Bit(offset Uint128, density uint64, scale uint64) uint64 { 40 | var bit uint64 41 | // In order to be able to cache/reuse values, we want to grab a whole 42 | // set of 128 bits including a given offset, and use the same 43 | // calculation for all of them. So we mask out the low-order 7 bits 44 | // of offset, and use them separately. Meanwhile, Bits will 45 | // always right-shift its column bits by 7, which reduces the 46 | // space of possible results but means that it produces the same 47 | // set of bits for any given batch... 48 | offset.Lo, bit = offset.Lo&^127, offset.Lo&127 49 | if offset == w.lastOffset && density == w.lastDensity && scale == w.lastScale { 50 | return w.lastValue.Bit(bit) 51 | } 52 | w.lastValue = w.Bits(offset, density, scale) 53 | w.lastOffset, w.lastDensity, w.lastScale = offset, density, scale 54 | return w.lastValue.Bit(bit) 55 | } 56 | 57 | const weightedIterationMask = (^uint64(0)) >> 7 58 | 59 | // Bits returns the 128-bit set of bits including offset. The column portion 60 | // of offset is right-shifted by 7 to match the offset calculations in Bit(), 61 | // above. Thus, you get the same values back for each sequence of 128 consecutive 62 | // offsets. 63 | func (w *Weighted) Bits(offset Uint128, density uint64, scale uint64) (out Uint128) { 64 | // magic accommodation for choices made elsewhere. 
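// (Editorial note on the loop below: each BitsAt word is an independent
// batch of 1/2-density bits. Walking density's bits from least significant
// to most, OR-ing a fresh word takes a batch of density p to (1+p)/2 and
// AND-ing takes it to p/2, so after log2(scale) rounds the result has
// density exactly density/scale. The TrailingZeros64 step skips the
// initial AND-only rounds, bumping offset.Hi so the remaining rounds read
// the same source words they otherwise would.)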
65 | offset.Lo >>= 7 66 | if density == scale { 67 | out.Not() 68 | return out 69 | } 70 | if density == 0 { 71 | return out 72 | } 73 | lz := uint(bits.TrailingZeros64(density)) 74 | density >>= lz 75 | scale >>= lz 76 | // generate the same results we would have without this hackery 77 | offset.Hi += uint64(lz) 78 | for scale > 1 { 79 | next := w.src.BitsAt(offset) 80 | if density&1 != 0 { 81 | out.Or(next) 82 | } else { 83 | out.And(next) 84 | } 85 | density >>= 1 86 | scale >>= 1 87 | // iteration is stashed in the bottom 24-bits of an offset 88 | offset.Hi++ 89 | } 90 | return out 91 | } 92 | 93 | // NextBits returns the next batch of bits after the last one retrieved. 94 | func (w *Weighted) NextBits(density, scale uint64) (out Uint128) { 95 | w.lastOffset.Inc() 96 | return w.Bits(w.lastOffset, density, scale) 97 | } 98 | -------------------------------------------------------------------------------- /bench/import_range.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "io" 6 | "log" 7 | "math/rand" 8 | "os" 9 | 10 | "github.com/pilosa/go-pilosa" 11 | ) 12 | 13 | var _ Benchmark = (*ImportRangeBenchmark)(nil) 14 | 15 | type ImportRangeBenchmark struct { 16 | Name string `json:"name"` 17 | MinValue int64 `json:"min-value"` 18 | MinColumnID int64 `json:"min-column-id"` 19 | MaxValue int64 `json:"max-value"` 20 | MaxColumnID int64 `json:"max-column-id"` 21 | Index string `json:"index"` 22 | Field string `json:"field"` 23 | Row string `json:"row"` 24 | Iterations int64 `json:"iterations"` 25 | Seed int64 `json:"seed"` 26 | Distribution string `json:"distribution"` 27 | BufferSize int `json:"-"` 28 | 29 | Logger *log.Logger `json:"-"` 30 | } 31 | 32 | // NewImportRangeBenchmark returns a new instance of ImportRangeBenchmark. 33 | func NewImportRangeBenchmark() *ImportRangeBenchmark { 34 | return &ImportRangeBenchmark{ 35 | Name: "import-range", 36 | Logger: log.New(os.Stderr, "", log.LstdFlags), 37 | } 38 | } 39 | 40 | // Run runs the benchmark. 41 | func (b *ImportRangeBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 42 | result := NewResult() 43 | result.AgentNum = agentNum 44 | result.Configuration = b 45 | 46 | // Initialize schema. 
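// Unlike the set-bit benchmarks, a range import needs an int (BSI) field,
// so ensureSchema is passed pilosa.OptFieldTypeInt with the benchmark's
// value bounds below.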
47 | _, field, err := ensureSchema(client, b.Index, b.Field, pilosa.OptFieldTypeInt(b.MinValue, b.MaxValue)) 48 | if err != nil { 49 | return result, err 50 | } 51 | 52 | itr := b.ValueIterator(b.Seed + int64(agentNum)) 53 | err = client.ImportField(field, itr, pilosa.OptImportBatchSize(b.BufferSize)) 54 | result.Extra["actual-iterations"] = itr.actualIterations 55 | result.Extra["avgdelta"] = itr.avgdelta 56 | return result, err 57 | } 58 | 59 | func (b *ImportRangeBenchmark) ValueIterator(seed int64) *ValueIterator { 60 | rand := rand.New(rand.NewSource(seed)) 61 | 62 | itr := NewValueIterator() 63 | itr.maxbitnum = (b.MaxValue - b.MinValue + 1) * (b.MaxColumnID - b.MinColumnID + 1) 64 | itr.avgdelta = float64(itr.maxbitnum) / float64(b.Iterations) 65 | itr.minvalue, itr.mincol, itr.maxvalue, itr.maxcol = b.MinValue, b.MinColumnID, b.MaxValue, b.MaxColumnID 66 | 67 | if b.Distribution == "exponential" { 68 | itr.lambda = 1.0 / itr.avgdelta 69 | itr.fdelta = func(itr *ValueIterator) float64 { 70 | return rand.ExpFloat64() / itr.lambda 71 | } 72 | } else { // if b.Distribution == "uniform" { 73 | itr.fdelta = func(itr *ValueIterator) float64 { 74 | return rand.Float64() * itr.avgdelta * 2 75 | } 76 | } 77 | return itr 78 | } 79 | 80 | func NewValueIterator() *ValueIterator { 81 | return &ValueIterator{} 82 | } 83 | 84 | type ValueIterator struct { 85 | actualIterations int64 86 | bitnum int64 87 | maxbitnum int64 88 | minvalue int64 89 | maxvalue int64 90 | mincol int64 91 | maxcol int64 92 | avgdelta float64 93 | lambda float64 94 | rng *rand.Rand 95 | fdelta func(itr *ValueIterator) float64 96 | } 97 | 98 | func (itr *ValueIterator) NextRecord() (pilosa.Record, error) { 99 | delta := itr.fdelta(itr) 100 | if delta < 1.0 { 101 | delta = 1.0 102 | } 103 | itr.bitnum = int64(float64(itr.bitnum) + delta) 104 | if itr.bitnum > itr.maxbitnum { 105 | return pilosa.FieldValue{}, io.EOF 106 | } 107 | 108 | itr.actualIterations++ 109 | return pilosa.FieldValue{ 110 | Value: int64((itr.bitnum / (itr.maxcol - itr.mincol + 1)) + itr.minvalue), 111 | ColumnID: uint64(itr.bitnum%(itr.maxcol-itr.mincol+1) + itr.mincol), 112 | }, nil 113 | } 114 | -------------------------------------------------------------------------------- /apophenia/permute_test.go: -------------------------------------------------------------------------------- 1 | package apophenia 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func PermutationOrBust(period int64, seed int64, expectedErr string, tb testing.TB) *Permutation { 9 | seq := NewSequence(seed) 10 | p, err := NewPermutation(period, 0, seq) 11 | if err != nil { 12 | if expectedErr == "" || expectedErr != err.Error() { 13 | tb.Fatalf("unexpected error creating permutation generator: %s", err) 14 | } 15 | return p 16 | } 17 | if p == nil { 18 | tb.Fatalf("unexpected nil permutation generator without error") 19 | } 20 | return p 21 | } 22 | 23 | func Test_PermuteCycle(t *testing.T) { 24 | sizes := []int64{8, 23, 64, 10000} 25 | for _, size := range sizes { 26 | p := PermutationOrBust(size, 0, "", t) 27 | seen := make(map[int64]struct{}, size) 28 | for i := int64(0); i < size; i++ { 29 | n := p.Next() 30 | if _, ok := seen[n]; ok { 31 | list := make([]int64, len(seen)) 32 | j := 0 33 | for k := range seen { 34 | list[j] = k 35 | j++ 36 | } 37 | t.Fatalf("size %d: got duplicate entry %d in %v", size, n, list) 38 | } 39 | seen[n] = struct{}{} 40 | } 41 | } 42 | } 43 | 44 | func TestPermuteSeed(t *testing.T) { 45 | size := int64(129) 46 | seeds := int64(8) 47 | p := 
make([]*Permutation, seeds) 48 | seen := make([]map[int64]struct{}, seeds) 49 | for s := int64(0); s < seeds; s++ { 50 | p[s] = PermutationOrBust(size, s, "", t) 51 | seen[s] = make(map[int64]struct{}, size) 52 | } 53 | matches := int64(0) 54 | v := make([]int64, seeds) 55 | for i := int64(0); i < size; i++ { 56 | for s := int64(0); s < seeds; s++ { 57 | v[s] = p[s].Next() 58 | if _, ok := seen[s][v[s]]; ok { 59 | t.Fatalf("duplicate entry (size %d, seed %d, entry %d): %d", 60 | size, s, i, v[s]) 61 | } 62 | seen[s][v[s]] = struct{}{} 63 | } 64 | for s := int64(1); s < seeds; s++ { 65 | if v[s] == v[s-1] { 66 | matches++ 67 | } 68 | } 69 | } 70 | // assuming number of outcomes is more than about 16, matches are pretty rare if nothing 71 | // is wrong. 72 | if (matches * 8) > (size * seeds) { 73 | t.Fatalf("too many matches: %d values to permute, %d seeds, %d matches seems suspicious.", 74 | size, seeds, matches) 75 | } else { 76 | t.Logf("permuting %d values across %d seeds: %d matches (OK)", size, seeds, matches) 77 | } 78 | } 79 | 80 | func Test_PermuteNth(t *testing.T) { 81 | size := int64(129) 82 | seeds := int64(8) 83 | p := make([][]*Permutation, seeds) 84 | seen := make([]map[int64]struct{}, seeds) 85 | for s := int64(0); s < seeds; s++ { 86 | p[s] = make([]*Permutation, 2) 87 | p[s][0] = PermutationOrBust(size, s, "", t) 88 | p[s][1] = PermutationOrBust(size, s, "", t) 89 | seen[s] = make(map[int64]struct{}, size) 90 | } 91 | matches := int64(0) 92 | v := make([]int64, seeds) 93 | for i := int64(0); i < size; i++ { 94 | for s := int64(0); s < seeds; s++ { 95 | v[s] = p[s][0].Next() 96 | if _, ok := seen[s][v[s]]; ok { 97 | t.Fatalf("duplicate entry (size %d, seed %d, entry %d): %d", 98 | size, s, i, v[s]) 99 | } 100 | seen[s][v[s]] = struct{}{} 101 | vN := p[s][1].Nth(i) 102 | if vN != v[s] { 103 | t.Fatalf("Nth entry didn't match Nth call to Next()) (size %d, seed %d, n %d): expected %d, got %d\n", 104 | size, s, i, v[s], vN) 105 | } 106 | } 107 | } 108 | // assuming number of outcomes is more than about 16, matches are pretty rare if nothing 109 | // is wrong. 110 | if (matches * 8) > (size * seeds) { 111 | t.Fatalf("too many matches: %d values to permute, %d seeds, %d matches seems suspicious.", 112 | size, seeds, matches) 113 | } else { 114 | t.Logf("permuting %d values across %d seeds: %d matches (OK)", size, seeds, matches) 115 | } 116 | } 117 | 118 | func Benchmark_PermuteCycle(b *testing.B) { 119 | sizes := []int64{5, 63, 1000000, (1 << 19)} 120 | for _, size := range sizes { 121 | b.Run(fmt.Sprintf("Pool%d", size), func(b *testing.B) { 122 | p := PermutationOrBust(size, 0, "", b) 123 | for i := 0; i < b.N; i++ { 124 | _ = p.Next() 125 | } 126 | }) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /apophenia/README.md: -------------------------------------------------------------------------------- 1 | # Apophenia -- seeking patterns in randomness 2 | 3 | Apophenia provides an approximate emulation of a seekable pseudo-random 4 | number generator. You provide a seed, and get a generator which can generate 5 | a large number of pseudo-random bits which will occur in a predictable 6 | pattern, but you can seek anywhere in that pattern in constant time. 7 | 8 | Apophenia's interface is intended to be similar to that of stdlib's 9 | `math/rand`. In fact, the `Sequence` interface type is a strict superset 10 | of `rand.Source`. 
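For example, a `Sequence` can back a stdlib `*rand.Rand` directly (a minimal
sketch relying only on the superset claim above):

```go
package main

import (
	"fmt"
	"math/rand"

	"github.com/pilosa/tools/apophenia"
)

func main() {
	// Sequence satisfies rand.Source, so rand.New accepts it as-is.
	r := rand.New(apophenia.NewSequence(42))
	fmt.Println(r.Int63(), r.Float64())
}
```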
11 | 
12 | ## Implementation Notes
13 | 
14 | AES-128, for a given seed, acts as a PRNG which maps each of 2^128 input
15 | coordinates to one of 2^128 possible outputs, each of which is 128 bits
16 | long. This is equivalent to a single-bit PRNG of period 2^135. It is
17 | also possible to treat the input 128 bits as a coordinate system of
18 | some sort, to allow multiple parallel sequences, etcetera.
19 | 
20 | This design may have serious fundamental flaws, but it worked out in
21 | light testing and I'm an optimist.
22 | 
23 | ### Sequences and Offsets
24 | 
25 | Apophenia's underlying implementation admits 128-bit keys, and 128-bit
26 | offsets within each sequence. In most cases:
27 | 
28 | * That's more space than we need.
29 | * Working with a non-native type for item numbers is annoying,
30 |   but 64 bits is enough range.
31 | * It would be nice to avoid using the *same* pseudo-random values
32 |   for different things.
33 | * Even when those things have the same basic identifying ID or
34 |   value.
35 | 
36 | For instance, say you wish to generate a billion items. Each item should
37 | have several "random" values. Some values might follow a Zipf distribution,
38 | others might just be "U % N" for some N. If you use the item number as a
39 | key and seek to the same position for each of these values, you get the
40 | same bits for each of them, which may produce unintended similarities or
41 | correlations between them.
42 | 
43 | With this in mind, apophenia divides its 128-bit offset space into a number
44 | of spaces. The most significant bits are used for a sequence-type value, one
45 | of:
46 | 
47 | * SequenceDefault
48 | * SequencePermutationK/SequencePermutationF: permutations
49 | * SequenceWeighted: weighted bits
50 | * SequenceLinear: linear values within a range
51 | * SequenceZipfU: uniforms to use for Zipf values
52 | * SequenceRandSource: default offsets for the rand.Source
53 | * SequenceUser1/SequenceUser2: reserved for non-apophenia usage
54 | 
55 | Other values are not yet defined, but are reserved.
56 | 
57 | Within most of these spaces, the rest of the high word of the offset is used
58 | for a 'seed' (used to select different sequences) and an 'iteration' (used
59 | for successive values consumed by an algorithm). The low-order word is treated
60 | as a 64-bit item ID.
61 | 
62 | ```
63 | High-order word:
64 | 0               1               2               3
65 | 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
66 | [iteration             ][seed                          ][seq   ]
67 | Low-order word:
68 | 0               1               2               3
69 | 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
70 | [id                                                            ]
71 | ```
72 | 
73 | The convenience function `OffsetFor(sequence, seed, iteration, id)`
74 | supports this usage.
75 | 
76 | As a side effect, if generating additional values for a given seed and
77 | id, you can increment the high-order word of the `Uint128`,
78 | and if generating values for a new id, you can increment the low-order
79 | word. If your algorithm consumes more than 2^24 values for a single
80 | operation, you could start hitting values shared with other seeds. Oh,
81 | well.
82 | 
83 | #### Iteration usage
84 | 
85 | For the built-in consumers:
86 | 
87 | * Weighted consumes log2(scale) iterated values.
88 | * Zipf consumes an *average* of no more than about 1.1 values.
89 | * Permutation consumes one iterated value per 128 rounds of permutation,
90 |   where rounds is equal to `6*ceil(log2(max))`. (For instance, a second
91 |   value is consumed around a maximum of 2^22, and a third around 2^43.)
92 | * Nothing else uses more than one iterated value.
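To make that layout concrete, here is how `OffsetFor` packs an example offset (the sequence class, seed, and id values are arbitrary; the packing matches the implementation in `apophenia.go`):

```
off := apophenia.OffsetFor(apophenia.SequenceZipfU, 7, 0, 99)
// off.Hi == (uint64(apophenia.SequenceZipfU) << 56) | (7 << 24) | 0
// off.Lo == 99
```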
93 | -------------------------------------------------------------------------------- /apophenia/zipf.go: -------------------------------------------------------------------------------- 1 | package apophenia 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | ) 7 | 8 | // Zipf produces a series of values following a Zipf distribution. 9 | // It is initialized with values q, v, and max, and produces values 10 | // in the range [0,max) such that the probability of a value k is 11 | // proportional to (v+k) ** -q. v must be >= 1, q must be > 1. 12 | // 13 | // This is based on the same paper used for the golang stdlib Zipf 14 | // distribution: 15 | // 16 | // "Rejection-Inversion to Generate Variates 17 | // from Monotone Discrete Distributions" 18 | // W.Hormann, G.Derflinger [1996] 19 | // http://eeyore.wu-wien.ac.at/papers/96-04-04.wh-der.ps.gz 20 | // 21 | // This implementation differs from stdlib's in that it is seekable; you 22 | // can get the Nth value in a theoretical series of results in constant 23 | // time, without having to generate the whole series linearly. 24 | type Zipf struct { 25 | src Sequence 26 | seed uint32 27 | q float64 28 | v float64 29 | max float64 30 | oneMinusQ float64 31 | oneOverOneMinusQ float64 32 | h func(float64) float64 33 | hInv func(float64) float64 34 | hImaxOneHalf float64 35 | hX0MinusHImaxOneHalf float64 // hX0 is only ever used as hX0 - h(i[max] + 1/2) 36 | s float64 37 | idx uint64 38 | } 39 | 40 | // NewZipf returns a new Zipf object with the specified q, v, and 41 | // max, and with its random source seeded in some way by seed. 42 | // The sequence of values returned is consistent for a given set 43 | // of inputs. The seed parameter can select one of multiple sub-sequences 44 | // of the given sequence. 45 | func NewZipf(q float64, v float64, max uint64, seed uint32, src Sequence) (z *Zipf, err error) { 46 | if math.IsNaN(q) || math.IsNaN(v) { 47 | return nil, fmt.Errorf("q (%g) and v (%g) must not be NaN for Zipf distribution", q, v) 48 | } 49 | if q <= 1 || v < 1 { 50 | return nil, fmt.Errorf("need q > 1 (got %g) and v >= 1 (got %g) for Zipf distribution", q, v) 51 | } 52 | if src == nil { 53 | return nil, fmt.Errorf("need a usable PRNG apophenia.Sequence") 54 | } 55 | oneMinusQ := 1 - q 56 | oneOverOneMinusQ := 1 / (1 - q) 57 | z = &Zipf{ 58 | q: q, 59 | v: v, 60 | max: float64(max), 61 | seed: seed, 62 | oneMinusQ: oneMinusQ, 63 | oneOverOneMinusQ: oneOverOneMinusQ, 64 | } 65 | z.h = func(x float64) float64 { 66 | return math.Exp((1-q)*math.Log(v+x)) * oneOverOneMinusQ 67 | } 68 | z.hInv = func(x float64) float64 { 69 | return -v + math.Exp(oneOverOneMinusQ*math.Log(oneMinusQ*x)) 70 | } 71 | hX0 := z.h(0.5) - math.Exp(math.Log(v)*-q) 72 | z.hImaxOneHalf = z.h(z.max + 0.5) 73 | z.hX0MinusHImaxOneHalf = hX0 - z.hImaxOneHalf 74 | z.s = 1 - z.hInv(z.h(1.5)-math.Exp(math.Log(v+1)*-q)) 75 | z.src = src 76 | if err != nil { 77 | return nil, err 78 | } 79 | return z, nil 80 | } 81 | 82 | // Nth returns the Nth value from the sequence associated with the 83 | // given Zipf. For a given set of input values (q, v, max, and seed), 84 | // and a given index, the same value is returned. 
85 | func (z *Zipf) Nth(index uint64) uint64 { 86 | z.idx = index 87 | offset := OffsetFor(SequenceZipfU, z.seed, 0, index) 88 | for { 89 | bits := z.src.BitsAt(offset) 90 | uInt := bits.Lo 91 | u := float64(uInt&(1<<53-1)) / (1 << 53) 92 | u = z.hImaxOneHalf + u*z.hX0MinusHImaxOneHalf 93 | x := z.hInv(u) 94 | k := math.Floor(x + 0.5) 95 | if k-x <= z.s { 96 | return uint64(k) 97 | } 98 | if u >= z.h(k+0.5)-math.Exp(-math.Log(z.v+k)*z.q) { 99 | return uint64(k) 100 | } 101 | // the low-order 24 bits of the high-order 64-bit word 102 | // are the "iteration", which started as zero. Assuming we 103 | // don't need more than ~16.7M values, we're good. The expected 104 | // average is about 1.1. 105 | offset.Hi++ 106 | } 107 | } 108 | 109 | // Next returns the "next" value -- the one after the last one requested, or 110 | // value 1 if none have been requested before. 111 | func (z *Zipf) Next() uint64 { 112 | return z.Nth(z.idx + 1) 113 | } 114 | -------------------------------------------------------------------------------- /apophenia/apophenia.go: -------------------------------------------------------------------------------- 1 | // Package apophenia provides seekable pseudo-random numbers, allowing 2 | // reproducibility of pseudo-random results regardless of the order they're 3 | // generated in. 4 | package apophenia 5 | 6 | import ( 7 | "crypto/aes" 8 | "crypto/cipher" 9 | "encoding/binary" 10 | "math/rand" 11 | ) 12 | 13 | // Sequence represents a specific deterministic but pseudo-random-ish series 14 | // of bits. A Sequence can be used as a `rand.Source` or `rand.Source64` 15 | // for `math/rand`. 16 | type Sequence interface { 17 | rand.Source 18 | Seek(Uint128) Uint128 19 | BitsAt(Uint128) Uint128 20 | } 21 | 22 | // aesSequence128 implements Sequence on top of an AES block cipher. 23 | type aesSequence128 struct { 24 | key [16]byte 25 | cipher cipher.Block 26 | plainText, cipherText [16]byte 27 | offset Uint128 28 | err error 29 | } 30 | 31 | // NewSequence generates a sequence initialized with the given seed. 32 | func NewSequence(seed int64) Sequence { 33 | s := aesSequence128{offset: OffsetFor(SequenceRandSource, 0, 0, 0)} 34 | s.Seed(seed) 35 | if s.err != nil { 36 | panic("impossible error: " + s.err.Error()) 37 | } 38 | return &s 39 | } 40 | 41 | // Seed sets the generator to a known state. 42 | func (s *aesSequence128) Seed(seed int64) { 43 | var newKey [16]byte 44 | binary.LittleEndian.PutUint64(newKey[:8], uint64(seed)) 45 | newCipher, err := aes.NewCipher(newKey[:]) 46 | if err != nil { 47 | // we can't return an error, because Seed() can't fail. also 48 | // note that this can't actually happen, supposedly. 49 | s.err = err 50 | return 51 | } 52 | copy(s.key[:], newKey[:]) 53 | s.cipher = newCipher 54 | s.offset = Uint128{0, 0} 55 | } 56 | 57 | // Int63 returns a value in 0..(1<<63)-1. 58 | func (s *aesSequence128) Int63() int64 { 59 | return int64(s.Uint64() >> 1) 60 | } 61 | 62 | // Uint64 returns a value in 0..(1<<64)-1. 63 | func (s *aesSequence128) Uint64() uint64 { 64 | out := s.BitsAt(s.offset) 65 | s.offset.Inc() 66 | return out.Lo 67 | } 68 | 69 | // SequenceClass denotes one of the sequence types, which are used to allow 70 | // sequences to avoid hitting each other's pseudo-random results. 71 | type SequenceClass uint8 72 | 73 | const ( 74 | // SequenceDefault is the zero value, used if you didn't think to pick one. 75 | SequenceDefault SequenceClass = iota 76 | // SequencePermutationK is the K values for the permutation algorithm. 
77 | SequencePermutationK
78 | // SequencePermutationF is the F values for the permutation algorithm.
79 | SequencePermutationF
80 | // SequenceWeighted is used to generate weighted values for a given
81 | // position.
82 | SequenceWeighted
83 | // SequenceLinear is the random numbers for U%N type usage.
84 | SequenceLinear
85 | // SequenceZipfU is the random numbers for the Zipf computations.
86 | SequenceZipfU
87 | // SequenceRandSource is used by default when a Sequence is being
88 | // used as a rand.Source.
89 | SequenceRandSource
90 | // SequenceUser1 is reserved for non-apophenia package usage.
91 | SequenceUser1
92 | // SequenceUser2 is reserved for non-apophenia package usage.
93 | SequenceUser2
94 | )
95 | 
96 | // OffsetFor determines the Uint128 offset for a given class/seed/iteration/id.
97 | func OffsetFor(class SequenceClass, seed uint32, iter uint32, id uint64) Uint128 {
98 | return Uint128{Hi: (uint64(class) << 56) | (uint64(seed) << 24) | uint64(iter),
99 | Lo: id}
100 | }
101 | 
102 | // Seek seeks to the specified offset, yielding the previous offset. This
103 | // sets the stream to a specific point in its cycle, affecting future calls
104 | // to Int63 or Uint64.
105 | func (s *aesSequence128) Seek(offset Uint128) (old Uint128) {
106 | old, s.offset = s.offset, offset
107 | return old
108 | }
109 | 
110 | // BitsAt yields the sequence of bits at the provided offset into the stream.
111 | func (s *aesSequence128) BitsAt(offset Uint128) (out Uint128) {
112 | binary.LittleEndian.PutUint64(s.plainText[:8], offset.Lo)
113 | binary.LittleEndian.PutUint64(s.plainText[8:], offset.Hi)
114 | s.cipher.Encrypt(s.cipherText[:], s.plainText[:])
115 | out.Lo, out.Hi = binary.LittleEndian.Uint64(s.cipherText[:8]), binary.LittleEndian.Uint64(s.cipherText[8:])
116 | return out
117 | }
118 | 
--------------------------------------------------------------------------------
/bench/zipf.go:
--------------------------------------------------------------------------------
1 | package bench
2 | 
3 | import (
4 | "context"
5 | "fmt"
6 | "log"
7 | "math"
8 | "math/rand"
9 | "os"
10 | "time"
11 | 
12 | "github.com/pilosa/go-pilosa"
13 | "github.com/pilosa/tools/apophenia"
14 | )
15 | 
16 | // ZipfBenchmark sets random bits according to the Zipf-Mandelbrot distribution.
17 | // This distribution accepts two parameters, Exponent and Ratio, for both rows and columns.
18 | // It also uses apophenia.Permutation to permute IDs randomly.
19 | type ZipfBenchmark struct {
20 | Name string `json:"name"`
21 | MinRowID int64 `json:"min-row-id"`
22 | MinColumnID int64 `json:"min-column-id"`
23 | MaxRowID int64 `json:"max-row-id"`
24 | MaxColumnID int64 `json:"max-column-id"`
25 | Iterations int `json:"iterations"`
26 | Seed int64 `json:"seed"`
27 | Index string `json:"index"`
28 | Field string `json:"field"`
29 | RowExponent float64 `json:"row-exponent"`
30 | RowRatio float64 `json:"row-ratio"`
31 | ColumnExponent float64 `json:"column-exponent"`
32 | ColumnRatio float64 `json:"column-ratio"`
33 | Operation string `json:"operation"`
34 | 
35 | Logger *log.Logger `json:"-"`
36 | }
37 | 
38 | // NewZipfBenchmark returns a new instance of ZipfBenchmark.
39 | func NewZipfBenchmark() *ZipfBenchmark {
40 | return &ZipfBenchmark{
41 | Name: "zipf",
42 | Logger: log.New(os.Stderr, "", log.LstdFlags),
43 | }
44 | }
45 | 
46 | // Run runs the Zipf benchmark.
47 | func (b *ZipfBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) {
48 | result := NewResult()
49 | result.AgentNum = agentNum
50 | result.Configuration = b
51 | 
52 | // Initialize schema.
53 | _, field, err := ensureSchema(client, b.Index, b.Field)
54 | if err != nil {
55 | return result, err
56 | }
57 | 
58 | seed := b.Seed + int64(agentNum)
59 | rowOffset := getZipfOffset(b.MaxRowID-b.MinRowID, b.RowExponent, b.RowRatio)
60 | rowRand := rand.NewZipf(rand.New(rand.NewSource(seed)), b.RowExponent, rowOffset, uint64(b.MaxRowID-b.MinRowID-1))
61 | columnOffset := getZipfOffset(b.MaxColumnID-b.MinColumnID, b.ColumnExponent, b.ColumnRatio)
62 | columnRand := rand.NewZipf(rand.New(rand.NewSource(seed)), b.ColumnExponent, columnOffset, uint64(b.MaxColumnID-b.MinColumnID-1))
63 | rowSeq := apophenia.NewSequence(seed)
64 | colSeq := apophenia.NewSequence(seed + 1)
65 | rowPerm, err := apophenia.NewPermutation(b.MaxRowID-b.MinRowID, 0, rowSeq)
66 | if err != nil {
67 | return result, err
68 | }
69 | columnPerm, err := apophenia.NewPermutation(b.MaxColumnID-b.MinColumnID, 0, colSeq)
70 | if err != nil {
71 | return result, err
72 | }
73 | 
74 | for n := 0; n < b.Iterations; n++ {
75 | // generate IDs from Zipf distribution
76 | rowIDOriginal := rowRand.Uint64()
77 | profIDOriginal := columnRand.Uint64()
78 | 
79 | // permute IDs randomly, but repeatably
80 | rowID := rowPerm.Nth(int64(rowIDOriginal))
81 | profID := columnPerm.Nth(int64(profIDOriginal))
82 | 
83 | var q pilosa.PQLQuery
84 | switch b.Operation {
85 | case "set":
86 | q = field.Set(b.MinRowID+int64(rowID), b.MinColumnID+int64(profID))
87 | case "clear":
88 | q = field.Clear(b.MinRowID+int64(rowID), b.MinColumnID+int64(profID))
89 | default:
90 | return result, fmt.Errorf("unsupported operation: \"%s\" (must be \"set\" or \"clear\")", b.Operation)
91 | }
92 | 
93 | start := time.Now()
94 | _, err := client.Query(q)
95 | result.Add(time.Since(start), nil)
96 | if err != nil {
97 | return result, err
98 | }
99 | }
100 | return result, err
101 | }
102 | 
103 | // getZipfOffset converts a ratio into the offset parameter used by the Zipf
104 | // distribution. Offset is the true parameter, but the ratio, as defined here,
105 | // is a simpler, readable way to define the distribution: offset is in [1, inf),
106 | // and its meaning depends on N (a pain for updating benchmark configs), while
107 | // ratio is in (0, 1): the ratio of the lowest probability in the distribution to the highest.
108 | // ratio=0.01 corresponds to a very small offset - the most skewed distribution for a given pair (N, exp);
109 | // ratio=0.99 corresponds to a very large offset - the most nearly uniform distribution for a given (N, exp).
110 | func getZipfOffset(N int64, exp, ratio float64) float64 {
111 | z := math.Pow(ratio, 1/exp)
112 | return z * float64(N-1) / (1 - z)
113 | }
114 | 
--------------------------------------------------------------------------------
/apophenia/int128.go:
--------------------------------------------------------------------------------
1 | package apophenia
2 | 
3 | import "fmt"
4 | 
5 | // Uint128 is an array of 2 uint64, treated as a single
6 | // object to simplify calling conventions.
7 | type Uint128 struct {
8 | Lo, Hi uint64 // low-order and high-order uint64 words. Value is `(Hi << 64) | Lo`.
9 | }
10 | 
11 | // Add adds value to its receiver in place.
12 | func (u *Uint128) Add(value Uint128) {
13 | u.Lo += value.Lo
14 | if u.Lo < value.Lo {
15 | u.Hi++
16 | }
17 | u.Hi += value.Hi
18 | }
19 | 
20 | // Sub subtracts value from its receiver in place.
21 | func (u *Uint128) Sub(value Uint128) {
22 | if u.Lo < value.Lo {
23 | u.Hi-- // borrow from the high word
24 | }
25 | u.Lo -= value.Lo
26 | u.Hi -= value.Hi
27 | }
28 | 
29 | // And does a bitwise and with value, in place.
30 | func (u *Uint128) And(value Uint128) {
31 | u.Lo, u.Hi = u.Lo&value.Lo, u.Hi&value.Hi
32 | }
33 | 
34 | // Or does a bitwise or with value, in place.
35 | func (u *Uint128) Or(value Uint128) {
36 | u.Lo, u.Hi = u.Lo|value.Lo, u.Hi|value.Hi
37 | }
38 | 
39 | // Xor does a bitwise xor with value, in place.
40 | func (u *Uint128) Xor(value Uint128) {
41 | u.Lo, u.Hi = u.Lo^value.Lo, u.Hi^value.Hi
42 | }
43 | 
44 | // Not does a bitwise complement in place.
45 | func (u *Uint128) Not() {
46 | u.Lo, u.Hi = ^u.Lo, ^u.Hi
47 | }
48 | 
49 | // Mask masks u down to its lower n bits, in place.
50 | func (u *Uint128) Mask(n uint64) {
51 | if n >= 128 {
52 | return
53 | }
54 | if n >= 64 {
55 | u.Hi &= (1 << (n & 63)) - 1
56 | } else {
57 | u.Lo, u.Hi = u.Lo&((1<<n)-1), 0 // no high-word bits survive below bit 64
58 | }
59 | }
60 | 
61 | // Mask produces a bitmask with n bits set.
62 | func Mask(n uint64) (u Uint128) {
63 | if n >= 128 {
64 | u.Not()
65 | return u
66 | }
67 | if n >= 64 {
68 | u.Lo = ^uint64(0)
69 | u.Hi = (1 << (n & 63)) - 1
70 | return u
71 | }
72 | u.Lo = (1 << n) - 1
73 | return u
74 | }
75 | 
76 | // String provides a string representation.
77 | func (u Uint128) String() string {
78 | return fmt.Sprintf("0x%x%016x", u.Hi, u.Lo)
79 | }
80 | 
81 | // bit rotation: for 1-63 bits, we are moving the low-order N bits of u.Lo
82 | // into the high-order N bits of u.Hi, and vice versa. For 64-127, it's that
83 | // plus swapping u.Lo and u.Hi.
84 | 
85 | // RotateRight rotates u right by n bits.
86 | func (u *Uint128) RotateRight(n uint64) {
87 | if n&64 != 0 {
88 | u.Lo, u.Hi = u.Hi, u.Lo
89 | }
90 | n &= 63
91 | if n == 0 {
92 | return
93 | }
94 | unbits := 64 - n
95 | 
96 | u.Lo, u.Hi = (u.Lo>>n)|(u.Hi<<unbits), (u.Hi>>n)|(u.Lo<<unbits)
97 | }
98 | 
99 | // RotateLeft rotates u left by n bits.
100 | func (u *Uint128) RotateLeft(n uint64) {
101 | if n&64 != 0 {
102 | u.Lo, u.Hi = u.Hi, u.Lo
103 | }
104 | n &= 63
105 | if n == 0 {
106 | return
107 | }
108 | unbits := 64 - n
109 | 
110 | u.Lo, u.Hi = (u.Lo<<n)|(u.Hi>>unbits), (u.Hi<<n)|(u.Lo>>unbits)
111 | }
112 | 
113 | // ShiftRight shifts u right by n bits.
114 | func (u *Uint128) ShiftRight(n uint64) {
115 | if n > 127 {
116 | u.Lo, u.Hi = 0, 0
117 | return
118 | }
119 | if n >= 64 {
120 | u.Lo, u.Hi = u.Hi>>(n&63), 0
121 | return
122 | }
123 | unbits := 64 - n
124 | 
125 | u.Lo, u.Hi = (u.Lo>>n)|(u.Hi<<unbits), (u.Hi >> n)
126 | }
127 | 
128 | // ShiftRightCarry returns both the shifted value and the bits that
129 | // were shifted out. Useful for when you want both x%N and x/N for
130 | // N a power of 2. Only sane if n <= 64.
131 | func (u *Uint128) ShiftRightCarry(n uint64) (out Uint128, carry uint64) {
132 | if n > 64 {
133 | return out, carry
134 | }
135 | if n == 64 {
136 | out.Lo, carry = u.Hi, u.Lo
137 | return out, carry
138 | }
139 | unbits := 64 - n
140 | 
141 | out.Lo, out.Hi, carry = (u.Lo>>n)|(u.Hi<<unbits), (u.Hi >> n), u.Lo&((1<<n)-1)
142 | return out, carry
143 | }
144 | 
145 | // ShiftLeft shifts u left by n bits.
146 | func (u *Uint128) ShiftLeft(n uint64) {
147 | if n > 127 {
148 | u.Lo, u.Hi = 0, 0
149 | return
150 | }
151 | if n >= 64 {
152 | u.Lo, u.Hi = 0, u.Lo<<(n&63)
153 | return
154 | }
155 | n &= 63
156 | if n == 0 {
157 | return
158 | }
159 | unbits := 64 - n
160 | 
161 | u.Lo, u.Hi = (u.Lo << n), (u.Hi<<n)|(u.Lo>>unbits)
162 | }
163 | 
164 | // Bit returns 1 if the nth bit is set, 0 otherwise.
165 | func (u *Uint128) Bit(n uint64) uint64 {
166 | if n >= 128 {
167 | return 0
168 | }
169 | if n >= 64 {
170 | return (u.Hi >> (n & 63)) & 1
171 | }
172 | return (u.Lo >> n) & 1
173 | }
174 | 
175 | // Inc increments its receiver in place.
176 | func (u *Uint128) Inc() {
177 | u.Lo++
178 | if u.Lo == 0 {
179 | u.Hi++
180 | }
181 | }
182 | 
--------------------------------------------------------------------------------
/dx/ingest.go:
--------------------------------------------------------------------------------
1 | package dx
2 | 
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "io/ioutil"
7 | "log"
8 | "os"
9 | "path/filepath"
10 | "strconv"
11 | "sync"
12 | "time"
13 | 
14 | "github.com/pilosa/tools/imagine"
15 | "github.com/pkg/errors"
16 | "github.com/spf13/cobra"
17 | )
18 | 
19 | // NewIngestCommand initializes an ingest command.
20 | func NewIngestCommand(m *Main) *cobra.Command {
21 | ingestCmd := &cobra.Command{
22 | Use: "ingest",
23 | Short: "ingest on cluster/s using imagine",
24 | Long: `Perform ingest on the cluster/s using imagine.`,
25 | Run: func(cmd *cobra.Command, args []string) {
26 | if err := ExecuteIngest(m); err != nil {
27 | if m.Verbose {
28 | fmt.Printf("%+v\n", err)
29 | } else {
30 | fmt.Printf("%v\n", err)
31 | }
32 | os.Exit(1)
33 | }
34 | },
35 | }
36 | 
37 | flags := ingestCmd.PersistentFlags()
38 | flags.StringVarP(&m.Prefix, "prefix", "p", "dx-", "Prefix to use for index")
39 | flags.StringSliceVar(&m.SpecFiles, "specfiles", nil, "Path to imagine spec file")
40 | _ = ingestCmd.MarkPersistentFlagRequired("specfiles")
41 | 
42 | return ingestCmd
43 | }
44 | 
45 | // ExecuteIngest executes an ingest command on the cluster/s, ensuring that the order of clusters
46 | // specified in the flags corresponds to the filenames that the results are saved in.
47 | func ExecuteIngest(m *Main) error {
48 | for _, file := range m.SpecFiles {
49 | found, err := checkFileExists(file)
50 | if err != nil {
51 | return errors.Wrapf(err, "error checking existence of %v", file)
52 | }
53 | if !found {
54 | return errors.Errorf("%s does not exist", file)
55 | }
56 | }
57 | 
58 | path, err := makeFolder(cmdIngest, m.DataDir)
59 | if err != nil {
60 | return errors.Wrap(err, "error creating folder for ingest results")
61 | }
62 | 
63 | // TODO: copy spec file?
64 | configs := make([]*imagine.Config, 0)
65 | 
66 | allClusterHosts := getAllClusterHosts(m.Hosts)
67 | for _, clusterHosts := range allClusterHosts {
68 | config := newConfig(clusterHosts, m.SpecFiles, m.Prefix, m.ThreadCount)
69 | configs = append(configs, config)
70 | }
71 | 
72 | var wg sync.WaitGroup
73 | 
74 | for i, config := range configs {
75 | wg.Add(1)
76 | go func(i int, config *imagine.Config) {
77 | defer wg.Done()
78 | ingestAndWriteResult(i, config, path)
79 | }(i, config)
80 | }
81 | 
82 | wg.Wait()
83 | fmt.Printf("result(s) successfully saved in %s\n", path)
84 | return nil
85 | }
86 | 
87 | func ingestAndWriteResult(instanceNum int, config *imagine.Config, path string) {
88 | bench, err := ingestOnInstance(config)
89 | if err != nil {
90 | log.Printf("error ingesting on instance %v: %+v", instanceNum, err)
91 | }
92 | 
93 | filename := strconv.Itoa(instanceNum)
94 | if err := writeResultFile(bench, filename, path); err != nil {
95 | log.Printf("error writing result file: %v", err)
96 | }
97 | }
98 | 
99 | // ingestOnInstance ingests data based on a config file.
100 | func ingestOnInstance(conf *imagine.Config) (*Benchmark, error) {
101 | bench := NewBenchmark()
102 | bench.Type = cmdIngest
103 | bench.ThreadCount = conf.ThreadCount
104 | 
105 | err := conf.ReadSpecs()
106 | if err != nil {
107 | return nil, errors.Wrap(err, "error reading spec from config")
108 | }
109 | 
110 | client, err := initializeClient(conf.Hosts...)
111 | if err != nil {
112 | return nil, errors.Wrap(err, "error creating Pilosa client")
113 | }
114 | 
115 | if err = conf.UpdateIndexes(client); err != nil {
116 | return nil, errors.Wrap(err, "error updating indexes")
117 | }
118 | 
119 | now := time.Now()
120 | err = conf.ApplyWorkloads(client)
121 | if err != nil {
122 | return nil, errors.Wrap(err, "error applying workloads")
123 | }
124 | 
125 | bench.Time.Duration = time.Since(now)
126 | return bench, nil
127 | }
128 | 
129 | // writeResultFile writes the results of a Benchmark to a JSON file.
130 | func writeResultFile(bench *Benchmark, filename, dir string) error {
131 | jsonBytes, err := json.Marshal(bench)
132 | if err != nil {
133 | return errors.Wrap(err, "could not marshal results to JSON")
134 | }
135 | 
136 | path := filepath.Join(dir, filename)
137 | if err = ioutil.WriteFile(path, jsonBytes, 0666); err != nil {
138 | return errors.Wrap(err, "could not write JSON to file")
139 | }
140 | return nil
141 | }
142 | 
143 | func newConfig(hosts []string, specFiles []string, prefix string, threadCount int) *imagine.Config {
144 | conf := &imagine.Config{
145 | Hosts: hosts,
146 | Prefix: prefix,
147 | ThreadCount: threadCount,
148 | }
149 | conf.NewSpecsFiles(specFiles)
150 | 
151 | return conf
152 | }
153 | 
--------------------------------------------------------------------------------
/apophenia/permute.go:
--------------------------------------------------------------------------------
1 | package apophenia
2 | 
3 | import (
4 | "errors"
5 | "math/bits"
6 | )
7 | 
8 | // Permutation provides a way to pass integer IDs through a permutation
9 | // map that is pseudorandom but repeatable. This could be done with rand.Perm,
10 | // but that would require storing a slice of [Items]int64, which we want to avoid
11 | // for large values of Items.
12 | //
13 | // Not actually cryptographically secure.
14 | type Permutation struct {
15 | src Sequence
16 | permSeed uint32
17 | max int64
18 | counter int64
19 | rounds int
20 | bits Uint128
21 | k []uint64
22 | }
23 | 
24 | // Design notes:
25 | //
26 | // This is based on:
27 | // http://arxiv.org/abs/1208.1176v2
28 | //
29 | // This simulates the results of a shuffle in a way allowing a lookup of
30 | // the results of the shuffle for any given position, in time proportional
31 | // to a number of "rounds", each of which is 50% likely to swap a slot
32 | // with another slot. The number of rounds needed to achieve a reasonable
33 | // probability of safety is log(N)*6 or so.
34 | //
35 | // Each permutation is fully defined by a "key", consisting of:
36 | // 1. A key "KF" naming a value in [0,max) for each round.
37 | // 2. A series of round functions mapping values in [0,max) to bits,
38 | // one for each round.
39 | // I refer to these as K[r] and F[r]. Thus, K[0] is the index used to
40 | // compute swap operations for round 0, and F[0] is the series of bits
41 | // used to determine whether a swap is performed, with F[0][0] being
42 | // the swap decision for slot 0 in round 0.
(Except it probably isn't, 43 | // because the swap decision is actually made based on the highest index 44 | // in a pair, to ensure that a swap between A and B always uses the same 45 | // decision bit.) 46 | // 47 | // K values are generated using the SequencePermutationK range of offsets, 48 | // with 49 | // 50 | // For F values, we set byte 8 of the plain text to 0x00, and use 51 | // encoding/binary to dump the slot number into the first 8 bytes. This 52 | // yields 128 values, which we treat as the values for the first 128 rounds, 53 | // and then recycle for rounds 129+ if those exist. This is not very 54 | // secure, but we're already at 1/2^128 chances by that time and don't care. 55 | // We could probably trim rounds to 64 or so and not lose much data. 56 | 57 | // NewPermutation creates a Permutation which generates values in [0,m), 58 | // from a given Sequence and seed value. 59 | // 60 | // The seed parameter selects different shuffles, and is useful if you need 61 | // to generate multiple distinct shuffles from the same underlying sequence. 62 | // Treat it as a secondary seed. 63 | func NewPermutation(max int64, seed uint32, src Sequence) (*Permutation, error) { 64 | if max < 1 { 65 | return nil, errors.New("period must be positive") 66 | } 67 | // number of rounds to get "good" results is roughly 6 log N. 68 | bits := 64 - bits.LeadingZeros64(uint64(max)) 69 | p := Permutation{max: max, rounds: 6 * bits, counter: 0} 70 | 71 | p.src = src 72 | p.k = make([]uint64, p.rounds) 73 | p.permSeed = seed 74 | offset := OffsetFor(SequencePermutationK, p.permSeed, 0, 0) 75 | for i := uint64(0); i < uint64(p.rounds); i++ { 76 | offset.Lo = i 77 | p.k[i] = p.src.BitsAt(offset).Lo % uint64(p.max) 78 | } 79 | return &p, nil 80 | } 81 | 82 | // Next generates the next value from the permutation. 83 | func (p *Permutation) Next() (ret int64) { 84 | return p.nextValue() 85 | } 86 | 87 | // Nth generates the Nth value from the permutation. For instance, 88 | // given a new permutation, calling Next once produces the same 89 | // value you'd get from calling Nth(0). This is a seek which changes 90 | // the offset that Next will count from; after calling Nth(x), you 91 | // would get the same result from Next() that you would from Nth(x+1). 92 | func (p *Permutation) Nth(n int64) (ret int64) { 93 | p.counter = n 94 | ret = p.nextValue() 95 | return ret 96 | } 97 | 98 | func (p *Permutation) nextValue() int64 { 99 | p.counter = int64(uint64(p.counter) % uint64(p.max)) 100 | x := uint64(p.counter) 101 | p.counter++ 102 | // a value which can't possibly be the next value we need, so we 103 | // always hash on the first pass. 
104 | prev := uint64(p.max) + 1
105 | offset := OffsetFor(SequencePermutationF, p.permSeed, 0, 0)
106 | for i := uint64(0); i < uint64(p.rounds); i++ {
107 | if i > 0 && i&127 == 0 {
108 | offset.Hi++
109 | // force regeneration of bits down below
110 | prev = uint64(p.max) + 1
111 | }
112 | xPrime := (p.k[i] + uint64(p.max) - x) % uint64(p.max)
113 | xCaret := x
114 | if xPrime > xCaret {
115 | xCaret = xPrime
116 | }
117 | if xCaret != prev {
118 | offset.Lo = xCaret
119 | p.bits = p.src.BitsAt(offset)
120 | prev = xCaret
121 | }
122 | if p.bits.Bit(i&127) != 0 { // bits are recycled every 128 rounds
123 | x = xPrime
124 | }
125 | }
126 | return int64(x)
127 | }
128 | 
--------------------------------------------------------------------------------
/cmd/pi/main.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | "crypto/tls"
5 | "fmt"
6 | "io/ioutil"
7 | "log"
8 | "net/http"
9 | _ "net/http/pprof"
10 | "os"
11 | "strings"
12 | 
13 | "github.com/pilosa/go-pilosa"
14 | "github.com/pilosa/tools"
15 | "github.com/spf13/cobra"
16 | "github.com/spf13/pflag"
17 | "github.com/spf13/viper"
18 | )
19 | 
20 | func main() {
21 | go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }()
22 | 
23 | if err := NewRootCommand().Execute(); err != nil {
24 | fmt.Println(err)
25 | os.Exit(1)
26 | }
27 | }
28 | 
29 | func NewRootCommand() *cobra.Command {
30 | rc := &cobra.Command{
31 | Use: "pi",
32 | Short: "Pilosa Tools",
33 | Long: `Contains various benchmarking and cluster creation and management tools for
34 | Pilosa. Try "pi --help" for more information.
35 | 
36 | Version: ` + tools.Version + `
37 | Build Time: ` + tools.BuildTime + "\n",
38 | PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
39 | v := viper.New()
40 | err := setAllConfig(v, cmd.Flags(), "PI")
41 | if err != nil {
42 | return err
43 | }
44 | 
45 | // return "dry run" error if the "dry-run" flag is set
46 | if ret, err := cmd.Flags().GetBool("dry-run"); err != nil {
47 | return fmt.Errorf("problem getting dry-run flag: %v", err)
48 | } else if ret && cmd.Parent() != nil {
49 | // dry-run only aborts subcommands, which do the real work;
50 | // the root command still prints its usual output.
51 | return fmt.Errorf("dry run")
52 | }
53 | 
54 | return nil
55 | },
56 | }
57 | rc.PersistentFlags().BoolP("verbose", "v", false, "Enable verbose logging.")
58 | rc.PersistentFlags().Bool("dry-run", false, "Stop before executing. Useful for testing.")
59 | _ = rc.PersistentFlags().MarkHidden("dry-run")
60 | rc.PersistentFlags().StringP("config", "c", "", "Configuration file to read from.")
61 | _ = rc.PersistentFlags().MarkHidden("config")
62 | 
63 | rc.AddCommand(NewBenchCommand())
64 | rc.AddCommand(NewReplayCommand())
65 | 
66 | rc.SetOutput(os.Stderr)
67 | return rc
68 | }
69 | 
70 | // setAllConfig takes a FlagSet to be the definition of all configuration
71 | // options, as well as their defaults. It then reads from the command line, the
72 | // environment, and a config file (if specified), and applies the configuration
73 | // in that priority order. Since each flag in the set contains a pointer to
74 | // where its value should be stored, setAllConfig can directly modify the value
75 | // of each config variable.
76 | //
77 | // setAllConfig looks for environment variables which are capitalized versions
78 | // of the flag names with dashes replaced by underscores, and prefixed with
79 | // envPrefix plus an underscore.
80 | func setAllConfig(v *viper.Viper, flags *pflag.FlagSet, envPrefix string) error { 81 | // add cmd line flag def to viper 82 | err := v.BindPFlags(flags) 83 | if err != nil { 84 | return err 85 | } 86 | 87 | // add env to viper 88 | v.SetEnvPrefix(envPrefix) 89 | v.SetEnvKeyReplacer(strings.NewReplacer("-", "_")) 90 | v.AutomaticEnv() 91 | 92 | c := v.GetString("config") 93 | var flagErr error 94 | validTags := make(map[string]bool) 95 | flags.VisitAll(func(f *pflag.Flag) { 96 | validTags[f.Name] = true 97 | }) 98 | 99 | // add config file to viper 100 | if c != "" { 101 | v.SetConfigFile(c) 102 | v.SetConfigType("toml") 103 | err := v.ReadInConfig() 104 | if err != nil { 105 | return fmt.Errorf("error reading configuration file '%s': %v", c, err) 106 | } 107 | 108 | for _, key := range v.AllKeys() { 109 | if _, ok := validTags[key]; !ok { 110 | return fmt.Errorf("invalid option in configuration file: %v", key) 111 | } 112 | } 113 | 114 | } 115 | 116 | // set all values from viper 117 | flags.VisitAll(func(f *pflag.Flag) { 118 | if flagErr != nil { 119 | return 120 | } 121 | var value string 122 | if f.Value.Type() == "stringSlice" { 123 | // special handling is needed for stringSlice as v.GetString will 124 | // always return "" in the case that the value is an actual string 125 | // slice from a config file rather than a comma separated string 126 | // from a flag or env var. 127 | vss := v.GetStringSlice(f.Name) 128 | value = strings.Join(vss, ",") 129 | } else { 130 | value = v.GetString(f.Name) 131 | } 132 | 133 | if f.Changed { 134 | // If f.Changed is true, that means the value has already been set 135 | // by a flag, and we don't need to ask viper for it since the flag 136 | // is the highest priority. This works around a problem with string 137 | // slices where f.Value.Set(csvString) would cause the elements of 138 | // csvString to be appended to the existing value rather than 139 | // replacing it. 140 | return 141 | } 142 | flagErr = f.Value.Set(value) 143 | }) 144 | return flagErr 145 | } 146 | 147 | // NewClientFromFlags returns a new Pilosa client based on the flag arguments. 148 | func NewClientFromFlags(flags *pflag.FlagSet) (*pilosa.Client, error) { 149 | hosts, err := flags.GetStringSlice("hosts") 150 | if err != nil { 151 | return nil, err 152 | } 153 | tlsSkipVerify, err := flags.GetBool("tls.skip-verify") 154 | if err != nil { 155 | return nil, err 156 | } 157 | clientOptions := []pilosa.ClientOption{ 158 | pilosa.OptClientTLSConfig(&tls.Config{InsecureSkipVerify: tlsSkipVerify}), 159 | } 160 | return pilosa.NewClient(hosts, clientOptions...) 
161 | } 162 | 163 | func NewLoggerFromFlags(flags *pflag.FlagSet) *log.Logger { 164 | if verbose, _ := flags.GetBool("verbose"); verbose { 165 | return log.New(os.Stderr, "", log.LstdFlags) 166 | } 167 | return log.New(ioutil.Discard, "", log.LstdFlags) 168 | } 169 | -------------------------------------------------------------------------------- /bench/tps.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "math/rand" 7 | "os" 8 | "runtime" 9 | "time" 10 | 11 | "github.com/pilosa/go-pilosa" 12 | "github.com/pkg/errors" 13 | "golang.org/x/sync/errgroup" 14 | ) 15 | 16 | type TPSBenchmark struct { 17 | Name string `json:"name"` 18 | Intersect bool `json:"intersect" help:"If true, include Intersect queries in benchmark."` 19 | Union bool `json:"union" help:"If true, include Union queries in benchmark."` 20 | Difference bool `json:"difference" help:"If true, include Difference queries in benchmark."` 21 | Xor bool `json:"xor" help:"If true, include XOR queries in benchmark."` 22 | Fields []string `json:"fields" help:"Comma separated list of fields. If blank, use all fields in index schema."` 23 | MinRowID int64 `json:"min-row-id" help:"Minimum row ID to use in queries."` 24 | MaxRowID int64 `json:"max-row-id" help:"Max row ID to use in queries. If 0, determine max available."` 25 | Index string `json:"index" help:"Index to use. If blank, one is chosen randomly from the schema."` 26 | Concurrency int `json:"concurrency" help:"Run this many goroutines concurrently." short:"y"` 27 | Iterations int `json:"iterations" help:"Each goroutine will perform this many queries."` 28 | 29 | // Complexity int `help:"Number of Rows calls to include in each query."` 30 | // Depth int `help:"Nesting depth of queries. (e.g. Xor(Row(blah=2), Intersect(Row(ha=3), Row(blah=4))))"` 31 | 32 | Logger *log.Logger `json:"-"` 33 | } 34 | 35 | func NewTPSBenchmark() *TPSBenchmark { 36 | return &TPSBenchmark{ 37 | Name: "tps", 38 | Intersect: true, 39 | Concurrency: runtime.NumCPU(), 40 | MaxRowID: 100, 41 | Iterations: 1000, 42 | Logger: log.New(os.Stderr, "", log.LstdFlags), 43 | } 44 | } 45 | 46 | // Run runs the benchmark. 47 | func (b *TPSBenchmark) Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error) { 48 | result := NewResult() 49 | result.AgentNum = agentNum 50 | result.Configuration = b 51 | 52 | // get the schema to validate existence of index/fields or pick ones to use. 
53 | s, err := client.Schema() 54 | if err != nil { 55 | return result, errors.Wrap(err, "getting schema") 56 | } 57 | 58 | // deal with indexes 59 | indexes := s.Indexes() 60 | var index *pilosa.Index 61 | if b.Index == "" { 62 | if len(indexes) == 0 { 63 | return result, errors.New("no indexes in Pilosa, aborting.") 64 | } 65 | for name, idx := range indexes { 66 | b.Index = name 67 | index = idx 68 | break 69 | } 70 | } else { 71 | var ok bool 72 | index, ok = indexes[b.Index] 73 | if !ok { 74 | return result, errors.Errorf("index '%s' not found in schema.", b.Index) 75 | } 76 | } 77 | 78 | // we have an index, deal with fields 79 | var fields []*pilosa.Field 80 | fieldsMap := index.Fields() 81 | if len(b.Fields) == 0 { 82 | fields = make([]*pilosa.Field, 0, len(fieldsMap)) 83 | for _, fld := range fieldsMap { 84 | fields = append(fields, fld) 85 | } 86 | } else { 87 | fields = make([]*pilosa.Field, 0, len(b.Fields)) 88 | for _, name := range b.Fields { 89 | fld, ok := fieldsMap[name] 90 | if !ok { 91 | return result, errors.Errorf("field '%s' not found in index '%s'.", name, index.Name()) 92 | } 93 | fields = append(fields, fld) 94 | } 95 | } 96 | if len(fields) == 0 { 97 | return result, errors.Errorf("no fields to query in index '%s'", b.Index) 98 | } 99 | 100 | queries := make([]func(...*pilosa.PQLRowQuery) *pilosa.PQLRowQuery, 0) 101 | if b.Intersect { 102 | queries = append(queries, index.Intersect) 103 | } 104 | if b.Difference { 105 | queries = append(queries, index.Difference) 106 | } 107 | if b.Union { 108 | queries = append(queries, index.Union) 109 | } 110 | if b.Xor { 111 | queries = append(queries, index.Xor) 112 | } 113 | 114 | // TODO: Figure out set of rows to use for each field. For now, just apply MaxRowID to all fields. 115 | 116 | start := time.Now() 117 | eg := errgroup.Group{} 118 | stats := make([]*NumStats, b.Concurrency) 119 | for i := 0; i < b.Concurrency; i++ { 120 | i := i 121 | stats[i] = NewNumStats() 122 | eg.Go(func() error { 123 | return b.runQueries(client, index, fields, queries, i, stats[i]) 124 | }) 125 | } 126 | err = eg.Wait() 127 | duration := time.Since(start) 128 | if err == nil { 129 | for i := 1; i < len(stats); i++ { 130 | stats[0].Combine(stats[i]) 131 | } 132 | result.Extra["countstats"] = stats[0] 133 | seconds := float64(duration) / 1000000000 134 | result.Extra["tps"] = float64(b.Iterations*b.Concurrency) / seconds 135 | } 136 | return result, err 137 | } 138 | 139 | func (b *TPSBenchmark) runQueries(client *pilosa.Client, index *pilosa.Index, fields []*pilosa.Field, queries []func(...*pilosa.PQLRowQuery) *pilosa.PQLRowQuery, seed int, stats *NumStats) error { 140 | r := rand.New(rand.NewSource(int64(seed))) 141 | for i := 0; i < b.Iterations; i++ { 142 | f1 := fields[r.Intn(len(fields))] 143 | f2 := fields[r.Intn(len(fields))] 144 | 145 | r1 := r.Int63n(b.MaxRowID) + b.MinRowID 146 | r2 := r.Int63n(b.MaxRowID) + b.MinRowID 147 | 148 | q := queries[r.Intn(len(queries))] 149 | 150 | cntq := index.Count(q(f1.Row(r1), f2.Row(r2))) 151 | resp, err := client.Query(cntq) 152 | if err != nil { 153 | return errors.Wrap(err, "performing query") 154 | } 155 | if !resp.Success { 156 | return errors.Errorf("unsuccessful query: %s", resp.ErrorMessage) 157 | } 158 | stats.Add(int64(resp.Result().Count())) 159 | } 160 | return nil 161 | } 162 | -------------------------------------------------------------------------------- /dx/README.md: -------------------------------------------------------------------------------- 1 | # dx 2 | 3 | `dx` is a 
load-testing tool used to measure the differences between Pilosa versions. It is typically used to compare a development version of Pilosa against the last known-good version, looking for regressions or improvements. Alternatively, `dx` can simply apply a heavy ingest or query load to a single cluster to see how it performs.
4 | 
5 | ## Invocation
6 | 
7 | ```
8 | dx [command] [flags]
9 | ```
10 | 
11 | `dx` can only be used when the Pilosa clusters are already running. You can then specify the configuration using the following global flags:
12 | 
13 | ```
14 | -o, --hosts strings       Comma-separated list of 'host:port' pairs (default localhost:10101)
15 | -h, --help                help for dx
16 | -t, --threadcount int     Number of concurrent goroutines to allocate (default 1)
17 | -v, --verbose bool        Enable verbose logging (default false)
18 | -d, --datadir string      Data directory to store results (default ~/dx)
19 | ```
20 | 
21 | Use one `--hosts` flag for each cluster. Ex.
22 | 
23 | ```
24 | dx [command] --hosts host1,host2 --hosts host3 --hosts host4,host5,host6
25 | ```
26 | 
27 | is interpreted as cluster0 having hosts host1 and host2, cluster1 having host3, and cluster2 having host4, host5, and host6.
28 | 
29 | ## Commands
30 | 
31 | Along with the flags, the following commands are used by `dx` to determine what to do:
32 | 
33 | * `ingest` --- ingest data from an `imagine` spec file on all clusters
34 | * `query` --- generate and run queries on all clusters
35 | * `compare` --- compare the results from a `dx ingest` or `dx query` command
36 | 
37 | ### ingest
38 | 
39 | Aside from the global flags, the following flags can be used for `dx ingest`:
40 | 
41 | ```
42 | -h, --help                help for dx ingest
43 | -p, --prefix string       Prefix to use for index (default "dx-")
44 |     --specfiles strings   Path to imagine spec file
45 | ```
46 | 
47 | The `ingest` command requires one or more [`imagine` spec files](https://github.com/pilosa/tools/tree/master/imagine) that describe the workload used to generate data.
48 | 
49 | Sample ingest:
50 | 
51 | ```
52 | > dx ingest --specfiles spec.toml --hosts localhost:10101 --hosts localhost:10102
53 | ```
54 | 
55 | will result in two files (named `0` and `1`) written to a folder in `--datadir`. The folder is named "ingest-{timestamp}" (ex. ingest-2019-07-15T12/59/24-05/00). The files contain a single JSON object describing the results of the ingest.
56 | 
57 | ```
58 | {"type":"ingest","time":"635.162153ms","threadcount":1}
59 | ```
60 | 
61 | ### query
62 | 
63 | Aside from the global flags, the following flags can be used for `dx query`:
64 | 
65 | ```
66 | -q, --queries int           Number of queries to run (default 100)
67 | -r, --rows int              Number of rows to perform intersect query on (default 2)
68 | -i, --indexes strings       Indexes to run queries on (default all indexes from first cluster)
69 | -a, --actualresults bool    Save actual results of queries instead of counts (default false)
70 |     --querytemplate string  Run the queries from a previous result file
71 |     --seed int              Seed for generating random rows and columns (default 1)
72 | ```
73 | 
74 | To compare a current query benchmark to an older one, use `dx query` with `--querytemplate` set to the old result file, so that the queries run on the newer cluster are the same. If `--querytemplate` is not set, `dx` automatically generates `--queries` queries using the indexes from `--indexes`. If `--indexes` is also not specified, `dx` defaults to using all of the indexes present in the first cluster.
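For example, to re-run the exact queries recorded by an earlier run against a different cluster (the result-file path here is illustrative):

```
> dx query --hosts localhost:10101 --querytemplate ~/dx/query-2019-07-15T12:59:24-05:00/0
```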
75 | 76 | Sample query: 77 | ``` 78 | > dx query --hosts localhost:10101 --hosts localhost:10102 --hosts localhost:8000 --threadcount=4 79 | ``` 80 | 81 | will result in the three files (named `0`, `1`, and `2` in order of the flags) written to the folder "query-{timestamp}" in `--datadir`. The files contain `--queries + 1` number of JSON objects. The objects describe the queries and their results, while the last object describes the total time the whole run took. 82 | 83 | ``` 84 | {"type":"query","time":"532.164µs","threadcount":1,"query":{"id":0,"query":1,"index":"dx-users","field":"numbers","rows":[21,51],"time":"532.164µs","resultcount":82}} 85 | ... 86 | {"type":"query","time":"1.275702ms","threadcount":14,"query":{"id":0,"query":0,"index":"imaginary-users","field":"numbers","rows":[1,0],"time":"1.275702ms","resultcount":7}} 87 | {"type":"total","time":"164.410886ms","threadcount":4,"query":{"id":-1,"query":0,"index":"","field":"","rows":null,"time":"164.410886ms"}} 88 | ``` 89 | 90 | ### compare 91 | 92 | The JSON files output by `dx ingest` and `dx query` are not actually meant to be read by humans. The final step in comparing results between different clusters is `dx compare`. 93 | 94 | `dx compare` does not take any flags, but it takes two arguments that specify the paths of the two result files to compare. These two result files must be of the same type, or `dx` will return an error. If the two files are valid, `dx` will automatically determine whether they are of type ingest or query and perform the appropriate comparisons. 95 | 96 | ### default behavior 97 | 98 | If no commands are specified, `dx` checks that the clusters are running and prints out their information. 99 | ``` 100 | > dx 101 | 102 | dx is a tool used to analyze accuracy and performance regression across Pilosa versions. 103 | The following checks whether the clusters specified by the hosts flag are running. 
104 | 
105 | Cluster with hosts localhost:10101
106 | server memory: 16GB [16384MB]
107 | server CPU: Intel(R) Core(TM) i7-6567U CPU @ 3.30GHz
108 | [2 physical cores, 4 logical cores available]
109 | cluster nodes: 1
110 | 
111 | ```
--------------------------------------------------------------------------------
/imagine/generators_test.go:
--------------------------------------------------------------------------------
1 | package imagine
2 | 
3 | import (
4 | "fmt"
5 | "io"
6 | "testing"
7 | "time"
8 | 
9 | gopilosa "github.com/pilosa/go-pilosa"
10 | )
11 | 
12 | func testSequenceGenerator(s sequenceGenerator, min int64, max int64, total int64) error {
13 | seen := make(map[int64]struct{})
14 | var done bool
15 | var value int64
16 | 
17 | for !done {
18 | value, done = s.Next()
19 | if _, ok := seen[value]; ok {
20 | return fmt.Errorf("generator produced %d more than once", value)
21 | }
22 | if value < min || value > max {
23 | return fmt.Errorf("generator produced value %d, out of range %d..%d", value, min, max)
24 | }
25 | seen[value] = struct{}{}
26 | }
27 | if int64(len(seen)) != total {
28 | return fmt.Errorf("generator produced %d values from %d..%d, expecting %d", len(seen), min, max, total)
29 | }
30 | return nil
31 | }
32 | 
33 | func testValueGenerator(v valueGenerator, min int64, max int64, total int64) error {
34 | seen := make(map[int64]struct{})
35 | var value int64
36 | 
37 | for i := int64(0); i < total; i++ { // produce exactly total values
38 | value = v.Nth(i)
39 | if _, ok := seen[value]; ok {
40 | return fmt.Errorf("generator produced %d more than once", value)
41 | }
42 | if value < min || value > max {
43 | return fmt.Errorf("generator produced value %d, out of range %d..%d", value, min, max)
44 | }
45 | seen[value] = struct{}{}
46 | }
47 | if int64(len(seen)) != total {
48 | return fmt.Errorf("generator produced %d values from %d..%d, expecting %d", len(seen), min, max, total)
49 | }
50 | return nil
51 | }
52 | 
53 | func Test_Generators(t *testing.T) {
54 | inc := newIncrementGenerator(-3, 5)
55 | if err := testSequenceGenerator(inc, -3, 5, 9); err != nil { t.Fatal(err) }
56 | inc2, err := newPermutedGenerator(-3, 5, 7, 0, 0, 1)
57 | if err != nil {
58 | t.Fatalf("unexpected error: %v", err)
59 | }
60 | if err := testSequenceGenerator(inc2, -3, 5, 7); err != nil { t.Fatal(err) }
61 | lin, err := newLinearValueGenerator(-3, 5, 0)
62 | if err != nil {
63 | t.Fatalf("unexpected error: %v", err)
64 | }
65 | if err := testValueGenerator(lin, -3, 5, 9); err != nil { t.Fatal(err) }
66 | perm, err := newPermutedValueGenerator(lin, -3, 5, 9)
67 | if err != nil {
68 | t.Fatalf("unexpected error: %v", err)
69 | }
70 | if err := testValueGenerator(perm, -3, 5, 9); err != nil { t.Fatal(err) }
71 | }
72 | 
73 | func float64p(v float64) *float64 {
74 | return &v
75 | }
76 | func uint64p(v uint64) *uint64 {
77 | return &v
78 | }
79 | func int64p(v int64) *int64 {
80 | return &v
81 | }
82 | func durationp(v duration) *duration {
83 | return &v
84 | }
85 | 
86 | func TestFieldMin(t *testing.T) {
87 | startTime := time.Date(2000, time.Month(1), 2, 3, 4, 5, 6, time.UTC)
88 | dur := time.Hour * 120
89 | spec := &taskSpec{
90 | FieldSpec: &fieldSpec{
91 | Type: fieldTypeTime,
92 | Min: 10,
93 | Max: 12,
94 | Chance: float64p(1.0),
95 | DensityScale: uint64p(2097152),
96 | Density: 1.0,
97 | },
98 | ColumnOrder: valueOrderLinear,
99 | DimensionOrder: dimensionOrderRow,
100 | Columns: uint64p(10),
101 | RowOrder: valueOrderLinear,
102 | Seed: int64p(0),
103 | Stamp: stampTypeIncreasing,
104 | StampStart: &startTime,
105 | StampRange: durationp(duration(dur)),
106 | }
107 | 
108 | updateChan := make(chan taskUpdate, 10)
109 | go func() {
110 | for range updateChan { // drain task updates until the channel is closed
111 | }
112 | }()
113 | sg, err := newSetGenerator(spec, updateChan, "updateid")
114 | if err != nil {
115 | t.Fatalf("getting new set generator: %v", err)
116 | }
117 | 
118 | r, err := sg.NextRecord()
119 | if err != nil {
120 | t.Fatalf("Error in iterator: %v", err)
121 | }
122 | col, ok := r.(gopilosa.Column)
123 | if !ok {
124 | t.Fatalf("%v not a Column", r)
125 | }
126 | if col.RowID != 10 {
127 | t.Fatalf("field.Min not respected, got row %d, expected 10", col.RowID)
128 | }
129 | 
130 | }
131 | 
132 | func TestNewSetGenerator(t *testing.T) {
133 | startTime := time.Date(2000, time.Month(1), 2, 3, 4, 5, 6, time.UTC)
134 | dur := time.Hour * 120
135 | spec := &taskSpec{
136 | FieldSpec: &fieldSpec{
137 | Type: fieldTypeTime,
138 | Max: 1,
139 | Chance: float64p(1.0),
140 | DensityScale: uint64p(2097152),
141 | Density: 1.0,
142 | },
143 | ColumnOrder: valueOrderLinear,
144 | DimensionOrder: dimensionOrderRow,
145 | Columns: uint64p(10),
146 | RowOrder: valueOrderLinear,
147 | Seed: int64p(0),
148 | Stamp: stampTypeIncreasing,
149 | StampStart: &startTime,
150 | StampRange: durationp(duration(dur)),
151 | }
152 | 
153 | updateChan := make(chan taskUpdate, 10)
154 | go func() {
155 | for range updateChan { // drain task updates until the channel is closed
156 | }
157 | }()
158 | sg, err := newSetGenerator(spec, updateChan, "updateid")
159 | if err != nil {
160 | t.Fatalf("getting new set generator: %v", err)
161 | }
162 | lastT := int64(0)
163 | i := -1
164 | endTime := startTime.Add(dur)
165 | for r, err := sg.NextRecord(); err != io.EOF; r, err = sg.NextRecord() {
166 | if err != nil {
167 | t.Fatalf("Error in iterator: %v", err)
168 | }
169 | col, ok := r.(gopilosa.Column)
170 | if !ok {
171 | t.Fatalf("%v not a Column", r)
172 | }
173 | i++
174 | if col.RowID != 0 {
175 | t.Fatalf("unexpected row at record %d: %v", i, col)
176 | }
177 | if int(col.ColumnID) != i {
178 | t.Fatalf("unexpected col: exp: %d got %d", i, col.ColumnID)
179 | }
180 | if col.Timestamp <= lastT {
181 | t.Fatalf("timestamp did not increase: last: %d this: %v", lastT, col)
182 | }
183 | lastT = col.Timestamp
184 | tim := time.Unix(0, col.Timestamp)
185 | if tim.Before(startTime) {
186 | t.Fatalf("got a time before start time: %v", tim)
187 | }
188 | if tim.After(endTime) {
189 | t.Fatalf("got a time after start+duration: %v", tim)
190 | }
191 | }
192 | if endTime.Sub(time.Unix(0, lastT)) > dur/2 {
193 | t.Fatalf("less than half the duration was used - lastT: %v", lastT)
194 | }
195 | 
196 | close(updateChan)
197 | }
198 | 
199 | func TestMutexGen(t *testing.T) {
200 | spec := &taskSpec{
201 | FieldSpec: &fieldSpec{
202 | Type: fieldTypeMutex,
203 | Max: 2,
204 | Chance: float64p(1.0),
205 | DensityScale: uint64p(2097152),
206 | Density: 0.9,
207 | ValueRule: densityTypeZipf,
208 | Cache: cacheTypeLRU,
209 | ZipfS: 1.1,
210 | ZipfV: 1,
211 | },
212 | ColumnOrder: valueOrderLinear,
213 | DimensionOrder: dimensionOrderRow,
214 | Columns: uint64p(10),
215 | RowOrder: valueOrderLinear,
216 | Seed: int64p(0),
217 | }
218 | 
219 | updateChan := make(chan taskUpdate, 10)
220 | go func() {
221 | for range updateChan { // drain task updates until the channel is closed
222 | }
223 | }()
224 | sg, err := newMutexGenerator(spec, updateChan, "updateid")
225 | if err != nil {
226 | t.Fatalf("getting new mutex generator: %v", err)
227 | }
228 | 
229 | done := make(chan error)
230 | go func() {
231 | for _, err := sg.NextRecord(); err != io.EOF; _, err = sg.NextRecord() {
232 | if err != nil {
233 | done <- err
234 | }
235 | }
236 | close(done)
237 | }()
238 | 
239 | select {
240 | case err = <-done:
241 | if err != nil {
242 | t.Fatalf("error in iterator: %v", err)
243 | }
244 | case <-time.After(time.Second):
245 | t.Fatalf("mutex generator hanging")
246 | }
247 | 
248 | }
249 | 
--------------------------------------------------------------------------------
/dx/main.go:
--------------------------------------------------------------------------------
1 | package dx
2 | 
3 | import (
4 | "fmt"
5 | "io/ioutil"
6 | "log"
7 | "os"
8 | "path/filepath"
9 | "strings"
10 | "time"
11 | 
12 | "github.com/pilosa/go-pilosa"
13 | "github.com/pkg/errors"
14 | "github.com/spf13/cobra"
15 | )
16 | 
17 | const (
18 | cmdIngest = "ingest"
19 | cmdQuery = "query"
20 | cmdTotal = "total"
21 | )
22 | 
23 | // Main contains the flags dx uses.
24 | type Main struct {
25 | Hosts []string
26 | ThreadCount int
27 | SpecFiles []string
28 | Verbose bool
29 | Prefix string
30 | NumQueries int64
31 | NumRows int64
32 | DataDir string
33 | ActualResults bool
34 | QueryTemplate string
35 | Indexes []string
36 | Seed int64
37 | }
38 | 
39 | // NewMain creates a new Main object.
40 | func NewMain() *Main {
41 | return &Main{
42 | Prefix: "dx-",
43 | }
44 | }
45 | 
46 | // NewRootCmd creates an instance of the cobra root command for dx.
47 | func NewRootCmd() *cobra.Command {
48 | // m is persisted to all subcommands
49 | m := NewMain()
50 | 
51 | rc := &cobra.Command{
52 | Use: "dx",
53 | Short: "analyze accuracy and performance regression across Pilosa versions",
54 | Long: `Analyze accuracy and performance regression across Pilosa versions by running high-load ingest and queries.`,
55 | PersistentPreRun: func(cmd *cobra.Command, args []string) {
56 | // set logger
57 | if m.Verbose {
58 | log.SetOutput(os.Stderr)
59 | } else {
60 | log.SetOutput(ioutil.Discard)
61 | }
62 | },
63 | Run: func(cmd *cobra.Command, args []string) {
64 | fmt.Printf("dx is a tool used to analyze accuracy and performance regression across Pilosa versions.\nThe following checks whether the clusters specified by the hosts flag are running.\n\n")
65 | if err := printServers(m.Hosts); err != nil {
66 | if m.Verbose {
67 | fmt.Printf("%+v\n", err)
68 | } else {
69 | fmt.Printf("%v\n", err)
70 | }
71 | os.Exit(1)
72 | }
73 | },
74 | }
75 | 
76 | // default
77 | var usrHomeDirDx string
78 | home, err := os.UserHomeDir()
79 | if err == nil {
80 | usrHomeDirDx = filepath.Join(home, "dx")
81 | }
82 | 
83 | // TODO: flag for which folder to store run in?
84 | flags := rc.PersistentFlags()
85 | flags.StringArrayVarP(&m.Hosts, "hosts", "o", []string{"localhost:10101"}, "Comma-separated list of 'host:port' pairs. Repeat this flag for each cluster")
86 | flags.IntVarP(&m.ThreadCount, "threadcount", "t", 1, "Number of goroutines to allocate")
87 | flags.BoolVarP(&m.Verbose, "verbose", "v", false, "Enable verbose logging")
88 | flags.StringVarP(&m.DataDir, "datadir", "d", usrHomeDirDx, "Data directory to store results")
89 | 
90 | rc.AddCommand(NewIngestCommand(m))
91 | rc.AddCommand(NewQueryCommand(m))
92 | rc.AddCommand(NewCompareCommand(m))
93 | 
94 | return rc
95 | }
96 | 
97 | func printServers(hosts []string) error {
98 | for _, clusterHostsString := range hosts {
99 | fmt.Printf("Cluster with hosts %v\n", clusterHostsString)
100 | clusterHosts := strings.Split(clusterHostsString, ",")
101 | 
102 | client, err := initializeClient(clusterHosts...)
103 | if err != nil {
104 | return errors.Wrap(err, "error initializing client")
105 | }
106 | if err = printServerInfo(client); err != nil {
107 | return errors.Wrap(err, "could not print server info")
108 | }
109 | }
110 | return nil
111 | }
112 | 
113 | // modified from package imagine
114 | func printServerInfo(client *pilosa.Client) error {
115 | serverInfo, err := client.Info()
116 | if err != nil {
117 | return errors.Wrap(err, "couldn't get server info")
118 | }
119 | serverMemMB := serverInfo.Memory / (1024 * 1024)
120 | serverMemGB := (serverMemMB + 1023) / 1024
121 | fmt.Printf("server memory: %dGB [%dMB]\n", serverMemGB, serverMemMB)
122 | fmt.Printf("server CPU: %s\n[%d physical cores, %d logical cores available]\n", serverInfo.CPUType, serverInfo.CPUPhysicalCores, serverInfo.CPULogicalCores)
123 | serverStatus, err := client.Status()
124 | if err != nil {
125 | return errors.Wrap(err, "couldn't get cluster status info")
126 | }
127 | fmt.Printf("cluster nodes: %d\n\n", len(serverStatus.Nodes))
128 | return nil
129 | }
130 | 
131 | // initializeClients creates the Pilosa clients from a slice of strings, where each string is
132 | // a comma-separated list of host:port pairs in a cluster. The order in which the clusters
133 | // appear in hosts is the same order as they appear in the output slice.
134 | func initializeClients(hosts []string) ([]*pilosa.Client, error) {
135 |     // len(hosts) is the number of clusters
136 |     clients := make([]*pilosa.Client, 0, len(hosts))
137 |     for _, clusterHostsString := range hosts {
138 |         clusterHosts := strings.Split(clusterHostsString, ",")
139 | 
140 |         client, err := initializeClient(clusterHosts...)
141 |         if err != nil {
142 |             return nil, errors.Wrap(err, "error creating client for cluster")
143 |         }
144 | 
145 |         clients = append(clients, client)
146 |     }
147 |     return clients, nil
148 | }
149 | 
150 | // initializeClient creates a Pilosa client using a list of hosts from the cluster.
151 | func initializeClient(clusterHosts ...string) (*pilosa.Client, error) {
152 |     // initialize uris from this cluster
153 |     uris := make([]*pilosa.URI, 0, len(clusterHosts))
154 |     for _, host := range clusterHosts {
155 |         uri, err := pilosa.NewURIFromAddress(host)
156 |         if err != nil {
157 |             return nil, errors.Wrapf(err, "error creating Pilosa URI from host %s", host)
158 |         }
159 |         uris = append(uris, uri)
160 |     }
161 | 
162 |     // initialize cluster and client from the uris
163 |     cluster := pilosa.NewClusterWithHost(uris...)
164 |     client, err := pilosa.NewClient(cluster)
165 |     if err != nil {
166 |         return nil, errors.Wrap(err, "error creating Pilosa client from URIs")
167 |     }
168 | 
169 |     return client, nil
170 | }
171 | 
172 | // getAllClusterHosts creates a slice for each cluster containing the hosts in that cluster.
173 | // Ex. ["host1,host2", "host3"] -> [[host1, host2], [host3]]
174 | func getAllClusterHosts(hosts []string) [][]string {
175 |     allClusterHosts := make([][]string, 0)
176 |     for _, clusterHostsString := range hosts {
177 |         clusterHosts := strings.Split(clusterHostsString, ",")
178 |         allClusterHosts = append(allClusterHosts, clusterHosts)
179 |     }
180 |     return allClusterHosts
181 | }
182 | 
183 | // makeFolder makes a folder with name "{cmd}-{time now}" in the directory.
184 | // Ex. ("ingest", "usr/home") -> creates the directory usr/home/ingest-2019-07-10T15:52:44-05:00.
185 | func makeFolder(cmdType, dir string) (string, error) {
186 |     timestamp := time.Now().Format(time.RFC3339)
187 |     folderName := cmdType + "-" + timestamp
188 |     path := filepath.Join(dir, folderName)
189 | 
190 |     if err := os.MkdirAll(path, 0777); err != nil {
191 |         return "", errors.Wrapf(err, "error mkdir for %v", path)
192 |     }
193 |     return path, nil
194 | }
195 | 
196 | // TimeDuration wraps time.Duration to encode to JSON.
197 | type TimeDuration struct {
198 |     Duration time.Duration
199 | }
200 | 
201 | // UnmarshalJSON deserializes json to TimeDuration.
202 | func (d *TimeDuration) UnmarshalJSON(b []byte) (err error) {
203 |     d.Duration, err = time.ParseDuration(strings.Trim(string(b), `"`))
204 |     return
205 | }
206 | 
207 | // MarshalJSON serializes TimeDuration to json.
208 | func (d *TimeDuration) MarshalJSON() (b []byte, err error) {
209 |     return []byte(fmt.Sprintf(`"%v"`, d.Duration)), nil
210 | }
211 | 
212 | // Benchmark contains the information related to an ingest or query benchmark.
213 | type Benchmark struct {
214 |     Type        string       `json:"type"`
215 |     Time        TimeDuration `json:"time"`
216 |     ThreadCount int          `json:"threadcount"`
217 |     Query       *Query       `json:"query,omitempty"`
218 | }
219 | 
220 | // NewBenchmark creates an empty benchmark.
221 | func NewBenchmark() *Benchmark {
222 |     return &Benchmark{}
223 | }
224 | 
--------------------------------------------------------------------------------
/bench/bench.go:
--------------------------------------------------------------------------------
1 | package bench
2 | 
3 | import (
4 |     "context"
5 |     "fmt"
6 |     "math/rand"
7 |     "time"
8 | 
9 |     "github.com/pilosa/go-pilosa"
10 | )
11 | 
12 | // Benchmark is the interface implemented by benchmark components. Benchmarks should Marshal
13 | // to valid JSON so that their configuration may be recorded with their results.
14 | type Benchmark interface {
15 |     Run(ctx context.Context, client *pilosa.Client, agentNum int) (*Result, error)
16 | }
17 | // HostSetup holds the hosts and client options used to connect to a cluster.
18 | type HostSetup struct {
19 |     Hosts         []string
20 |     ClientOptions []pilosa.ClientOption
21 | }
22 | 
23 | // Result holds the output from the run of a benchmark - the Benchmark's Run()
24 | // method may set Stats, Responses, and Extra, and the RunBenchmark helper
25 | // function will set the Duration, AgentNum, PilosaVersion, and Configuration.
26 | // Either may set Error if there is an error. The structure of Result assumes
27 | // that most benchmarks will run multiple queries and track statistics about how
28 | // long each one takes. The Extra field is for benchmarks which either do not
29 | // fit this model, or want to return additional information not covered by Stats
30 | // and Responses.
31 | type Result struct {
32 |     Stats         *Stats                  `json:"stats"`
33 |     Responses     []*pilosa.QueryResponse `json:"responses"`
34 |     Extra         map[string]interface{}  `json:"extra"`
35 |     AgentNum      int                     `json:"agentnum"`
36 |     PilosaVersion string                  `json:"pilosa-version"`
37 |     Configuration interface{}             `json:"configuration"`
38 | 
39 |     // Error exists so that errors can be correctly marshalled to JSON. It is set using Result.err.Error()
40 |     Error string `json:"error,omitempty"`
41 | }
42 | 
43 | // NewResult initializes and returns a Result.
44 | func NewResult() *Result {
45 |     return &Result{
46 |         Stats:     NewStats(),
47 |         Extra:     make(map[string]interface{}),
48 |         Responses: make([]*pilosa.QueryResponse, 0),
49 |     }
50 | }
51 | 
52 | // Add adds the duration to the Result's Stats object. If resp is non-nil, it
53 | // also adds it to the slice of responses.
54 | func (r *Result) Add(d time.Duration, resp *pilosa.QueryResponse) {
55 |     r.Stats.Add(d)
56 |     if resp != nil {
57 |         r.Responses = append(r.Responses, resp)
58 |     }
59 | }
60 | 
61 | // ensureSchema ensures that a given index and field exist.
62 | func ensureSchema(client *pilosa.Client, indexName, fieldName string, opts ...interface{}) (index *pilosa.Index, field *pilosa.Field, err error) {
63 |     var indexOpts []pilosa.IndexOption
64 |     var fieldOpts []pilosa.FieldOption
65 |     for _, opt := range opts {
66 |         switch opt := opt.(type) {
67 |         case pilosa.IndexOption:
68 |             indexOpts = append(indexOpts, opt)
69 |         case pilosa.FieldOption:
70 |             fieldOpts = append(fieldOpts, opt)
71 |         }
72 |     }
73 | 
74 |     schema, err := client.Schema()
75 |     if err != nil {
76 |         return nil, nil, fmt.Errorf("cannot read schema: %v", err)
77 |     }
78 | 
79 |     index = schema.Index(indexName, indexOpts...)
80 |     if err := client.EnsureIndex(index); err != nil {
81 |         return nil, nil, fmt.Errorf("cannot ensure index: %v", err)
82 |     }
83 |     if fieldName != "" {
84 |         field = index.Field(fieldName, fieldOpts...)
85 |         if err := client.EnsureField(field); err != nil {
86 |             return nil, nil, fmt.Errorf("cannot ensure field: %v", err)
87 |         }
88 |     }
89 |     if err := client.SyncSchema(schema); err != nil {
90 |         return nil, nil, fmt.Errorf("cannot sync schema: %v", err)
91 |     }
92 |     return index, field, nil
93 | }
94 | 
95 | // PrettyDuration is a wrapper type to force human-readable JSON output.
96 | type PrettyDuration time.Duration
97 | 
98 | // MarshalJSON returns a nicely formatted duration, instead of it just being
99 | // treated like an int.
100 | func (d PrettyDuration) MarshalJSON() ([]byte, error) {
101 |     s := time.Duration(d).String()
102 |     return []byte("\"" + s + "\""), nil
103 | }
104 | 
105 | // Prettify recursively replaces elements of ugly types with their pretty wrappers.
106 | func Prettify(m map[string]interface{}) map[string]interface{} {
107 |     newmap := make(map[string]interface{})
108 |     for k, v := range m {
109 |         switch v.(type) {
110 |         case map[string]interface{}:
111 |             newmap[k] = Prettify(v.(map[string]interface{}))
112 |         case []time.Duration:
113 |             newslice := make([]PrettyDuration, len(v.([]time.Duration)))
114 |             slice := v.([]time.Duration)
115 |             for n, e := range slice {
116 |                 newslice[n] = PrettyDuration(e)
117 |             }
118 |             newmap[k] = newslice
119 |         case time.Duration:
120 |             newmap[k] = PrettyDuration(v.(time.Duration))
121 |         default:
122 |             if interv, ok := v.([]map[string]interface{}); ok {
123 |                 for i, iv := range interv {
124 |                     interv[i] = Prettify(iv)
125 |                 }
126 |             }
127 |             newmap[k] = v
128 |         }
129 |     }
130 |     return newmap
131 | }
132 | 
133 | // NewQueryGenerator returns a new QueryGenerator.
134 | func NewQueryGenerator(index *pilosa.Index, field *pilosa.Field, seed int64) *QueryGenerator {
135 |     return &QueryGenerator{
136 |         index: index,
137 |         field: field,
138 |         rand:  rand.New(rand.NewSource(seed)),
139 |     }
140 | }
141 | 
142 | // QueryGenerator holds the configuration and state for randomly generating queries.
143 | type QueryGenerator struct {
144 |     index *pilosa.Index
145 |     field *pilosa.Field
146 |     rand  *rand.Rand
147 | }
148 | 
149 | // Random returns a randomly generated query.
150 | func (g *QueryGenerator) Random(maxN, depth, maxargs int, idmin, idmax uint64) pilosa.PQLQuery {
151 |     val := g.rand.Intn(5)
152 |     switch val {
153 |     case 0:
154 |         return g.RandomTopN(maxN, depth, maxargs, idmin, idmax)
155 |     default:
156 |         return g.RandomBitmapCall(depth, maxargs, idmin, idmax)
157 |     }
158 | }
159 | 
160 | // RandomRangeQuery returns a randomly generated sum or range query.
161 | func (g *QueryGenerator) RandomRangeQuery(depth, maxargs int, idmin, idmax uint64) pilosa.PQLQuery {
162 |     switch g.rand.Intn(5) {
163 |     case 1:
164 |         return g.RandomSum(depth, maxargs, idmin, idmax)
165 |     default:
166 |         return g.RandomRange(maxargs, idmin, idmax)
167 |     }
168 | }
169 | // RandomRange returns either a single range call or a random combination (difference, intersect, or union) of numArg range calls.
170 | func (g *QueryGenerator) RandomRange(numArg int, idmin, idmax uint64) *pilosa.PQLRowQuery {
171 |     choose := g.rand.Intn(4)
172 |     if choose == 0 {
173 |         return g.RangeCall(idmin, idmax)
174 |     }
175 |     a := make([]*pilosa.PQLRowQuery, numArg)
176 |     for i := 0; i < numArg; i++ {
177 |         a[i] = g.RangeCall(idmin, idmax)
178 |     }
179 | 
180 |     switch choose {
181 |     case 1:
182 |         return g.index.Difference(a...)
183 |     case 2:
184 |         return g.index.Intersect(a...)
185 |     case 3:
186 |         return g.index.Union(a...)
187 |     default:
188 |         panic("unreachable")
189 |     }
190 | }
191 | // RangeCall returns a random comparison (GT, LT, GTE, LTE, or Equals) query against the field.
192 | func (g *QueryGenerator) RangeCall(idmin, idmax uint64) *pilosa.PQLRowQuery {
193 |     const operationN = 5
194 |     switch g.rand.Intn(operationN) {
195 |     case 0:
196 |         return g.field.GT(g.rand.Intn(int(idmax - idmin)))
197 |     case 1:
198 |         return g.field.LT(g.rand.Intn(int(idmax - idmin)))
199 |     case 2:
200 |         return g.field.GTE(g.rand.Intn(int(idmax - idmin)))
201 |     case 3:
202 |         return g.field.LTE(g.rand.Intn(int(idmax - idmin)))
203 |     case 4:
204 |         return g.field.Equals(g.rand.Intn(int(idmax - idmin)))
205 |     default:
206 |         panic("unreachable")
207 |     }
208 | }
209 | 
210 | // RandomSum returns a randomly generated sum query.
211 | func (g *QueryGenerator) RandomSum(depth, maxargs int, idmin, idmax uint64) pilosa.PQLQuery {
212 |     switch g.rand.Intn(4) {
213 |     case 0:
214 |         return g.field.Sum(g.RandomBitmapCall(depth, maxargs, idmin, idmax))
215 |     default:
216 |         return g.field.Sum(g.RandomRange(maxargs, idmin, idmax))
217 |     }
218 | }
219 | 
220 | // RandomTopN returns a randomly generated TopN query.
221 | func (g *QueryGenerator) RandomTopN(maxN, depth, maxargs int, idmin, idmax uint64) *pilosa.PQLRowQuery {
222 |     return g.field.RowTopN(uint64(g.rand.Intn(maxN-1)+1), g.RandomBitmapCall(depth, maxargs, idmin, idmax))
223 | }
224 | 
225 | // RandomBitmapCall returns a randomly generated query which returns a bitmap.
226 | func (g *QueryGenerator) RandomBitmapCall(depth, maxargs int, idmin, idmax uint64) *pilosa.PQLRowQuery {
227 |     if depth <= 1 {
228 |         return g.field.Row(uint64(g.rand.Int63n(int64(idmax)-int64(idmin)) + int64(idmin)))
229 |     }
230 |     choose := g.rand.Intn(4)
231 |     if choose == 0 {
232 |         return g.RandomBitmapCall(1, 0, idmin, idmax)
233 |     }
234 | 
235 |     numargs := 2
236 |     if maxargs > 2 {
237 |         numargs = g.rand.Intn(maxargs-2) + 2
238 |     }
239 |     a := make([]*pilosa.PQLRowQuery, numargs)
240 |     for i := 0; i < numargs; i++ {
241 |         a[i] = g.RandomBitmapCall(depth-1, maxargs, idmin, idmax)
242 |     }
243 | 
244 |     switch choose {
245 |     case 1:
246 |         return g.index.Difference(a...)
247 |     case 2:
248 |         return g.index.Intersect(a...)
249 |     case 3:
250 |         return g.index.Union(a...)
251 |     default:
252 |         panic("unreachable")
253 |     }
254 | }
255 | 
256 | // Stats object helps track timing stats.
257 | type Stats struct {
258 |     sumSquareDelta float64
259 | 
260 |     Min     time.Duration   `json:"min"`
261 |     Max     time.Duration   `json:"max"`
262 |     Mean    time.Duration   `json:"mean"`
263 |     Total   time.Duration   `json:"total-time"`
264 |     Num     int64           `json:"num"`
265 |     All     []time.Duration `json:"all"`
266 |     SaveAll bool            `json:"-"`
267 | }
268 | 
269 | // NewStats gets a Stats object.
270 | func NewStats() *Stats {
271 |     return &Stats{
272 |         Min: 1<<63 - 1,
273 |         All: make([]time.Duration, 0),
274 |     }
275 | }
276 | 
277 | // Add adds a new time to the stats object.
278 | func (s *Stats) Add(td time.Duration) {
279 |     if s.SaveAll {
280 |         s.All = append(s.All, td)
281 |     }
282 |     s.Num++
283 |     s.Total += td
284 |     if td < s.Min {
285 |         s.Min = td
286 |     }
287 |     if td > s.Max {
288 |         s.Max = td
289 |     }
290 | 
291 |     // online variance calculation
292 |     // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
293 |     delta := td - s.Mean
294 |     s.Mean += delta / time.Duration(s.Num)
295 |     s.sumSquareDelta += float64(delta * (td - s.Mean))
296 | }
297 | // Combine merges the stats from another Stats object into s.
298 | func (s *Stats) Combine(other *Stats) {
299 |     if other.Min < s.Min {
300 |         s.Min = other.Min
301 |     }
302 |     if other.Max > s.Max {
303 |         s.Max = other.Max
304 |     }
305 |     s.Total += other.Total
306 |     s.Num += other.Num
307 |     s.Mean = s.Total / time.Duration(s.Num)
308 |     s.All = append(s.All, other.All...)
309 | }
310 | 
311 | // NumStats object helps track stats. This and Stats (which was
312 | // originally made specifically for time) should probably be unified.
313 | type NumStats struct {
314 |     sumSquareDelta float64
315 | 
316 |     // NumZero counts the number of values that have been added which
317 |     // are zero. This is a cheap, simple replacement for more
318 |     // sophisticated tracking of the distribution of the data that
319 |     // lets us know if (e.g.) we have a bunch of queries doing
320 |     // nothing because we're querying empty rows or something.
321 |     NumZero int64   `json:"num-zero"`
322 |     Min     int64   `json:"min"`
323 |     Max     int64   `json:"max"`
324 |     Mean    int64   `json:"mean"`
325 |     Total   int64   `json:"total"`
326 |     Num     int64   `json:"num"`
327 |     All     []int64 `json:"all"`
328 |     SaveAll bool    `json:"-"`
329 | }
330 | 
331 | // NewNumStats gets a NumStats object.
332 | func NewNumStats() *NumStats {
333 |     return &NumStats{
334 |         Min: 1<<63 - 1,
335 |         All: make([]int64, 0),
336 |     }
337 | }
338 | 
339 | // Add adds a new value to the stats object.
340 | func (s *NumStats) Add(td int64) {
341 |     if s.SaveAll {
342 |         s.All = append(s.All, td)
343 |     }
344 |     if td == 0 {
345 |         s.NumZero++
346 |     }
347 |     s.Num++
348 |     s.Total += td
349 |     if td < s.Min {
350 |         s.Min = td
351 |     }
352 |     if td > s.Max {
353 |         s.Max = td
354 |     }
355 | 
356 |     // online variance calculation
357 |     // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
358 |     delta := td - s.Mean
359 |     s.Mean += delta / int64(s.Num)
360 |     s.sumSquareDelta += float64(delta * (td - s.Mean))
361 | }
362 | // Combine merges the stats from another NumStats object into s.
363 | func (s *NumStats) Combine(other *NumStats) {
364 |     if other.Min < s.Min {
365 |         s.Min = other.Min
366 |     }
367 |     if other.Max > s.Max {
368 |         s.Max = other.Max
369 |     }
370 |     s.NumZero += other.NumZero
371 |     s.Total += other.Total
372 |     s.Num += other.Num
373 |     s.Mean = s.Total / s.Num
374 |     s.All = append(s.All, other.All...)
375 | }
376 | 
--------------------------------------------------------------------------------
/imagine/README.md:
--------------------------------------------------------------------------------
1 | # Imagine that you had a database with...
2 | 
3 | This tool intends to provide a way to populate a Pilosa database with
4 | predictable contents in a reasonably efficient fashion, without needing
5 | enormous static data files. Indexes and fields within them can be specified
6 | in a TOML file.
7 | 
8 | ## Invocation
9 | 
10 | The `imagine` utility takes command line options, followed by one or more
11 | spec files, which are TOML files containing specs.
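For example, a small spec sketch might look like the following (the index and field names here are hypothetical; the individual keys are documented under "Spec files" below):

```toml
# A hypothetical minimal spec: one index, one field, one workload.
densityscale = 2097152
version = "1.0"

[indexes.example]
columns = 10000
fields = [
    { name = "things", type = "set", max = 8, density = 0.5 },
]

[[workloads]]
name = "populate"
threadCount = 1
tasks = [
    { index = "example", field = "things" },
]
```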
12 | 
13 | What `imagine` does with the spec files is controlled by the following behavior options:
14 | 
15 | * `--describe` describe the specs
16 | * `--verify string` index structure validation: create/error/purge/update/none
17 | * `--generate` generate data as specified by workloads
18 | * `--delete` delete specified fields
19 | 
20 | Invoked without behavior options, or with only `--describe`, `imagine` will
21 | describe the indexes and workloads from its spec files, and terminate. If one
22 | or more of verify, generate, or delete is provided, it will do those in order.
23 | 
24 | The following verification options exist:
25 | 
26 | * `create`: Attempts to create all specified indexes and fields, errors out
27 |   if any already existed.
28 | * `error`: Verify that indexes and fields exist, error out if they don't.
29 | * `purge`: Delete all existing indexes and fields, then try to create them.
30 |   Error out if either part of this fails.
31 | * `update`: Try to create any missing indexes or fields. Error out if this fails.
32 | * `none`: Do no verification. (Workloads will still check for index/field
33 |   existence.)
34 | 
35 | The default for `--verify` is determined by other parameters; if `--delete` is
36 | present, and `--generate` is not, the default verification is "none" (there's
37 | no point in verifying that things exist right before deleting them), otherwise
38 | the default verification is "error".
39 | 
40 | The following options change how `imagine` goes about its work:
41 | 
42 | * `--column-scale int` scale number of columns provided by specs
43 | * `--cpu-profile string` record CPU profile to file
44 | * `--dry-run` dry-run; describe what would be done
45 | * `--hosts string` comma-separated list of "host:port" pairs of the Pilosa cluster (default "localhost:10101")
46 | * `--mem-profile string` record allocation profile to file
47 | * `--prefix string` prefix to use on index names
48 | * `--row-scale int` scale number of rows provided by specs
49 | * `--thread-count int` number of threads to use for import, overrides value in config file (default 1)
50 | * `--time` report on time elapsed for operations
51 | 
52 | ## Spec files
53 | 
54 | The following global settings exist for each spec:
55 | 
56 | * densityscale: A density scale factor that determines the precision
57 |   used for density computations. Density scale should be a power of two.
58 |   Higher density scales will take longer to compute and process. (The
59 |   operation is O(log2(N)).)
60 | * prefix: A preferred prefix to use for index names. If absent,
61 |   `imaginary` is used. The `--prefix` command line option overrides
62 |   this.
63 | * version: The string "1.0". The intent is that future versions of the
64 |   tool will attempt to ensure that a given spec produces identical results.
65 |   If a later version would change the results from a spec, it should do so
66 |   only when a different string is specified here. However, *this
67 |   guarantee is not yet in force*. The software is still in an immature
68 |   state, and may change output significantly during development.
69 | * seed: A default PRNG seed, used for indexes/fields that don't specify
70 |   their own.
71 | 
72 | A spec can specify two other kinds of things, indexes and workloads.
73 | Indexes describe the data that will go in a Pilosa index, such as the index's
74 | name, size (in columns), and number of fields. Workloads describe specific
75 | patterns of creating and inserting data in fields.
76 | 
77 | When multiple specs are provided, they are combined. Indexes and fields are
78 | merged; any conflicts between them are an error, and `imagine` will report
79 | such errors and then stop. Workloads are concatenated, with specs processed
80 | in command-line order.
81 | 
82 | ### Indexes
83 | 
84 | Indexes are defined in a top-level map, using the index name as the key. Each
85 | index would typically be written as `[indexes.indexname]`. Each index has
86 | settings, plus field entries under the index. Fields are a mapping of names
87 | to field specifications.
88 | 
89 | * name: The index's name. (This will be prefixed later.)
90 | * description: A longer description of the index's purpose within a set.
91 | * columns: The number of columns.
92 | * seed: A default PRNG seed to use for fields that don't specify their own.
93 | 
94 | ### Fields
95 | 
96 | Fields can be of several kinds, specified as "type". Defined types:
97 | * `set`: The default "set" field type, where rows correspond to specific
98 |   values.
99 | * `mutex`: The "mutex" field type, which is like a set, only it enforces
100 |   that only one row is set per column.
101 | * `int`: The binary-representation field type, usable for range queries.
102 | * `time`: The "time" field type, which is a set with additional optional
103 |   timestamp information.
104 | 
105 | All fields share some common parameters:
106 | 
107 | * `zipfV`, `zipfS`: the V/S values used for a Zipf distribution of values.
108 | * `min`, `max`: Minimum and maximum values. For int fields, this is the value
109 |   range; for set/mutex fields, it's the range of rows that will be
110 |   potentially generated.
111 | * `sourceIndex`: An index to use for values; the value range will be the
112 |   source index's column range. If the source index has 100,000 columns, this
113 |   is equivalent to "min: 0, max: 99999".
114 | * `density`: The field's base density of bits set. For a set, this density
115 |   applies to each row independently; for a mutex or int field, it
116 |   determines how many columns should have a value set.
117 | * `valueRule`: "linear" or "zipf". Exact interpretation varies by field type,
118 |   but "linear" indicates that all rows should have the same density of
119 |   values, while "zipf" indicates that they should follow a Zipf distribution.
120 | 
121 | #### Set/Mutex Fields
122 | 
123 | Set and mutex fields can also configure a cache type:
124 | 
125 | * `cache`: Cache type, one of "lru" or "none".
126 | 
127 | ##### Set/Time Fields
128 | 
129 | Set (and time) fields can be generated either in row-major order (generate
130 | one row at a time for all columns) or column-major order (generate all rows for
131 | each column).
132 | 
133 | * `dimensionOrder`: string, one of "row" or "column". Default is "row".
134 | * `quantum`: string, one of "Y", "YM", "YMD", or "YMDH". Valid only for
135 |   time fields. Default is "YMDH".
136 | 
137 | Zipf parameters: The first row will have bits set based on the base density
138 | provided. Following rows will follow the Zipf distribution's probabilities.
139 | For instance, with v=2, s=2, the k=0 probability is proportional to
140 | `(2+0)**(-2)` (1/4), and the k=1 probability is proportional to
141 | `(2+1)**(-2)` (1/9). Thus, the probability of a bit being set in the k=1 row is
142 | 4/9 of the base density.
143 | 
144 | The final set of bits does not depend on whether values were computed in
145 | row-major or column-major order. (This guarantee is slightly weaker than other guarantees.)
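As an illustration of the arithmetic above, here is a sketch of a field spec (with hypothetical names, not one of the bundled samples) that pairs a base density with a Zipf value rule:

```toml
# Hypothetical sketch: with valueRule = "zipf", row k's density is
# proportional to (v+k)^(-s). With density = 0.36, zipfV = 2, zipfS = 2:
#   row 0: 0.36 (the base density)
#   row 1: 0.36 * (1/9)/(1/4) = 0.16
densityscale = 2097152
version = "1.0"
[indexes.users]
columns = 100000
fields = [
    { name = "zipf-example", type = "set", max = 10, density = 0.36, valueRule = "zipf", zipfV = 2.0, zipfS = 2.0 },
]
```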
146 | 
147 | ##### Mutex Fields
148 | 
149 | Zipf parameters: This just follows the behavior of the Zipf generator in
150 | `math/rand`. A single value is determined for each column, determining which
151 | bit is set.
152 | 
153 | #### Int Fields
154 | 
155 | By default, every member of an int field is set to a random value within the
156 | range.
157 | 
158 | Zipf parameters: This follows the behavior of the Zipf generator in `math/rand`,
159 | with an offset of the minimum value. For instance, a field with min/max of
160 | 10/20 behaves exactly like a field with a min/max of 0/10, with 10 added to
161 | each value.
162 | 
163 | ### Workloads
164 | 
165 | A workload describes a named series of steps, which apply to indexes
166 | and fields previously described. Workloads don't have to be in the same
167 | spec files as the indexes and fields they refer to. Workloads are defined
168 | in a top-level array, usually using `[[workloads]]` to refer to them.
169 | Workloads are sequential. They have the following attributes:
170 | 
171 | * `name`: The name of the workload.
172 | * `description`: A description of the workload.
173 | * `threadCount`: Number of importer threads to use in imports.
174 | * `batchSize`: The default size of import batches (number of records before
175 |   the client transmits records to the server).
176 | 
177 | Each workload also has an array of tasks, which are all executed in parallel.
178 | 
179 | #### Tasks
180 | 
181 | Each task outlines a specific set of data to populate in a given field.
182 | 
183 | * `index`, `field`: the index and field names to identify the field to be
184 |   populated. The index name should match the name in the spec, not including
185 |   any prefixes.
186 | * `seed`: the random number seed to use when populating this field. Defaults
187 |   to the seed for the field's parent index.
188 | * `columns`: the number of columns to populate. Default: populate the
189 |   entire field, using the index's columns.
190 | * `columnOffset`: column to start with. The special value "append" means
191 |   to create new columns starting immediately after the highest column
192 |   previously created.
193 | * `columnOrder`: "linear", "stride", "zipf", or "permute" (default linear).
194 |   Indicates the order in which to generate column values.
195 | * `stride`: The stride to use with a columnOrder of "stride".
196 | * `rowOrder`: "linear" or "permute" (default linear). Determines the order
197 |   in which row values are computed, for set fields, or whether to permute
198 |   generated values, for mutex or int fields.
199 | * `batchSize`: Size of import batches (overrides, but defaults to, the
200 |   workload's batchSize).
201 | * `stamp`: Controls timestamp behavior. One of "none", "random", "increasing".
202 | * `stampRange`: A duration over which to spread timestamps when generating
203 |   them.
204 | * `stampStart`: A specific time to start timestamps at. Defaults to current
205 |   time minus stamp range.
206 | * `zipfV`, `zipfS`: V and S values for a zipf distribution of columns.
207 | * `zipfRange`: The range to use for the zipf distribution (defaults to
208 |   `columns`).
209 | 
210 | As a special case, when `columnOffset` is "append" and `columnOrder` is "zipf",
211 | values are randomly generated using a zipf distribution over [0,`zipfRange`).
212 | These values are then subtracted from the *next* column number -- the lowest
213 | column number not currently known to `imagine`, to produce a range which might
214 | indicate updates to existing columns, or might indicate a new column. A value
215 | is generated for each column. Note that this will pick values the same way
216 | mutex or int fields do, rather than generating all the values for each column,
217 | and the same column may be generated more than once. This behavior attempts
218 | to simulate likely behavior for event streams.
219 | 
220 | The "zipf" `columnOrder` is not supported except with `columnOffset` of
221 | "append", and the Zipf parameters are not defined for any other column order.
222 | 
223 | ## Data Generation
224 | 
225 | Reproducible data generation means being able to generate the same bits every
226 | time. To this end, we use a seekable PRNG -- you can specify an offset into
227 | its stream and get the same bits every time. See the related package
228 | `apophenia` for details.
229 | 
230 | Set values are computed using `apophenia.Weighted`, with `seed` equal to the
231 | row number, and `id` equal to the column number.
232 | 
233 | Mutex/Int: Mutex and int fields both generate a single value in their range.
234 | Linear values are computed using `row` 0, `iter` 0, and are computed as
235 | `min + U % (max - min)`. (For a mutex, the minimum value is always 0.) Zipf
236 | values are computed using iterated values for `row` 0 as inputs to another
237 | algorithm which treats them as [0,1) range values. If RowOrder is set to
238 | `permute`, the permutation is computed using permutation row 2.
239 | 
240 | Permuted column values are generated by requesting a permutation generator for
241 | row 0 with the given seed. Permuted row values for sets are generated using
242 | a permutation generator for row 1.
243 | 
--------------------------------------------------------------------------------
/dx/compare.go:
--------------------------------------------------------------------------------
1 | package dx
2 | 
3 | import (
4 |     "encoding/json"
5 |     "fmt"
6 |     "io"
7 |     "log"
8 |     "os"
9 |     "reflect"
10 |     "text/tabwriter"
11 |     "time"
12 | 
13 |     "github.com/pkg/errors"
14 |     "github.com/spf13/cobra"
15 | )
16 | 
17 | // NewCompareCommand initializes a new compare command for dx.
18 | func NewCompareCommand(m *Main) *cobra.Command {
19 |     compareCmd := &cobra.Command{
20 |         Use:   "compare",
21 |         Short: "compare two dx results",
22 |         Long:  `Compare two result files generated by "dx ingest" or "dx query".`,
23 |         PreRun: func(cmd *cobra.Command, args []string) {
24 |             if err := validateComparisonArgs(args); err != nil {
25 |                 if m.Verbose {
26 |                     fmt.Printf("%+v\n", err)
27 |                 } else {
28 |                     fmt.Printf("%v\n", err)
29 |                 }
30 |                 os.Exit(1)
31 |             }
32 |         },
33 |         Run: func(cmd *cobra.Command, args []string) {
34 |             if err := ExecuteComparison(args[0], args[1]); err != nil {
35 |                 if m.Verbose {
36 |                     fmt.Printf("%+v\n", err)
37 |                 } else {
38 |                     fmt.Printf("%v\n", err)
39 |                 }
40 |                 os.Exit(1)
41 |             }
42 |         },
43 |     }
44 | 
45 |     return compareCmd
46 | }
47 | 
48 | // validateComparisonArgs validates that exactly two args were passed to the compare command
49 | // and that both are valid filenames.
50 | func validateComparisonArgs(args []string) error {
51 |     if len(args) != 2 {
52 |         return errors.New("need exactly two files to compare")
53 |     }
54 |     fileExists, err := checkFileExists(args[0])
55 |     if err != nil {
56 |         return errors.Wrapf(err, "error verifying file %s exists", args[0])
57 |     }
58 |     if !fileExists {
59 |         return errors.Errorf("%s does not exist or is not a file", args[0])
60 |     }
61 |     fileExists, err = checkFileExists(args[1])
62 |     if err != nil {
63 |         return errors.Wrapf(err, "error verifying file %s exists", args[1])
64 |     }
65 |     if !fileExists {
66 |         return errors.Errorf("%s does not exist or is not a file", args[1])
67 |     }
68 |     return nil
69 | }
70 | 
71 | // Comparison contains the information produced by comparing two runs. RunTime is the total time it took for a run to complete.
72 | // TotalTime is the sum of the individual times of each operation, which may have been running in separate goroutines.
73 | type Comparison struct {
74 |     Type           string
75 |     RunTime1       time.Duration
76 |     RunTime2       time.Duration
77 |     RunTimeDelta   float64
78 |     TotalTime1     time.Duration
79 |     TotalTime2     time.Duration
80 |     TotalTimeDelta float64
81 |     ThreadCount1   int
82 |     ThreadCount2   int
83 |     Accuracy       float64
84 |     Size           int64
85 | }
86 | 
87 | // ExecuteComparison executes a comparison on the two files.
88 | func ExecuteComparison(file1, file2 string) error {
89 |     benchChan1 := make(chan *Benchmark)
90 |     benchChan2 := make(chan *Benchmark)
91 |     cmdTypeChan1 := make(chan string)
92 |     cmdTypeChan2 := make(chan string)
93 | 
94 |     go readResults(file1, benchChan1, cmdTypeChan1)
95 |     go readResults(file2, benchChan2, cmdTypeChan2)
96 | 
97 |     cmdType1 := <-cmdTypeChan1
98 |     cmdType2 := <-cmdTypeChan2
99 | 
100 |     if cmdType1 != cmdType2 {
101 |         return errors.Errorf("result file types don't match: %v and %v", cmdType1, cmdType2)
102 |     }
103 | 
104 |     switch cmdType1 {
105 |     case cmdIngest:
106 | 
107 |         b1 := <-benchChan1
108 |         b2 := <-benchChan2
109 | 
110 |         // compare ingest
111 |         comparison, err := compareIngest(b1, b2)
112 |         if err != nil {
113 |             return errors.Wrap(err, "error comparing ingest")
114 |         }
115 |         // print results
116 |         if err := printIngestResults(comparison); err != nil {
117 |             return errors.Wrap(err, "error printing ingest results")
118 |         }
119 |         return nil
120 |     case cmdQuery:
121 |         // consolidate benches
122 |         benches1 := make([]*Benchmark, 0)
123 |         benches2 := make([]*Benchmark, 0)
124 |         for b := range benchChan1 {
125 |             benches1 = append(benches1, b)
126 |         }
127 |         for b := range benchChan2 {
128 |             benches2 = append(benches2, b)
129 |         }
130 | 
131 |         // compare queries
132 |         comparison, err := compareQueries(benches1, benches2)
133 |         if err != nil {
134 |             return errors.Wrap(err, "error comparing queries")
135 |         }
136 | 
137 |         // print results
138 |         if err := printQueryResults(comparison); err != nil {
139 |             return errors.Wrap(err, "error printing query results")
140 |         }
141 |         return nil
142 |     // even though there is cmdTotal, it must never be at the start of a file, so that is an error.
143 |     default:
144 |         return errors.Errorf("invalid command type: %v", cmdType1)
145 |     }
146 | }
147 | 
148 | // compareQueries returns the total time of all the individual queries, as well as the total time of the run and additional analysis.
149 | func compareQueries(benches1, benches2 []*Benchmark) (*Comparison, error) {
150 |     var runTime1, runTime2 time.Duration
151 | 
152 |     // queryMap only contains valid queries from benches1
153 |     queryMap := make(map[int64]*Query)
154 |     for _, b1 := range benches1 {
155 |         if b1.Type == cmdTotal {
156 |             runTime1 = b1.Time.Duration
157 |             continue
158 |         }
159 |         if isValidQuery(b1.Query) {
160 |             queryMap[b1.Query.ID] = b1.Query
161 |         } else {
162 |             queryT := b1.Query.Type.String()
163 |             log.Printf("invalid query from first file: ID: %v, %s %v from index: %s field: %s\n",
164 |                 b1.Query.ID, queryT, b1.Query.Rows, b1.Query.IndexName, b1.Query.FieldName)
165 |         }
166 |     }
167 | 
168 |     // validQueries is the number of queries that successfully ran on both clusters.
169 |     // This is not equivalent to the number of queries with correct results.
170 |     var validQueries int64
171 |     var numCorrect int64
172 |     var totalTime1, totalTime2 time.Duration
173 | 
174 |     for _, b2 := range benches2 {
175 |         if b2.Type == cmdTotal {
176 |             runTime2 = b2.Time.Duration
177 |             continue
178 |         }
179 | 
180 |         query2 := b2.Query
181 |         // here we assume that the same IDs mean the same queries.
182 |         // if query1 is found, it must already be a valid query.
183 |         if query1, found := queryMap[query2.ID]; found {
184 |             if isValidQuery(query2) {
185 |                 if queryResultsEqual(query1, query2) {
186 |                     numCorrect++
187 |                 } else {
188 |                     // valid query2 but unequal results
189 |                     queryT := query2.Type.String()
190 |                     log.Printf("unequal results: ID: %v, %s %v from index: %s field: %s. Got results %v and %v, and result counts %v and %v\n",
191 |                         query1.ID, queryT, query1.Rows, query1.IndexName, query1.FieldName, query1.Result, query2.Result, query1.ResultCount, query2.ResultCount)
192 |                 }
193 |             } else {
194 |                 // invalid query2
195 |                 queryT := query2.Type.String()
196 |                 log.Printf("invalid query from second file: ID: %v, %s %v from index: %s field: %s\n",
197 |                     query2.ID, queryT, query2.Rows, query2.IndexName, query2.FieldName)
198 |             }
199 | 
200 |             // regardless of validity or equality of results, add time for valid queries.
201 |             totalTime1 += query1.Time.Duration
202 |             totalTime2 += query2.Time.Duration
203 |             validQueries++
204 |         } else {
205 |             // first result does not contain this query
206 |             queryT := query2.Type.String()
207 |             log.Printf("query found in second file but not in first: ID: %v, %s %v from index: %s field: %s\n",
208 |                 query2.ID, queryT, query2.Rows, query2.IndexName, query2.FieldName)
209 |         }
210 |     }
211 | 
212 |     totalTimeDelta, err := compareTime(totalTime1, totalTime2)
213 |     if err != nil {
214 |         return nil, errors.Wrap(err, "error comparing total time")
215 |     }
216 |     runTimeDelta, err := compareTime(runTime1, runTime2)
217 |     if err != nil {
218 |         return nil, errors.Wrap(err, "error comparing run time")
219 |     }
220 |     accuracy := float64(numCorrect) / float64(validQueries)
221 | 
222 |     threadCount1 := benches1[0].ThreadCount
223 |     threadCount2 := benches2[0].ThreadCount
224 | 
225 |     return &Comparison{
226 |         Type:           cmdQuery,
227 |         RunTime1:       runTime1,
228 |         RunTime2:       runTime2,
229 |         RunTimeDelta:   runTimeDelta,
230 |         TotalTime1:     totalTime1,
231 |         TotalTime2:     totalTime2,
232 |         TotalTimeDelta: totalTimeDelta,
233 |         ThreadCount1:   threadCount1,
234 |         ThreadCount2:   threadCount2,
235 |         Accuracy:       accuracy,
236 |         Size:           validQueries,
237 |     }, nil
238 | }
239 | 
240 | func compareIngest(b1, b2 *Benchmark) (*Comparison, error) {
241 |     // analyze time
242 |     timeDelta, err := compareTime(b1.Time.Duration, b2.Time.Duration)
243 |     if err != nil {
244 |         return nil, errors.Wrap(err, "error comparing time")
245 |     }
246 | 
247 |     return &Comparison{
248 |         Type:         cmdIngest,
249 |         RunTime1:     b1.Time.Duration,
250 |         RunTime2:     b2.Time.Duration,
251 |         RunTimeDelta: timeDelta,
252 |         ThreadCount1: b1.ThreadCount,
253 |         ThreadCount2: b2.ThreadCount,
254 |     }, nil
255 | }
256 | 
257 | // compareTime takes two durations and returns the relative delta, (time2-time1)/time1.
258 | func compareTime(time1, time2 time.Duration) (float64, error) {
259 |     if time1 == 0 {
260 |         return 0, errors.New("time1 is zero")
261 |     }
262 |     if time2 == 0 {
263 |         return 0, errors.New("time2 is zero")
264 |     }
265 |     timeDelta := float64(time2-time1) / float64(time1)
266 |     return timeDelta, nil
267 | }
268 | 
269 | // queryResultsEqual compares the results of two valid queries. If the queries have
270 | // different result types (i.e. Result and ResultCount), we count the number of results
271 | // and compare the two counts. Results are prioritized over resultCounts.
272 | func queryResultsEqual(query1, query2 *Query) bool {
273 |     // one of query1.Result and query1.ResultCount is not nil
274 |     if query1.Result == nil {
275 |         if query2.Result == nil {
276 |             return *query1.ResultCount == *query2.ResultCount
277 |         }
278 |         return *query1.ResultCount == int64(len(query2.Result.Columns))
279 |     }
280 |     // else, query1.Result is not nil
281 |     if query2.Result == nil {
282 |         return int64(len(query1.Result.Columns)) == *query2.ResultCount
283 |     }
284 |     return reflect.DeepEqual(query1.Result, query2.Result)
285 | }
286 | 
287 | // isValidQuery checks if a query is valid. A valid query has at least one of
288 | // result or resultCount as a non-nil value.
289 | func isValidQuery(query *Query) bool {
290 |     if query == nil {
291 |         return false
292 |     }
293 |     if query.Result == nil && query.ResultCount == nil {
294 |         return false
295 |     }
296 |     return true
297 | }
298 | 
299 | // readResults streams the decoded benchmarks from file into benchChan. It also checks
300 | // the bench type of the first decoded benchmark and sends it to cmdTypeChan. The function
301 | // closes both channels when it is done reading; on any error it exits via log.Fatalf.
302 | func readResults(file string, benchChan chan *Benchmark, cmdTypeChan chan string) {
303 |     fileReader, err := os.Open(file)
304 |     if err != nil {
305 |         log.Fatalf("error opening file %v: %v", file, err)
306 |     }
307 | 
308 |     decoder := json.NewDecoder(fileReader)
309 | 
310 |     // check first benchmark for its type
311 |     bench := NewBenchmark()
312 |     if err := decoder.Decode(&bench); err == io.EOF {
313 |         log.Fatalf("empty file %v: %v", file, err)
314 |     } else if err != nil {
315 |         log.Fatalf("error decoding json from %v: %v", file, err)
316 |     }
317 | 
318 |     cmdTypeChan <- bench.Type
319 |     close(cmdTypeChan)
320 |     benchChan <- bench
321 | 
322 |     // keep reading until EOF or error
323 |     for {
324 |         bench := NewBenchmark()
325 |         if err := decoder.Decode(&bench); err == io.EOF {
326 |             break
327 |         } else if err != nil {
328 |             log.Fatalf("error decoding json from %v: %v", file, err)
329 |         }
330 |         benchChan <- bench
331 |     }
332 |     close(benchChan)
333 | }
334 | 
335 | // printIngestResults prints the results of dx ingest.
336 | func printIngestResults(c *Comparison) error {
337 |     w := new(tabwriter.Writer)
338 |     w.Init(os.Stdout, 10, 5, 5, ' ', tabwriter.AlignRight)
339 | 
340 |     // print in percentage
341 |     delta := c.RunTimeDelta * 100
342 | 
343 |     fmt.Fprintf(w, "ingest\t\tfirst-threads%v\tsecond-threads%v\tdelta\t\n", c.ThreadCount1, c.ThreadCount2)
344 |     fmt.Fprintf(w, "\t\t%v\t%v\t%.1f%%\t\n", c.RunTime1, c.RunTime2, delta)
345 |     fmt.Fprintln(w)
346 |     if err := w.Flush(); err != nil {
347 |         return errors.Wrap(err, "could not flush writer")
348 |     }
349 |     return nil
350 | }
351 | 
352 | // printQueryResults prints the results of dx query.
353 | func printQueryResults(c *Comparison) error {
354 |     w := new(tabwriter.Writer)
355 |     w.Init(os.Stdout, 10, 5, 5, ' ', tabwriter.AlignRight)
356 |     fmt.Fprintf(w, "queries\taccuracy\tfirst-threads%v\tsecond-threads%v\tdelta\t\n", c.ThreadCount1, c.ThreadCount2)
357 | 
358 |     // print in percentages
359 |     accuracy := c.Accuracy * 100
360 |     runTimeDelta := c.RunTimeDelta * 100
361 |     totalTimeDelta := c.TotalTimeDelta * 100
362 | 
363 |     // average ms/op
364 |     ave1 := (float64(c.TotalTime1) / float64(c.Size)) / float64(1000000)
365 |     ave2 := (float64(c.TotalTime2) / float64(c.Size)) / float64(1000000)
366 |     fmt.Fprintf(w, "%v\t%.1f%%\t%.3f ms/op\t%.3f ms/op\t%.1f%%\t\n", c.Size, accuracy, ave1, ave2, totalTimeDelta)
367 |     fmt.Fprintf(w, "%v\t%v\t%v\t%v\t%.1f%%\t\n", "TOTAL", "", c.RunTime1, c.RunTime2, runTimeDelta)
368 |     fmt.Fprintln(w)
369 |     if err := w.Flush(); err != nil {
370 |         return errors.Wrap(err, "could not flush writer")
371 |     }
372 |     return nil
373 | }
374 | 
375 | // checkFileExists checks whether a file exists at path.
376 | func checkFileExists(path string) (bool, error) {
377 |     fileInfo, err := os.Stat(path)
378 |     if os.IsNotExist(err) {
379 |         return false, nil
380 |     } else if err != nil {
381 |         return false, errors.Wrap(err, "error statting path")
382 |     }
383 |     fileMode := fileInfo.Mode()
384 |     return fileMode.IsRegular(), nil
385 | }
386 | 
--------------------------------------------------------------------------------
/dx/testdata/query/0:
--------------------------------------------------------------------------------
1 | {"type":"query","time":"1.966745ms","threadcount":1,"query":{"id":0,"query":1,"index":"dx-users","field":"numbers","rows":[21,51],"time":"1.966745ms","resultcount":82}}
2 | {"type":"query","time":"601.715µs","threadcount":1,"query":{"id":1,"query":0,"index":"ibench","field":"fbench","rows":[0,0],"time":"601.715µs","resultcount":0}}
3 | {"type":"query","time":"1.433244ms","threadcount":1,"query":{"id":2,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.433244ms","resultcount":473}}
4 | {"type":"query","time":"544.12µs","threadcount":1,"query":{"id":3,"query":2,"index":"imaginary-index","field":"field","rows":[0,0],"time":"544.12µs","resultcount":0}}
5 | {"type":"query","time":"750.829µs","threadcount":1,"query":{"id":4,"query":3,"index":"dx","field":"server2","rows":[0,0],"time":"750.829µs","resultcount":0}}
6 | {"type":"query","time":"750.931µs","threadcount":1,"query":{"id":5,"query":3,"index":"imaginary-users","field":"numbers","rows":[0,0],"time":"750.931µs","resultcount":0}}
7 | {"type":"query","time":"1.584166ms","threadcount":1,"query":{"id":6,"query":1,"index":"ibench","field":"fbench","rows":[0,0],"time":"1.584166ms","resultcount":0}}
8 | {"type":"query","time":"654.381µs","threadcount":1,"query":{"id":7,"query":2,"index":"dx","field":"candidate","rows":[0,0],"time":"654.381µs","resultcount":0}}
9 | {"type":"query","time":"1.312452ms","threadcount":1,"query":{"id":8,"query":1,"index":"ibench","field":"fbench","rows":[0,0],"time":"1.312452ms","resultcount":0}}
10 | {"type":"query","time":"505.567µs","threadcount":1,"query":{"id":9,"query":1,"index":"dx-users","field":"numbers","rows":[62,110],"time":"505.567µs","resultcount":81}}
11 | {"type":"query","time":"1.286691ms","threadcount":1,"query":{"id":10,"query":0,"index":"dx","field":"candidate","rows":[0,0],"time":"1.286691ms","resultcount":0}}
12 | {"type":"query","time":"464.001µs","threadcount":1,"query":{"id":11,"query":3,"index":"dx","field":"server2","rows":[0,0],"time":"464.001µs","resultcount":0}}
13 | {"type":"query","time":"988.405µs","threadcount":1,"query":{"id":12,"query":2,"index":"bla","field":"aint","rows":[0,0],"time":"988.405µs","resultcount":0}}
14 | {"type":"query","time":"636.468µs","threadcount":1,"query":{"id":13,"query":2,"index":"bla","field":"aint","rows":[0,0],"time":"636.468µs","resultcount":0}}
15 | {"type":"query","time":"1.15774ms","threadcount":1,"query":{"id":14,"query":3,"index":"imaginary-users","field":"numbers","rows":[0,0],"time":"1.15774ms","resultcount":0}}
16 | {"type":"query","time":"1.545607ms","threadcount":1,"query":{"id":15,"query":0,"index":"dx-index","field":"field","rows":[301,19],"time":"1.545607ms","resultcount":473}}
17 | {"type":"query","time":"640.937µs","threadcount":1,"query":{"id":16,"query":1,"index":"dx","field":"candidate","rows":[0,0],"time":"640.937µs","resultcount":0}}
18 | {"type":"query","time":"1.092996ms","threadcount":1,"query":{"id":17,"query":2,"index":"dx-index","field":"field","rows":[55,41],"time":"1.092996ms","resultcount":0}}
19 | {"type":"query","time":"445.509µs","threadcount":1,"query":{"id":18,"query":0,"index":"dx-users","field":"numbers","rows":[31,18],"time":"445.509µs","resultcount":63}} 20 | {"type":"query","time":"1.690497ms","threadcount":1,"query":{"id":19,"query":2,"index":"bla","field":"aint","rows":[0,0],"time":"1.690497ms","resultcount":0}} 21 | {"type":"query","time":"453.54µs","threadcount":1,"query":{"id":20,"query":3,"index":"imaginary-index","field":"field","rows":[0,0],"time":"453.54µs","resultcount":0}} 22 | {"type":"query","time":"1.271531ms","threadcount":1,"query":{"id":21,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.271531ms","resultcount":473}} 23 | {"type":"query","time":"427.067µs","threadcount":1,"query":{"id":22,"query":0,"index":"imaginary-users","field":"numbers","rows":[1,0],"time":"427.067µs","resultcount":7}} 24 | {"type":"query","time":"1.053598ms","threadcount":1,"query":{"id":23,"query":2,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.053598ms","resultcount":0}} 25 | {"type":"query","time":"558.563µs","threadcount":1,"query":{"id":24,"query":2,"index":"ibench","field":"fbench","rows":[0,0],"time":"558.563µs","resultcount":0}} 26 | {"type":"query","time":"1.349004ms","threadcount":1,"query":{"id":25,"query":1,"index":"bla","field":"aint","rows":[0,0],"time":"1.349004ms","resultcount":0}} 27 | {"type":"query","time":"500.181µs","threadcount":1,"query":{"id":26,"query":3,"index":"dx-users","field":"numbers","rows":[111,188],"time":"500.181µs","resultcount":7}} 28 | {"type":"query","time":"1.002989ms","threadcount":1,"query":{"id":27,"query":3,"index":"dx","field":"candidate","rows":[0,0],"time":"1.002989ms","resultcount":0}} 29 | {"type":"query","time":"830.771µs","threadcount":1,"query":{"id":28,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"830.771µs","resultcount":473}} 30 | {"type":"query","time":"414.337µs","threadcount":1,"query":{"id":29,"query":0,"index":"ibench","field":"fbench","rows":[0,0],"time":"414.337µs","resultcount":0}} 31 | {"type":"query","time":"1.06754ms","threadcount":1,"query":{"id":30,"query":1,"index":"ibench","field":"fbench","rows":[0,0],"time":"1.06754ms","resultcount":0}} 32 | {"type":"query","time":"446.4µs","threadcount":1,"query":{"id":31,"query":2,"index":"dx","field":"candidate","rows":[0,0],"time":"446.4µs","resultcount":0}} 33 | {"type":"query","time":"1.03081ms","threadcount":1,"query":{"id":32,"query":2,"index":"ibench","field":"fbench","rows":[0,0],"time":"1.03081ms","resultcount":0}} 34 | {"type":"query","time":"701.229µs","threadcount":1,"query":{"id":33,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"701.229µs","resultcount":473}} 35 | {"type":"query","time":"1.341858ms","threadcount":1,"query":{"id":34,"query":0,"index":"dx-users","field":"numbers","rows":[44,165],"time":"1.341858ms","resultcount":62}} 36 | {"type":"query","time":"515.69µs","threadcount":1,"query":{"id":35,"query":3,"index":"dx","field":"server1","rows":[0,0],"time":"515.69µs","resultcount":0}} 37 | {"type":"query","time":"1.107886ms","threadcount":1,"query":{"id":36,"query":0,"index":"dx","field":"candidate","rows":[0,0],"time":"1.107886ms","resultcount":0}} 38 | {"type":"query","time":"1.041797ms","threadcount":1,"query":{"id":37,"query":2,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.041797ms","resultcount":0}} 39 | 
{"type":"query","time":"467.045µs","threadcount":1,"query":{"id":38,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"467.045µs","resultcount":473}} 40 | {"type":"query","time":"969.646µs","threadcount":1,"query":{"id":39,"query":0,"index":"dx-index","field":"field","rows":[450,56],"time":"969.646µs","resultcount":473}} 41 | {"type":"query","time":"456.993µs","threadcount":1,"query":{"id":40,"query":2,"index":"imaginary-index","field":"field","rows":[0,0],"time":"456.993µs","resultcount":0}} 42 | {"type":"query","time":"890.36µs","threadcount":1,"query":{"id":41,"query":0,"index":"ibench","field":"fbench","rows":[0,0],"time":"890.36µs","resultcount":0}} 43 | {"type":"query","time":"361.448µs","threadcount":1,"query":{"id":42,"query":2,"index":"imaginary-index","field":"field","rows":[0,0],"time":"361.448µs","resultcount":0}} 44 | {"type":"query","time":"818.009µs","threadcount":1,"query":{"id":43,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"818.009µs","resultcount":473}} 45 | {"type":"query","time":"393.876µs","threadcount":1,"query":{"id":44,"query":1,"index":"imaginary-users","field":"numbers","rows":[1,0],"time":"393.876µs","resultcount":9}} 46 | {"type":"query","time":"1.012226ms","threadcount":1,"query":{"id":45,"query":0,"index":"dx-index","field":"field","rows":[967,276],"time":"1.012226ms","resultcount":473}} 47 | {"type":"query","time":"443.125µs","threadcount":1,"query":{"id":46,"query":1,"index":"dx-index","field":"field","rows":[700,576],"time":"443.125µs","resultcount":473}} 48 | {"type":"query","time":"506.99µs","threadcount":1,"query":{"id":47,"query":0,"index":"imaginary-users","field":"numbers","rows":[1,1],"time":"506.99µs","resultcount":8}} 49 | {"type":"query","time":"1.650675ms","threadcount":1,"query":{"id":48,"query":0,"index":"bla","field":"aint","rows":[0,0],"time":"1.650675ms","resultcount":0}} 50 | {"type":"query","time":"771.629µs","threadcount":1,"query":{"id":49,"query":1,"index":"dx-users","field":"numbers","rows":[59,80],"time":"771.629µs","resultcount":81}} 51 | {"type":"query","time":"561.013µs","threadcount":1,"query":{"id":50,"query":3,"index":"dx-users","field":"numbers","rows":[62,66],"time":"561.013µs","resultcount":14}} 52 | {"type":"query","time":"1.412485ms","threadcount":1,"query":{"id":51,"query":0,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.412485ms","resultcount":473}} 53 | {"type":"query","time":"630.564µs","threadcount":1,"query":{"id":52,"query":1,"index":"bla","field":"aint","rows":[0,0],"time":"630.564µs","resultcount":0}} 54 | {"type":"query","time":"1.375873ms","threadcount":1,"query":{"id":53,"query":0,"index":"imaginary-users","field":"numbers","rows":[0,1],"time":"1.375873ms","resultcount":7}} 55 | {"type":"query","time":"1.260144ms","threadcount":1,"query":{"id":54,"query":2,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.260144ms","resultcount":0}} 56 | {"type":"query","time":"630.756µs","threadcount":1,"query":{"id":55,"query":0,"index":"ibench","field":"fbench","rows":[0,0],"time":"630.756µs","resultcount":0}} 57 | {"type":"query","time":"1.652645ms","threadcount":1,"query":{"id":56,"query":1,"index":"dx-users","field":"numbers","rows":[103,189],"time":"1.652645ms","resultcount":77}} 58 | {"type":"query","time":"466.839µs","threadcount":1,"query":{"id":57,"query":0,"index":"dx-index","field":"field","rows":[199,98],"time":"466.839µs","resultcount":473}} 59 | 
{"type":"query","time":"1.352384ms","threadcount":1,"query":{"id":58,"query":3,"index":"imaginary-users","field":"numbers","rows":[1,1],"time":"1.352384ms","resultcount":0}} 60 | {"type":"query","time":"485.024µs","threadcount":1,"query":{"id":59,"query":3,"index":"dx","field":"candidate","rows":[0,0],"time":"485.024µs","resultcount":0}} 61 | {"type":"query","time":"1.328906ms","threadcount":1,"query":{"id":60,"query":2,"index":"dx-users","field":"numbers","rows":[52,156],"time":"1.328906ms","resultcount":25}} 62 | {"type":"query","time":"452.404µs","threadcount":1,"query":{"id":61,"query":1,"index":"imaginary-index","field":"field","rows":[0,0],"time":"452.404µs","resultcount":473}} 63 | {"type":"query","time":"1.16612ms","threadcount":1,"query":{"id":62,"query":3,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.16612ms","resultcount":0}} 64 | {"type":"query","time":"420.155µs","threadcount":1,"query":{"id":63,"query":1,"index":"bla","field":"aint","rows":[0,0],"time":"420.155µs","resultcount":0}} 65 | {"type":"query","time":"991.668µs","threadcount":1,"query":{"id":64,"query":3,"index":"imaginary-index","field":"field","rows":[0,0],"time":"991.668µs","resultcount":0}} 66 | {"type":"query","time":"1.155675ms","threadcount":1,"query":{"id":65,"query":0,"index":"dx","field":"server2","rows":[0,0],"time":"1.155675ms","resultcount":0}} 67 | {"type":"query","time":"479.89µs","threadcount":1,"query":{"id":66,"query":1,"index":"dx-index","field":"field","rows":[509,100],"time":"479.89µs","resultcount":473}} 68 | {"type":"query","time":"1.05993ms","threadcount":1,"query":{"id":67,"query":0,"index":"dx-users","field":"numbers","rows":[178,7],"time":"1.05993ms","resultcount":62}} 69 | {"type":"query","time":"422.199µs","threadcount":1,"query":{"id":68,"query":1,"index":"ibench","field":"fbench","rows":[0,0],"time":"422.199µs","resultcount":0}} 70 | {"type":"query","time":"930.148µs","threadcount":1,"query":{"id":69,"query":1,"index":"ibench","field":"fbench","rows":[0,0],"time":"930.148µs","resultcount":0}} 71 | {"type":"query","time":"689.416µs","threadcount":1,"query":{"id":70,"query":0,"index":"dx-users","field":"numbers","rows":[96,139],"time":"689.416µs","resultcount":62}} 72 | {"type":"query","time":"1.525221ms","threadcount":1,"query":{"id":71,"query":3,"index":"ibench","field":"fbench","rows":[0,0],"time":"1.525221ms","resultcount":0}} 73 | {"type":"query","time":"505.411µs","threadcount":1,"query":{"id":72,"query":0,"index":"dx","field":"server1","rows":[0,0],"time":"505.411µs","resultcount":0}} 74 | {"type":"query","time":"1.175815ms","threadcount":1,"query":{"id":73,"query":2,"index":"bla","field":"aint","rows":[0,0],"time":"1.175815ms","resultcount":0}} 75 | {"type":"query","time":"1.031795ms","threadcount":1,"query":{"id":74,"query":1,"index":"imaginary-index","field":"field","rows":[0,0],"time":"1.031795ms","resultcount":473}} 76 | {"type":"query","time":"493.309µs","threadcount":1,"query":{"id":75,"query":1,"index":"dx","field":"server1","rows":[0,0],"time":"493.309µs","resultcount":0}} 77 | {"type":"query","time":"1.144053ms","threadcount":1,"query":{"id":76,"query":3,"index":"imaginary-users","field":"numbers","rows":[1,0],"time":"1.144053ms","resultcount":1}} 78 | {"type":"query","time":"461.322µs","threadcount":1,"query":{"id":77,"query":3,"index":"imaginary-index","field":"field","rows":[0,0],"time":"461.322µs","resultcount":0}} 79 | 
{"type":"query","time":"1.027492ms","threadcount":1,"query":{"id":78,"query":0,"index":"dx","field":"primary","rows":[0,0],"time":"1.027492ms","resultcount":0}} 80 | {"type":"query","time":"403.309µs","threadcount":1,"query":{"id":79,"query":1,"index":"dx-index","field":"field","rows":[808,764],"time":"403.309µs","resultcount":473}} 81 | {"type":"query","time":"1.307595ms","threadcount":1,"query":{"id":80,"query":3,"index":"dx-users","field":"numbers","rows":[129,147],"time":"1.307595ms","resultcount":10}} 82 | {"type":"query","time":"415.849µs","threadcount":1,"query":{"id":81,"query":3,"index":"dx","field":"server2","rows":[0,0],"time":"415.849µs","resultcount":0}} 83 | {"type":"query","time":"1.073093ms","threadcount":1,"query":{"id":82,"query":3,"index":"bla","field":"aint","rows":[0,0],"time":"1.073093ms","resultcount":0}} 84 | {"type":"query","time":"579.685µs","threadcount":1,"query":{"id":83,"query":3,"index":"dx-users","field":"numbers","rows":[124,127],"time":"579.685µs","resultcount":8}} 85 | {"type":"query","time":"897.954µs","threadcount":1,"query":{"id":84,"query":1,"index":"imaginary-users","field":"numbers","rows":[1,1],"time":"897.954µs","resultcount":8}} 86 | {"type":"query","time":"483.96µs","threadcount":1,"query":{"id":85,"query":3,"index":"dx-users","field":"numbers","rows":[168,187],"time":"483.96µs","resultcount":8}} 87 | {"type":"query","time":"1.277825ms","threadcount":1,"query":{"id":86,"query":3,"index":"bla","field":"aint","rows":[0,0],"time":"1.277825ms","resultcount":0}} 88 | {"type":"query","time":"1.072135ms","threadcount":1,"query":{"id":87,"query":2,"index":"dx-users","field":"numbers","rows":[15,8],"time":"1.072135ms","resultcount":16}} 89 | {"type":"query","time":"409.217µs","threadcount":1,"query":{"id":88,"query":1,"index":"bla","field":"aint","rows":[0,0],"time":"409.217µs","resultcount":0}} 90 | {"type":"query","time":"959.141µs","threadcount":1,"query":{"id":89,"query":2,"index":"imaginary-users","field":"numbers","rows":[1,0],"time":"959.141µs","resultcount":2}} 91 | {"type":"query","time":"441.65µs","threadcount":1,"query":{"id":90,"query":1,"index":"dx","field":"server2","rows":[0,0],"time":"441.65µs","resultcount":0}} 92 | {"type":"query","time":"1.233297ms","threadcount":1,"query":{"id":91,"query":3,"index":"dx","field":"server2","rows":[0,0],"time":"1.233297ms","resultcount":0}} 93 | {"type":"query","time":"469.64µs","threadcount":1,"query":{"id":92,"query":2,"index":"dx-index","field":"field","rows":[86,262],"time":"469.64µs","resultcount":0}} 94 | {"type":"query","time":"1.0249ms","threadcount":1,"query":{"id":93,"query":0,"index":"bla","field":"aint","rows":[0,0],"time":"1.0249ms","resultcount":0}} 95 | {"type":"query","time":"392.983µs","threadcount":1,"query":{"id":94,"query":1,"index":"bla","field":"aint","rows":[0,0],"time":"392.983µs","resultcount":0}} 96 | {"type":"query","time":"896.985µs","threadcount":1,"query":{"id":95,"query":0,"index":"ibench","field":"fbench","rows":[0,0],"time":"896.985µs","resultcount":0}} 97 | {"type":"query","time":"429.046µs","threadcount":1,"query":{"id":96,"query":2,"index":"dx-users","field":"numbers","rows":[49,43],"time":"429.046µs","resultcount":14}} 98 | {"type":"query","time":"896.324µs","threadcount":1,"query":{"id":97,"query":3,"index":"imaginary-users","field":"numbers","rows":[1,1],"time":"896.324µs","resultcount":0}} 99 | {"type":"query","time":"510.944µs","threadcount":1,"query":{"id":98,"query":3,"index":"ibench","field":"fbench","rows":[0,0],"time":"510.944µs","resultcount":0}} 100 | 
{"type":"query","time":"1.035103ms","threadcount":1,"query":{"id":99,"query":0,"index":"bla","field":"aint","rows":[0,0],"time":"1.035103ms","resultcount":0}} 101 | {"type":"total","time":"169.797748ms","threadcount":1,"query":{"id":-1,"query":0,"index":"","field":"","rows":null,"time":"169.797748ms"}} 102 | --------------------------------------------------------------------------------