├── integration
    ├── init_sql_scripts
    │   ├── 0001-drop-database.sql
    │   └── 0002-create-database.sql
    ├── config-local.yaml
    └── grpc_test.go
├── pictures
    └── tables.png
├── blog
    ├── post1_pics
    │   ├── io-write.png
    │   ├── tables.png
    │   ├── cpu-usage.png
    │   ├── disk-usage.png
    │   ├── elastic-count.png
    │   └── memory-usage.png
    └── post1.md
├── embed.go
├── e2etests
    ├── config-local-single.yaml
    ├── config-local-multi1.yaml
    ├── config-local-multi2.yaml
    ├── config-replication-single.yaml
    ├── config-replication-multi1.yaml
    ├── config-replication-multi2.yaml
    ├── clickhouse-replicated.xml
    └── e2e_test.go
├── jaeger-ui.json
├── internal
    └── tools
    │   ├── tools.go
    │   └── go.mod
├── storage
    ├── clickhousespanstore
    │   ├── tablename.go
    │   ├── tablename_test.go
    │   ├── mocks
    │   │   ├── db.go
    │   │   ├── converter.go
    │   │   ├── converter_test.go
    │   │   ├── spylogger.go
    │   │   └── spylogger_test.go
    │   ├── params.go
    │   ├── heap.go
    │   ├── writer.go
    │   ├── pool.go
    │   ├── worker.go
    │   ├── reader.go
    │   ├── worker_test.go
    │   └── reader_test.go
    ├── clickhousedependencystore
    │   ├── dependencystore_test.go
    │   └── noop.go
    ├── config_test.go
    ├── store_test.go
    ├── config.go
    └── store.go
├── RELEASE.md
├── sqlscripts
    ├── distributed-table.tmpl.sql
    ├── jaeger-spans.tmpl.sql
    ├── jaeger-spans-archive.tmpl.sql
    ├── jaeger-index.tmpl.sql
    └── jaeger-operations.tmpl.sql
├── Dockerfile
├── .gitignore
├── .github
    └── workflows
    │   ├── release.yaml
    │   └── build.yaml
├── cmd
    └── jaeger-clickhouse
    │   └── main.go
├── guide-kubernetes.md
├── config.yaml
├── guide-multitenancy.md
├── README.md
├── Makefile
├── .golangci.yml
├── go.mod
├── guide-sharding-and-replication.md
└── LICENSE


/integration/init_sql_scripts/0001-drop-database.sql:
--------------------------------------------------------------------------------
1 | DROP DATABASE IF EXISTS default;
2 | 


--------------------------------------------------------------------------------
/integration/init_sql_scripts/0002-create-database.sql:
--------------------------------------------------------------------------------
1 | CREATE DATABASE IF NOT EXISTS default;
2 | 


--------------------------------------------------------------------------------
/pictures/tables.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaegertracing/jaeger-clickhouse/HEAD/pictures/tables.png


--------------------------------------------------------------------------------
/blog/post1_pics/io-write.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaegertracing/jaeger-clickhouse/HEAD/blog/post1_pics/io-write.png


--------------------------------------------------------------------------------
/blog/post1_pics/tables.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaegertracing/jaeger-clickhouse/HEAD/blog/post1_pics/tables.png


--------------------------------------------------------------------------------
/embed.go:
--------------------------------------------------------------------------------
1 | package jaegerclickhouse
2 | 
3 | import "embed"
4 | 
5 | //go:embed sqlscripts/*
6 | var SQLScripts embed.FS
7 | 


--------------------------------------------------------------------------------
/integration/config-local.yaml:
--------------------------------------------------------------------------------
1 | address: localhost:9000
2 | init_sql_scripts_dir: init_sql_scripts
3 | init_tables: true
4 | 


--------------------------------------------------------------------------------
/blog/post1_pics/cpu-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaegertracing/jaeger-clickhouse/HEAD/blog/post1_pics/cpu-usage.png


--------------------------------------------------------------------------------
/blog/post1_pics/disk-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaegertracing/jaeger-clickhouse/HEAD/blog/post1_pics/disk-usage.png


--------------------------------------------------------------------------------
/e2etests/config-local-single.yaml:
--------------------------------------------------------------------------------
1 | address: chi:9000
2 | # For test purposes flush on every write
3 | batch_write_size: 1
4 | 


--------------------------------------------------------------------------------
/blog/post1_pics/elastic-count.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaegertracing/jaeger-clickhouse/HEAD/blog/post1_pics/elastic-count.png


--------------------------------------------------------------------------------
/blog/post1_pics/memory-usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaegertracing/jaeger-clickhouse/HEAD/blog/post1_pics/memory-usage.png


--------------------------------------------------------------------------------
/e2etests/config-local-multi1.yaml:
--------------------------------------------------------------------------------
1 | address: chi:9000
2 | tenant: multi1
3 | # For test purposes flush on every write
4 | batch_write_size: 1
5 | 


--------------------------------------------------------------------------------
/e2etests/config-local-multi2.yaml:
--------------------------------------------------------------------------------
1 | address: chi:9000
2 | tenant: multi2
3 | # For test purposes flush on every write
4 | batch_write_size: 1
5 | 


--------------------------------------------------------------------------------
/e2etests/config-replication-single.yaml:
--------------------------------------------------------------------------------
1 | address: chi:9000
2 | replication: true
3 | # For test purposes flush on every write
4 | batch_write_size: 1
5 | 


--------------------------------------------------------------------------------
/jaeger-ui.json:
--------------------------------------------------------------------------------
1 | {
2 |   "dependencies": {
3 |     "dagMaxNumServices": 200,
4 |     "menuEnabled": true
5 |   },
6 |   "archiveEnabled": true
7 | }
8 | 
9 | 


--------------------------------------------------------------------------------
/internal/tools/tools.go:
--------------------------------------------------------------------------------
1 | package tools
2 | 
3 | import (
4 | 	_ "github.com/golangci/golangci-lint/cmd/golangci-lint"
5 | 	_ "golang.org/x/tools/cmd/goimports"
6 | )
7 | 


--------------------------------------------------------------------------------
/e2etests/config-replication-multi1.yaml:
--------------------------------------------------------------------------------
1 | address: chi:9000
2 | replication: true
3 | tenant: multi1
4 | # For test purposes flush on every write
5 | batch_write_size: 1
6 | 


--------------------------------------------------------------------------------
/e2etests/config-replication-multi2.yaml:
--------------------------------------------------------------------------------
1 | address: chi:9000
2 | replication: true
3 | tenant: multi2
4 | # For test purposes flush on every write
5 | batch_write_size: 1
6 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/tablename.go:
--------------------------------------------------------------------------------
1 | package clickhousespanstore
2 | 
3 | type TableName string
4 | 
5 | // ToLocal returns the table name with the "_local" suffix appended.
6 | func (t TableName) ToLocal() TableName {
7 | 	return TableName(string(t) + "_local")
8 | }
9 | 


--------------------------------------------------------------------------------
/RELEASE.md:
--------------------------------------------------------------------------------
 1 | # Release
 2 | 
 3 | The release CI job builds the binaries and uploads them to the GitHub release page.
 4 | 
 5 | A release is made by pushing a tag:
 6 | 
 7 | ```bash
 8 | git tag 0.1.0 && git push origin 0.1.0
 9 | ```
10 | 


--------------------------------------------------------------------------------
/sqlscripts/distributed-table.tmpl.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE IF NOT EXISTS {{.Table}}
2 |     ON CLUSTER '{cluster}' AS {{.Database}}.{{.Table}}_local
3 |     ENGINE = Distributed('{cluster}', {{.Database}}, {{.Table}}_local, {{.Hash}})
4 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM docker.io/library/alpine:3.16
2 | 
3 | COPY jaeger-clickhouse-linux-amd64 /go/bin/jaeger-clickhouse
4 | 
5 | RUN mkdir /plugin
6 | 
7 | # The /plugin/ location is defined in jaeger-operator; on start the container copies the binary there.
8 | CMD ["cp", "/go/bin/jaeger-clickhouse", "/plugin/jaeger-clickhouse"]
9 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/tablename_test.go:
--------------------------------------------------------------------------------
 1 | package clickhousespanstore
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/assert"
 7 | )
 8 | 
 9 | // TestTableName_ToLocal checks that ToLocal appends the "_local" suffix.
10 | func TestTableName_ToLocal(t *testing.T) {
11 | 	name := TableName("some_table")
12 | 	assert.Equal(t, name+"_local", name.ToLocal())
13 | }
14 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/mocks/db.go:
--------------------------------------------------------------------------------
 1 | package mocks
 2 | 
 3 | import (
 4 | 	"database/sql"
 5 | 
 6 | 	sqlmock "github.com/DATA-DOG/go-sqlmock"
 7 | )
 8 | 
 9 | // GetDbMock creates a sqlmock database configured with QueryMatcherEqual and ConverterMock.
10 | func GetDbMock() (*sql.DB, sqlmock.Sqlmock, error) {
11 | 	matcher := sqlmock.QueryMatcherOption(sqlmock.QueryMatcherEqual)
12 | 	converter := sqlmock.ValueConverterOption(ConverterMock{})
13 | 	return sqlmock.New(matcher, converter)
14 | }
15 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Binaries for programs and plugins
 2 | *.exe
 3 | *.exe~
 4 | *.dll
 5 | *.so
 6 | *.dylib
 7 | 
 8 | # Test binary, built with `go test -c`
 9 | *.test
10 | 
11 | # Output of the go coverage tool, specifically when used with LiteIDE
12 | *.out
13 | 
14 | # IDE
15 | .idea/
16 | 
17 | # Dependency directories (remove the comment below to include it)
18 | # vendor/
19 | 
20 | jaeger-clickhouse*
21 | *.tar.gz
22 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/params.go:
--------------------------------------------------------------------------------
 1 | package clickhousespanstore
 2 | 
 3 | import (
 4 | 	"database/sql"
 5 | 	"time"
 6 | 
 7 | 	hclog "github.com/hashicorp/go-hclog"
 8 | )
 9 | 
10 | // WorkerParams contains parameters that are shared between WriteWorkers.
11 | type WorkerParams struct {
12 | 	logger     hclog.Logger  // logger shared by the workers
13 | 	db         *sql.DB       // database handle shared by the workers
14 | 	indexTable TableName     // name of the span index table
15 | 	spansTable TableName     // name of the spans table
16 | 	tenant     string        // tenant name; NOTE(review): presumably empty when multitenancy is off — confirm
17 | 	encoding   Encoding      // NOTE(review): presumably the encoding of stored span data — confirm
18 | 	delay      time.Duration // NOTE(review): purpose is not visible in this file — confirm against the writer
19 | }
20 | 


--------------------------------------------------------------------------------
/storage/clickhousedependencystore/dependencystore_test.go:
--------------------------------------------------------------------------------
 1 | package clickhousedependencystore
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"testing"
 6 | 	"time"
 7 | 
 8 | 	"github.com/stretchr/testify/assert"
 9 | )
10 | 
11 | // TestDependencyStore_GetDependencies checks that the store returns errNotImplemented and no links.
12 | func TestDependencyStore_GetDependencies(t *testing.T) {
13 | 	store := NewDependencyStore()
14 | 	links, err := store.GetDependencies(context.Background(), time.Now(), time.Hour)
15 | 
16 | 	assert.EqualError(t, err, errNotImplemented.Error())
17 | 	assert.Nil(t, links)
18 | }
19 | 


--------------------------------------------------------------------------------
/sqlscripts/jaeger-spans.tmpl.sql:
--------------------------------------------------------------------------------
 1 | CREATE TABLE IF NOT EXISTS {{.SpansTable}}
 2 | {{if .Replication}}ON CLUSTER '{cluster}'{{end}}
 3 | (
 4 |     {{if .Multitenant -}}
 5 |     tenant    LowCardinality(String) CODEC (ZSTD(1)),
 6 |     {{- end -}}
 7 |     timestamp DateTime CODEC (Delta, ZSTD(1)),
 8 |     traceID   String CODEC (ZSTD(1)),
 9 |     model     String CODEC (ZSTD(3))
10 | ) ENGINE {{if .Replication}}ReplicatedMergeTree{{else}}MergeTree(){{end}}
11 |     {{.TTLTimestamp}}
12 |     PARTITION BY (
13 |         {{if .Multitenant -}}
14 |         tenant,
15 |         {{- end -}}
16 |         toDate(timestamp)
17 |     )
18 |     ORDER BY traceID
19 |     SETTINGS index_granularity = 1024
20 | 


--------------------------------------------------------------------------------
/sqlscripts/jaeger-spans-archive.tmpl.sql:
--------------------------------------------------------------------------------
 1 | CREATE TABLE IF NOT EXISTS {{.SpansArchiveTable}}
 2 | {{if .Replication}}ON CLUSTER '{cluster}'{{end}}
 3 | (
 4 |     {{if .Multitenant -}}
 5 |     tenant    LowCardinality(String) CODEC (ZSTD(1)),
 6 |     {{- end -}}
 7 |     timestamp DateTime CODEC (Delta, ZSTD(1)),
 8 |     traceID   String CODEC (ZSTD(1)),
 9 |     model     String CODEC (ZSTD(3))
10 | ) ENGINE {{if .Replication}}ReplicatedMergeTree{{else}}MergeTree(){{end}}
11 |     {{.TTLTimestamp}}
12 |     PARTITION BY (
13 |         {{if .Multitenant -}}
14 |         tenant,
15 |         {{- end -}}
16 |         toYYYYMM(timestamp)
17 |     )
18 |     ORDER BY traceID
19 |     SETTINGS index_granularity = 1024
20 | 


--------------------------------------------------------------------------------
/storage/clickhousedependencystore/noop.go:
--------------------------------------------------------------------------------
 1 | package clickhousedependencystore
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"errors"
 6 | 	"time"
 7 | 
 8 | 	"github.com/jaegertracing/jaeger/model"
 9 | 	"github.com/jaegertracing/jaeger/storage/dependencystore"
10 | )
11 | 
12 | var (
13 | 	errNotImplemented = errors.New("not implemented")
14 | )
15 | 
16 | // DependencyStore is a stateless dependencystore.Reader whose only method reports errNotImplemented.
17 | type DependencyStore struct {
18 | }
19 | 
20 | var _ dependencystore.Reader = (*DependencyStore)(nil)
21 | 
22 | // NewDependencyStore returns a new, empty DependencyStore.
23 | func NewDependencyStore() *DependencyStore {
24 | 	return &DependencyStore{}
25 | }
26 | 
27 | // GetDependencies implements dependencystore.Reader. It ignores its arguments and always returns nil and errNotImplemented.
28 | func (s *DependencyStore) GetDependencies(_ context.Context, _ time.Time, _ time.Duration) ([]model.DependencyLink, error) {
29 | 	return nil, errNotImplemented
30 | }
31 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/mocks/converter.go:
--------------------------------------------------------------------------------
 1 | package mocks
 2 | 
 3 | import (
 4 | 	"database/sql/driver"
 5 | 	"fmt"
 6 | 	"time"
 7 | 
 8 | 	"github.com/jaegertracing/jaeger/model"
 9 | )
10 | 
11 | var _ driver.ValueConverter = ConverterMock{}
12 | 
13 | type ConverterMock struct{}
14 | 
15 | // ConvertValue implements driver.ValueConverter for tests. Trace IDs are
16 | // rendered with String, durations become nanosecond counts, string slices are
17 | // formatted with fmt.Sprint, and the other supported types pass through as is.
18 | // Any other type yields an "unknown type" error.
19 | func (conv ConverterMock) ConvertValue(v interface{}) (driver.Value, error) {
20 | 	switch value := v.(type) {
21 | 	case model.TraceID:
22 | 		return driver.Value(value.String()), nil
23 | 	case time.Duration:
24 | 		return driver.Value(value.Nanoseconds()), nil
25 | 	case []string:
26 | 		return driver.Value(fmt.Sprint(value)), nil
27 | 	case time.Time, model.SpanID, string, []uint8, int64, uint64, int:
28 | 		return driver.Value(value), nil
29 | 	default:
30 | 		return nil, fmt.Errorf("unknown type %T", value)
31 | 	}
32 | }
33 | 


--------------------------------------------------------------------------------
/sqlscripts/jaeger-index.tmpl.sql:
--------------------------------------------------------------------------------
 1 | CREATE TABLE IF NOT EXISTS {{.SpansIndexTable}}
 2 | {{if .Replication}}ON CLUSTER '{cluster}'{{end}}
 3 | (
 4 |     {{if .Multitenant -}}
 5 |     tenant     LowCardinality(String) CODEC (ZSTD(1)),
 6 |     {{- end -}}
 7 |     timestamp  DateTime CODEC (Delta, ZSTD(1)),
 8 |     traceID    String CODEC (ZSTD(1)),
 9 |     service    LowCardinality(String) CODEC (ZSTD(1)),
10 |     operation  LowCardinality(String) CODEC (ZSTD(1)),
11 |     durationUs UInt64 CODEC (ZSTD(1)),
12 |     tags Nested
13 |     (
14 |         key LowCardinality(String),
15 |         value String
16 |     ) CODEC (ZSTD(1)),
17 |     INDEX idx_tag_keys tags.key TYPE bloom_filter(0.01) GRANULARITY 64,
18 |     INDEX idx_duration durationUs TYPE minmax GRANULARITY 1
19 | ) ENGINE {{if .Replication}}ReplicatedMergeTree{{else}}MergeTree(){{end}}
20 |     {{.TTLTimestamp}}
21 |     PARTITION BY (
22 |         {{if .Multitenant -}}
23 |         tenant,
24 |         {{- end -}}
25 |         toDate(timestamp)
26 |     )
27 |     ORDER BY (service, -toUnixTimestamp(timestamp))
28 |     SETTINGS index_granularity = 1024
29 | 


--------------------------------------------------------------------------------
/sqlscripts/jaeger-operations.tmpl.sql:
--------------------------------------------------------------------------------
 1 | CREATE MATERIALIZED VIEW IF NOT EXISTS {{.OperationsTable}}
 2 | {{if .Replication}}ON CLUSTER '{cluster}'{{end}}
 3 |     ENGINE {{if .Replication}}ReplicatedSummingMergeTree{{else}}SummingMergeTree{{end}}
 4 |     {{.TTLDate}}
 5 |     PARTITION BY (
 6 |         {{if .Multitenant -}}
 7 |         tenant,
 8 |         {{- end -}}
 9 |         toYYYYMM(date)
10 |     )
11 |     ORDER BY (
12 |         {{if .Multitenant -}}
13 |         tenant,
14 |         {{- end -}}
15 |         date,
16 |         service,
17 |         operation
18 |     )
19 |     SETTINGS index_granularity = 32
20 |     POPULATE
21 | AS SELECT
22 |     {{if .Multitenant -}}
23 |     tenant,
24 |     {{- end -}}
25 |     toDate(timestamp) AS date,
26 |     service,
27 |     operation,
28 |     count() AS count,
29 |     if(
30 |         has(tags.key, 'span.kind'),
31 |         tags.value[indexOf(tags.key, 'span.kind')],
32 |         ''
33 |     ) AS spankind
34 | FROM {{.Database}}.{{.SpansIndexTable}}
35 | GROUP BY
36 |     {{if .Multitenant -}}
37 |     tenant,
38 |     {{- end -}}
39 |     date,
40 |     service,
41 |     operation,
42 |     tags.key,
43 |     tags.value
44 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
 1 | name: Release
 2 | 
 3 | on:
 4 |   push:
 5 |     tags: ['[0-9]+\.[0-9]+\.[0-9]+']
 6 | 
 7 | jobs:
 8 |   release:
 9 |     runs-on: ubuntu-latest
10 |     name: Release
11 |     steps:
12 |       - uses: actions/checkout@v2.3.4
13 |         with:
14 |           submodules: true
15 | 
16 |       - uses: actions/setup-go@v2
17 |         with:
18 |           go-version: ^1.19
19 | 
20 |       - name: Create release distribution
21 |         run: make build-all-platforms tar-all-platforms
22 | 
23 |       - name: Log in to the Container registry
24 |         uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
25 |         with:
26 |           registry: ghcr.io
27 |           username: ${{ github.actor }}
28 |           password: ${{ secrets.GITHUB_TOKEN }}
29 | 
30 |       - name: Build docker image
31 |         run: |
32 |           export DOCKER_TAG=${GITHUB_REF##*/}
33 |           make docker docker-push
34 | 
35 |       - name: Create Github release
36 |         run: |
37 |           export TAG=${GITHUB_REF##*/}
38 |           gh release create ${TAG} --title "Release ${TAG}" *.tar.gz
39 |         env:
40 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
41 | 


--------------------------------------------------------------------------------
/e2etests/clickhouse-replicated.xml:
--------------------------------------------------------------------------------
 1 | <!-- Minimal configuration to enable cluster mode in a single clickhouse process -->
 2 | <yandex>
 3 |     <macros>
 4 |         <installation>cluster</installation>
 5 |         <all-sharded-shard>0</all-sharded-shard>
 6 |         <cluster>cluster</cluster>
 7 |         <shard>0</shard>
 8 |         <replica>cluster-0-0</replica>
 9 |     </macros>
10 | 
11 |     <remote_servers>
12 |         <cluster>
13 |             <shard>
14 |                 <internal_replication>true</internal_replication>
15 |                 <replica>
16 |                     <host>localhost</host>
17 |                     <port>9000</port>
18 |                 </replica>
19 |             </shard>
20 |         </cluster>
21 |     </remote_servers>
22 | 
23 |     <keeper_server>
24 |         <tcp_port>2181</tcp_port>
25 |         <server_id>0</server_id>
26 |         <log_storage_path>/var/log/clickhouse-server/coordination/log</log_storage_path>
27 |         <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
28 |         <raft_configuration>
29 |             <server>
30 |                 <id>0</id>
31 |                 <hostname>localhost</hostname>
32 |                 <port>9444</port>
33 |             </server>
34 |         </raft_configuration>
35 |     </keeper_server>
36 | 
37 |     <zookeeper>
38 |         <!-- Clickhouse Keeper -->
39 |         <node>
40 |             <host>localhost</host>
41 |             <port>2181</port>
42 |         </node>
43 |     </zookeeper>
44 |     <distributed_ddl>
45 |         <path>/clickhouse/cluster/task_queue/ddl</path>
46 |     </distributed_ddl>
47 | </yandex>
48 | 


--------------------------------------------------------------------------------
/cmd/jaeger-clickhouse/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"flag"
 5 | 	"net/http"
 6 | 	"os"
 7 | 	"path/filepath"
 8 | 
 9 | 	// Package contains time zone info for connecting to ClickHouse servers with non-UTC time zone
10 | 	_ "time/tzdata"
11 | 
12 | 	hclog "github.com/hashicorp/go-hclog"
13 | 	"github.com/jaegertracing/jaeger/plugin/storage/grpc"
14 | 	"github.com/jaegertracing/jaeger/plugin/storage/grpc/shared"
15 | 	"github.com/prometheus/client_golang/prometheus/promhttp"
16 | 	yaml "gopkg.in/yaml.v3"
17 | 
18 | 	"github.com/jaegertracing/jaeger-clickhouse/storage"
19 | )
20 | 
21 | // main loads the config file given by -config, serves Prometheus metrics and
22 | // runs the gRPC storage plugin; the store is closed once serving returns.
23 | func main() {
24 | 	var configPath string
25 | 	flag.StringVar(&configPath, "config", "", "The absolute path to the ClickHouse plugin's configuration file")
26 | 	flag.Parse()
27 | 
28 | 	logger := hclog.New(&hclog.LoggerOptions{
29 | 		Name: "jaeger-clickhouse",
30 | 		// If this is set to e.g. Warn, the debug logs are never sent to Jaeger even despite
31 | 		// --grpc-storage-plugin.log-level=debug
32 | 		Level:      hclog.Trace,
33 | 		JSONFormat: true,
34 | 	})
35 | 
36 | 	cfgFile, err := os.ReadFile(filepath.Clean(configPath))
37 | 	if err != nil {
38 | 		logger.Error("Could not read config file", "config", configPath, "error", err)
39 | 		os.Exit(1)
40 | 	}
41 | 	var cfg storage.Configuration
42 | 	if err = yaml.Unmarshal(cfgFile, &cfg); err != nil {
43 | 		logger.Error("Could not parse config file", "error", err)
44 | 		// Do not continue with a partially populated configuration.
45 | 		os.Exit(1)
46 | 	}
47 | 
48 | 	go func() {
49 | 		http.Handle("/metrics", promhttp.Handler())
50 | 		// Keep the error local: writing to main's err from this goroutine would
51 | 		// race with the assignments below.
52 | 		if err := http.ListenAndServe(cfg.MetricsEndpoint, nil); err != nil {
53 | 			logger.Error("Failed to listen for metrics endpoint", "error", err)
54 | 		}
55 | 	}()
56 | 
57 | 	var pluginServices shared.PluginServices
58 | 	store, err := storage.NewStore(logger, cfg)
59 | 	if err != nil {
60 | 		// hclog takes key/value pairs; a bare err would be logged as a stray argument.
61 | 		logger.Error("Failed to create a storage", "error", err)
62 | 		os.Exit(1)
63 | 	}
64 | 	pluginServices.Store = store
65 | 	pluginServices.ArchiveStore = store
66 | 	pluginServices.StreamingSpanWriter = store
67 | 
68 | 	grpc.Serve(&pluginServices)
69 | 	if err = store.Close(); err != nil {
70 | 		logger.Error("Failed to close store", "error", err)
71 | 		os.Exit(1)
72 | 	}
73 | }
74 | 


--------------------------------------------------------------------------------
/guide-kubernetes.md:
--------------------------------------------------------------------------------
 1 | # Kubernetes Deployment
 2 | 
 3 | This guide describes how to deploy Jaeger with ClickHouse storage on Kubernetes.
 4 | 
 5 | ## Prerequisites
 6 | 
 7 | 1. Deploy [Jaeger operator](https://github.com/jaegertracing/jaeger-operator). Note that the `grpc-plugin` storage type is supported since version 1.25.0.
 8 | 2. Deploy [Clickhouse operator](https://github.com/Altinity/clickhouse-operator)
 9 | 3. Deploy [Zookeeper](https://github.com/Altinity/clickhouse-operator/blob/master/docs/replication_setup.md) (if replication is used)
10 | 
11 | ## Deploy
12 | 
13 | Deploy Clickhouse:
14 | 
15 | ```bash
16 | cat <<EOF | kubectl apply -f -
17 | apiVersion: clickhouse.altinity.com/v1
18 | kind: ClickHouseInstallation
19 | metadata:
20 |   name: jaeger
21 |   labels:
22 |     jaeger-clickhouse: demo
23 | spec:
24 |   configuration:
25 |     clusters:
26 |       - name: cluster1
27 |         layout:
28 |           shardsCount: 1
29 | EOF
30 | ```
31 | 
32 | Create config map for Jaeger Clickhouse plugin:
33 | 
34 | ```bash
35 | cat <<EOF | kubectl apply -f -
36 | apiVersion: v1
37 | kind: ConfigMap
38 | metadata:
39 |   name: jaeger-clickhouse
40 |   labels:
41 |     jaeger-clickhouse: demo
42 | data:
43 |   config.yaml: |
44 |     address: clickhouse-jaeger:9000
45 |     username: clickhouse_operator
46 |     password: clickhouse_operator_password
47 |     spans_table:
48 |     spans_index_table:
49 |     operations_table:
50 | EOF
51 | ```
52 | 
53 | Deploy Jaeger:
54 | 
55 | ```bash
56 | cat <<EOF | kubectl apply -f -
57 | apiVersion: jaegertracing.io/v1
58 | kind: Jaeger
59 | metadata:
60 |   name: jaeger-clickhouse
61 |   labels:
62 |     jaeger-clickhouse: demo
63 | spec:
64 |   storage:
65 |     type: grpc-plugin
66 |     grpcPlugin:
67 |       image: ghcr.io/jaegertracing/jaeger-clickhouse:0.7.0
68 |     options:
69 |       grpc-storage-plugin:
70 |         binary: /plugin/jaeger-clickhouse
71 |         configuration-file: /plugin-config/config.yaml
72 |         log-level: debug
73 |   volumeMounts:
74 |     - name: plugin-config
75 |       mountPath: /plugin-config
76 |   volumes:
77 |     - name: plugin-config
78 |       configMap:
79 |         name: jaeger-clickhouse
80 | EOF
81 | ```
82 | 
83 | ## Delete all
84 | 
85 | ```bash
86 | kubectl delete jaeger,cm,chi -l jaeger-clickhouse=demo
87 | ```
88 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/heap.go:
--------------------------------------------------------------------------------
 1 | package clickhousespanstore
 2 | 
 3 | import (
 4 | 	"container/heap"
 5 | 	"fmt"
 6 | 	"time"
 7 | )
 8 | 
 9 | var (
10 | 	_                 heap.Interface = workerHeap{}
11 | 	errWorkerNotFound                = fmt.Errorf("worker not found in heap")
12 | )
13 | 
14 | // heapItem pairs a worker with the time it was pushed onto the heap.
15 | type heapItem struct {
16 | 	pushTime time.Time
17 | 	worker   *WriteWorker
18 | }
19 | 
20 | // workerHeap is a heap for WriteWorkers where worker's push time is the key.
21 | // elems is a pointer so the value-receiver heap.Interface methods can resize the
22 | // shared slice; indexes maps each worker to its current position in elems.
23 | type workerHeap struct {
24 | 	elems   *[]*heapItem
25 | 	indexes map[*WriteWorker]int
26 | }
27 | 
28 | // newWorkerHeap returns an empty heap with capacity for cap items preallocated.
29 | func newWorkerHeap(cap int) workerHeap {
30 | 	elems := make([]*heapItem, 0, cap)
31 | 	return workerHeap{
32 | 		elems:   &elems,
33 | 		indexes: make(map[*WriteWorker]int),
34 | 	}
35 | }
36 | 
37 | // AddWorker pushes worker onto the heap with the current time as its key.
38 | func (h workerHeap) AddWorker(worker *WriteWorker) {
39 | 	heap.Push(h, heapItem{
40 | 		worker:   worker,
41 | 		pushTime: time.Now(),
42 | 	})
43 | }
44 | 
45 | // RemoveWorker removes worker from the heap, or returns errWorkerNotFound.
46 | func (h *workerHeap) RemoveWorker(worker *WriteWorker) error {
47 | 	idx, ok := h.indexes[worker]
48 | 	if !ok {
49 | 		return errWorkerNotFound
50 | 	}
51 | 	heap.Remove(h, idx)
52 | 	return nil
53 | }
54 | 
55 | // CloseWorkers calls Close on every worker currently in the heap.
56 | func (h *workerHeap) CloseWorkers() {
57 | 	for _, item := range *h.elems {
58 | 		item.worker.Close()
59 | 	}
60 | }
61 | 
62 | // Len implements heap.Interface.
63 | func (h workerHeap) Len() int {
64 | 	return len(*h.elems)
65 | }
66 | 
67 | // Less implements heap.Interface; an earlier push time sorts first.
68 | func (h workerHeap) Less(i, j int) bool {
69 | 	return (*h.elems)[i].pushTime.Before((*h.elems)[j].pushTime)
70 | }
71 | 
72 | // Swap implements heap.Interface and keeps indexes in sync with elems.
73 | func (h workerHeap) Swap(i, j int) {
74 | 	(*h.elems)[i], (*h.elems)[j] = (*h.elems)[j], (*h.elems)[i]
75 | 	h.indexes[(*h.elems)[i].worker] = i
76 | 	h.indexes[(*h.elems)[j].worker] = j
77 | }
78 | 
79 | // Push implements heap.Interface. x must be a heapItem; any other type panics.
80 | func (h workerHeap) Push(x interface{}) {
81 | 	switch t := x.(type) {
82 | 	case heapItem:
83 | 		*h.elems = append(*h.elems, &t)
84 | 		h.indexes[t.worker] = len(*h.elems) - 1
85 | 	default:
86 | 		panic("Unknown type")
87 | 	}
88 | }
89 | 
90 | // Pop implements heap.Interface; it returns the removed item's *WriteWorker.
91 | func (h workerHeap) Pop() interface{} {
92 | 	lastInd := len(*h.elems) - 1
93 | 	last := (*h.elems)[lastInd]
94 | 	delete(h.indexes, last.worker)
95 | 	(*h.elems)[lastInd] = nil // drop the slot's pointer so the item can be collected
96 | 	*h.elems = (*h.elems)[:lastInd]
97 | 	return last.worker
98 | }
99 | 


--------------------------------------------------------------------------------
/config.yaml:
--------------------------------------------------------------------------------
 1 | address: some-clickhouse-server:9000
 2 | # Directory with .sql files to run at plugin startup, mainly for integration tests.
 3 | # Depending on the value of "init_tables", this can be run as a
 4 | # replacement or supplement to creating default tables for span storage.
 5 | # If init_tables is also enabled, the scripts in this directory will be run first.
 6 | init_sql_scripts_dir:
 7 | # Whether to automatically attempt to create tables in ClickHouse.
 8 | # By default, this is enabled if init_sql_scripts_dir is empty,
 9 | # or disabled if init_sql_scripts_dir is provided.
10 | init_tables:
11 | # Maximum number of spans that can be waiting to be written at a time.
12 | # New spans exceeding this limit will be discarded,
13 | # keeping memory in check if there are issues writing to ClickHouse.
14 | # Check the "jaeger_clickhouse_discarded_spans" metric to keep track of discards.
15 | # If 0, no limit is set. Default 10_000_000.
16 | max_span_count:
17 | # Batch write size. Default 10_000.
18 | batch_write_size:
19 | # Batch flush interval. Default 5s.
20 | batch_flush_interval:
21 | # Encoding of stored data. Either json or protobuf. Default json.
22 | encoding:
23 | # Path to CA TLS certificate.
24 | ca_file:
25 | # Username for connection to ClickHouse. Default is "default".
26 | username:
27 | # Password for connection to ClickHouse.
28 | password:
29 | # ClickHouse database name. The database must be created manually before Jaeger starts. Default is "default".
30 | database:
31 | # If non-empty, enables a tenant column in tables, and uses the provided tenant name for this instance.
32 | # Default is empty. See guide-multitenancy.md for more information.
33 | tenant:
34 | # Endpoint for serving prometheus metrics. Default localhost:9090.
35 | metrics_endpoint: localhost:9090
36 | # Whether to use sql scripts supporting replication and sharding.
37 | # Replication can be used only on database with Atomic engine.
38 | # Default false.
39 | replication:
40 | # Table with spans. Default "jaeger_spans_local" or "jaeger_spans" when replication is enabled.
41 | spans_table:
42 | # Span index table. Default "jaeger_index_local" or "jaeger_index" when replication is enabled.
43 | spans_index_table:
44 | # Operations table. Default "jaeger_operations_local" or "jaeger_operations" when replication is enabled.
45 | operations_table:
46 | # TTL for data in tables in days. If 0, no TTL is set. Default 0.
47 | ttl:
48 | # The maximum number of spans to fetch per trace. If 0, no limit is set. Default 0.
49 | max_num_spans:
50 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/mocks/converter_test.go:
--------------------------------------------------------------------------------
 1 | package mocks
 2 | 
 3 | import (
 4 | 	"database/sql/driver"
 5 | 	"testing"
 6 | 	"time"
 7 | 
 8 | 	"github.com/jaegertracing/jaeger/model"
 9 | 	"github.com/stretchr/testify/assert"
10 | )
11 | 
12 | func TestConverterMock_ConvertValue(t *testing.T) {
13 | 	converter := ConverterMock{}
14 | 
15 | 	testCases := map[string]struct {
16 | 		valueToConvert interface{}
17 | 		expectedResult driver.Value
18 | 	}{
19 | 		"string value":       {valueToConvert: "some string value", expectedResult: driver.Value("some string value")},
20 | 		"string slice value": {valueToConvert: []string{"some", "slice", "of", "strings"}, expectedResult: driver.Value("[some slice of strings]")},
21 | 		"time value": {
22 | 			valueToConvert: time.Date(2002, time.February, 19, 14, 43, 51, 0, time.UTC),
23 | 			expectedResult: driver.Value(time.Date(2002, time.February, 19, 14, 43, 51, 0, time.UTC)),
24 | 		},
25 | 		"duration value": {
26 | 			valueToConvert: time.Unix(12340, 123456789).Sub(time.Unix(0, 0)),
27 | 			expectedResult: driver.Value(int64(12340123456789)),
28 | 		},
29 | 		"int64 value":         {valueToConvert: int64(1823), expectedResult: driver.Value(int64(1823))},
30 | 		"int value":           {valueToConvert: 1823, expectedResult: driver.Value(1823)},
31 | 		"model.SpanID value":  {valueToConvert: model.SpanID(318148), expectedResult: driver.Value(model.SpanID(318148))},
32 | 		"model.TraceID value": {valueToConvert: model.TraceID{Low: 0xabd5, High: 0xa31}, expectedResult: driver.Value("0000000000000a31000000000000abd5")},
33 | 		"uint8 slice value":   {valueToConvert: []uint8("asdkja"), expectedResult: driver.Value([]uint8{0x61, 0x73, 0x64, 0x6b, 0x6a, 0x61})},
34 | 	}
35 | 
36 | 	for name, test := range testCases {
37 | 		t.Run(name, func(t *testing.T) {
38 | 			converted, err := converter.ConvertValue(test.valueToConvert)
39 | 			assert.NoError(t, err)
40 | 			assert.Equal(t, test.expectedResult, converted)
41 | 		})
42 | 	}
43 | }
44 | 
45 | func TestConverterMock_Fail(t *testing.T) {
46 | 	converter := ConverterMock{}
47 | 
48 | 	tests := map[string]struct {
49 | 		valueToConvert   interface{}
50 | 		expectedErrorMsg string
51 | 	}{
52 | 		"float64 value": {valueToConvert: float64(1e-4), expectedErrorMsg: "unknown type float64"},
53 | 		"int32 value":   {valueToConvert: int32(12831), expectedErrorMsg: "unknown type int32"},
54 | 	}
55 | 
56 | 	for name, test := range tests {
57 | 		t.Run(name, func(t *testing.T) {
58 | 			val, err := converter.ConvertValue(test.valueToConvert)
59 | 			assert.Equal(t, nil, val)
60 | 			assert.EqualError(t, err, test.expectedErrorMsg)
61 | 		})
62 | 	}
63 | }
64 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yaml:
--------------------------------------------------------------------------------
  1 | name: Build, test, format and lint
  2 | 
  3 | on:
  4 |   push:
  5 |   pull_request:
  6 | 
  7 | jobs:
  8 |   build-binaries:
  9 |     runs-on: ubuntu-latest
 10 |     name: Build binary for ${{ matrix.platform.name }}
 11 |     strategy:
 12 |       matrix:
 13 |         platform:
 14 |         - name: linux on amd64
 15 |           task: build-linux-amd64
 16 |         - name: linux on arm64
 17 |           task: build-linux-arm64
 18 |         - name: osx on amd64
 19 |           task: build-darwin-amd64
 20 |         - name: osx on arm64
 21 |           task: build-darwin-arm64
 22 |     steps:
 23 |       - uses: actions/checkout@v2.3.4
 24 |         with:
 25 |           submodules: true
 26 | 
 27 |       - uses: actions/setup-go@v2
 28 |         with:
 29 |           go-version: ^1.19
 30 | 
 31 |       - name: Build binaries
 32 |         run: make ${{ matrix.platform.task }}
 33 | 
 34 |   format-lint:
 35 |     runs-on: ubuntu-latest
 36 |     name: Format and lint
 37 |     steps:
 38 |       - uses: actions/checkout@v2.3.4
 39 |         with:
 40 |           submodules: true
 41 | 
 42 |       - uses: actions/setup-go@v2
 43 |         with:
 44 |           go-version: ^1.19
 45 | 
 46 |       - name: Install tools
 47 |         run: make install-tools
 48 | 
 49 |       - name: Format
 50 |         run: make fmt && git diff --quiet
 51 | 
 52 |       - name: Lint
 53 |         run: make lint
 54 | 
 55 |   e2e-test:
 56 |     runs-on: ubuntu-latest
 57 |     name: E2E Test
 58 |     steps:
 59 |       - uses: actions/checkout@v2.3.4
 60 |         with:
 61 |           submodules: true
 62 | 
 63 |       - uses: actions/setup-go@v2
 64 |         with:
 65 |           go-version: ^1.19
 66 | 
 67 |       - name: Run e2e test
 68 |         run: make e2e-tests
 69 | 
 70 |   unit-tests:
 71 |     runs-on: ubuntu-latest
 72 |     name: Unit tests
 73 |     steps:
 74 |       - uses: actions/checkout@v2.3.4
 75 |         with:
 76 |           submodules: true
 77 | 
 78 |       - uses: actions/setup-go@v2
 79 |         with:
 80 |           go-version: ^1.19
 81 | 
 82 |       - name: Run unit test
 83 |         run: make test
 84 | 
 85 |   integration-tests:
 86 |     runs-on: ubuntu-latest
 87 |     name: Integration tests
 88 |     steps:
 89 |       - uses: actions/checkout@v2.3.4
 90 |         with:
 91 |           submodules: true
 92 | 
 93 |       - uses: actions/setup-go@v2
 94 |         with:
 95 |           go-version: ^1.19
 96 | 
 97 |       - name: Setup database
 98 |         run: docker run --rm -d -p9000:9000 --name test-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server:22
 99 | 
100 |       - name: Run integration tests
101 |         run: make integration-test
102 | 


--------------------------------------------------------------------------------
/guide-multitenancy.md:
--------------------------------------------------------------------------------
 1 | # Multi-tenant deployment
 2 | 
 3 | It may be desirable to share a common ClickHouse instance across multiple Jaeger instances.
 4 | There are two ways of doing this, depending on whether spanning the tenants across separate databases is preferable.
 5 | 
 6 | ## Shared database/tables
 7 | 
 8 | If you wish to reuse the same ClickHouse database/tables across all tenants, you can specify a different `tenant: "<name>"` in each jaeger-clickhouse instance config.
 9 | 
10 | When a non-empty `tenant` is specified, all tables will be created with a `tenant` column, and all reads/writes for a given Jaeger instance will be applied against the configured tenant name for that instance.
11 | 
12 | 1. Create a shared database:
13 |     ```sql
14 |     CREATE DATABASE shared ENGINE=Atomic
15 |     ```
16 | 2. Configure the per-tenant jaeger-clickhouse clients to specify tenant names:
17 |     ```yaml
18 |     database: shared
19 |     tenant: tenant_1
20 |     ```
21 |     ```yaml
22 |     database: shared
23 |     tenant: tenant_2
24 |     ```
25 | 
26 | Multitenant mode must be enabled when the deployment is first created and cannot be toggled later, except perhaps by manually adding/removing the `tenant` column from all tables.
27 | Multitenant/singletenant instances must not be mixed within the same database - the two modes are mutually exclusive of each other.
28 | 
29 | ## Separate databases
30 | 
31 | If you wish to keep instances fully separate, you can configure one ClickHouse database per tenant.
32 | This may be useful when different per-database configuration across tenants is desirable.
33 | 
34 | 1. Create a database for each tenant:
35 |     ```sql
36 |     CREATE DATABASE tenant_1 ENGINE=Atomic;
37 |     CREATE DATABASE tenant_2 ENGINE=Atomic;
38 |     ```
39 | 2. Configure the per-tenant jaeger-clickhouse plugins matching databases:
40 |     ```yaml
41 |     database: tenant_1
42 |     ```
43 |     ```yaml
44 |     database: tenant_2
45 |     ```
46 | 
47 | ## Mixing methods in the same ClickHouse instance
48 | 
49 | Each of the methods applies on a per-database basis. The methods require different schemas and must not be mixed in a single database, but it is possible to have different databases using different methods in the same ClickHouse instance.
50 | 
51 | For example, there could be a `shared` database where multiple tenants are sharing the same tables:
52 | 
53 | ```sql
54 | CREATE DATABASE shared ENGINE=Atomic
55 | ```
56 | ```yaml
57 | database: shared
58 | tenant: tenant_1
59 | ```
60 | ```yaml
61 | database: shared
62 | tenant: tenant_2
63 | ```
64 | 
65 | Then there could be separate `isolated_x` databases for tenants that should be provided with their own dedicated tables, enabling e.g. better ACL isolation:
66 | 
67 | ```sql
68 | CREATE DATABASE isolated_1 ENGINE=Atomic;
69 | CREATE DATABASE isolated_2 ENGINE=Atomic;
70 | ```
71 | ```yaml
72 | database: isolated_1
73 | ```
74 | ```yaml
75 | database: isolated_2
76 | ```
77 | 
78 | 
79 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/mocks/spylogger.go:
--------------------------------------------------------------------------------
  1 | package mocks
  2 | 
  3 | import (
  4 | 	"io"
  5 | 	"log"
  6 | 	"testing"
  7 | 
  8 | 	hclog "github.com/hashicorp/go-hclog"
  9 | 	"github.com/stretchr/testify/assert"
 10 | )
 11 | 
 12 | const levelCount = 5
 13 | 
 14 | var _ hclog.Logger = SpyLogger{}
 15 | 
 16 | type LogMock struct {
 17 | 	Msg  string
 18 | 	Args []interface{}
 19 | }
 20 | 
 21 | type SpyLogger struct {
 22 | 	logs [][]LogMock
 23 | }
 24 | 
 25 | func NewSpyLogger() SpyLogger {
 26 | 	return SpyLogger{logs: make([][]LogMock, levelCount)}
 27 | }
 28 | 
 29 | func (logger *SpyLogger) AssertLogsOfLevelEqual(t *testing.T, level hclog.Level, want []LogMock) {
 30 | 	assert.Equal(t, want, logger.getLogs(level))
 31 | }
 32 | 
 33 | func (logger *SpyLogger) getLogs(level hclog.Level) []LogMock {
 34 | 	return logger.logs[level-1]
 35 | }
 36 | 
 37 | func (logger *SpyLogger) AssertLogsEmpty(t *testing.T) {
 38 | 	assert.Equal(t, logger.logs, make([][]LogMock, levelCount))
 39 | }
 40 | 
 41 | func (logger SpyLogger) Log(level hclog.Level, msg string, args ...interface{}) {
 42 | 	logger.logs[level-1] = append(logger.getLogs(level), LogMock{msg, args})
 43 | }
 44 | 
 45 | func (logger SpyLogger) Trace(msg string, args ...interface{}) {
 46 | 	logger.Log(hclog.Trace, msg, args...)
 47 | }
 48 | 
 49 | func (logger SpyLogger) Debug(msg string, args ...interface{}) {
 50 | 	logger.Log(hclog.Debug, msg, args...)
 51 | }
 52 | 
 53 | func (logger SpyLogger) Info(msg string, args ...interface{}) {
 54 | 	logger.Log(hclog.Info, msg, args...)
 55 | }
 56 | 
 57 | func (logger SpyLogger) Warn(msg string, args ...interface{}) {
 58 | 	logger.Log(hclog.Warn, msg, args...)
 59 | }
 60 | 
 61 | func (logger SpyLogger) Error(msg string, args ...interface{}) {
 62 | 	logger.Log(hclog.Error, msg, args...)
 63 | }
 64 | 
 65 | func (logger SpyLogger) IsTrace() bool {
 66 | 	panic("implement me")
 67 | }
 68 | 
 69 | func (logger SpyLogger) IsDebug() bool {
 70 | 	panic("implement me")
 71 | }
 72 | 
 73 | func (logger SpyLogger) IsInfo() bool {
 74 | 	panic("implement me")
 75 | }
 76 | 
 77 | func (logger SpyLogger) IsWarn() bool {
 78 | 	panic("implement me")
 79 | }
 80 | 
 81 | func (logger SpyLogger) IsError() bool {
 82 | 	panic("implement me")
 83 | }
 84 | 
 85 | func (logger SpyLogger) ImpliedArgs() []interface{} {
 86 | 	panic("implement me")
 87 | }
 88 | 
 89 | func (logger SpyLogger) With(args ...interface{}) hclog.Logger {
 90 | 	panic("implement me")
 91 | }
 92 | 
 93 | func (logger SpyLogger) Name() string {
 94 | 	return "spy logger"
 95 | }
 96 | 
 97 | func (logger SpyLogger) Named(name string) hclog.Logger {
 98 | 	panic("implement me")
 99 | }
100 | 
101 | func (logger SpyLogger) ResetNamed(name string) hclog.Logger {
102 | 	panic("implement me")
103 | }
104 | 
105 | func (logger SpyLogger) SetLevel(level hclog.Level) {
106 | 	panic("implement me")
107 | }
108 | 
109 | func (logger SpyLogger) StandardLogger(opts *hclog.StandardLoggerOptions) *log.Logger {
110 | 	panic("implement me")
111 | }
112 | 
113 | func (logger SpyLogger) StandardWriter(opts *hclog.StandardLoggerOptions) io.Writer {
114 | 	panic("implement me")
115 | }
116 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Jaeger ClickHouse (experimental)
 2 | 
 3 | ⚠️ This module only implements grpc-plugin API that has been deprecated in Jaeger (https://github.com/jaegertracing/jaeger/issues/4647).
 4 | 
 5 | 🏗️ Jaeger v2 will support ClickHouse natively, see https://github.com/jaegertracing/jaeger/issues/5058 .
 6 | 
 7 | This is a [Jaeger gRPC storage plugin](https://github.com/jaegertracing/jaeger/tree/master/plugin/storage/grpc) implementation for storing traces in ClickHouse.
 8 | 
 9 | ## Project status
10 | 
11 | This is a community-driven project, and we would love to hear your issues and feature requests.
12 | Pull requests are also greatly appreciated.
13 | 
14 | ## Why use ClickHouse for Jaeger?
15 | 
16 | [ClickHouse](https://clickhouse.com) is an analytical column-oriented database management system.
17 | It is designed to analyze streams of events, which closely resemble spans.
18 | It's open-source, optimized for performance, and actively developed.
19 | 
20 | ## How it works
21 | 
22 | Jaeger spans are stored in 2 tables. The first contains the whole span encoded either in JSON or Protobuf.
23 | The second stores key information about spans for searching. This table is indexed by span duration and tags.
24 | Also, info about operations is stored in the materialized view. There are no indexes for archived spans.
25 | Storing data in replicated local tables with distributed global tables is natively supported. Spans are buffered.
26 | Span buffers are flushed to DB either by timer or after reaching max batch size. Timer interval and batch size can be
27 | set in [config file](./config.yaml).
28 | 
29 | Database schema generated by JetBrains DataGrip
30 | ![Picture of tables](./pictures/tables.png)
31 | 
32 | # How to start using Jaeger over ClickHouse
33 | 
34 | ## Documentation
35 | 
36 | Refer to the [config.yaml](./config.yaml) for all supported configuration options.
37 | 
38 | * [Kubernetes deployment](./guide-kubernetes.md)
39 | * [Sharding and replication](./guide-sharding-and-replication.md)
40 | * [Multi-tenancy](./guide-multitenancy.md)
41 | 
42 | ## Build & Run
43 | 
44 | ### Docker database example
45 | 
46 | ```bash
47 | docker run --rm -it -p9000:9000 --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server:22
48 | GOOS=linux make build run
49 | make run-hotrod
50 | ```
51 | 
52 | Open [localhost:16686](http://localhost:16686) and [localhost:8080](http://localhost:8080).
53 | 
54 | ### Custom database
55 | 
56 | You need to specify connection options in `config.yaml`, then you can run
57 | 
58 | ```bash
59 | make build
60 | SPAN_STORAGE_TYPE=grpc-plugin {Jaeger binary address} --query.ui-config=jaeger-ui.json --grpc-storage-plugin.binary=./{name of built binary} --grpc-storage-plugin.configuration-file=config.yaml --grpc-storage-plugin.log-level=debug
61 | ```
62 | 
63 | ## Credits
64 | 
65 | This project is originally based on [this clickhouse plugin implementation](https://github.com/bobrik/jaeger/tree/ivan/clickhouse/plugin/storage/clickhouse).
66 | 
67 | See also [jaegertracing/jaeger/issues/1438](https://github.com/jaegertracing/jaeger/issues/1438) for historical discussion regarding the implementation of a ClickHouse plugin.
68 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | GOOS ?= $(shell go env GOOS)
  2 | GOARCH ?= $(shell go env GOARCH)
  3 | GOBUILD=CGO_ENABLED=0 installsuffix=cgo go build -trimpath
  4 | 
  5 | TOOLS_MOD_DIR = ./internal/tools
  6 | JAEGER_VERSION ?= 1.32.0
  7 | 
  8 | DOCKER_REPO ?= ghcr.io/jaegertracing/jaeger-clickhouse
  9 | DOCKER_TAG ?= latest
 10 | 
 11 | .PHONY: build
 12 | build:
 13 | 	${GOBUILD} -o jaeger-clickhouse-$(GOOS)-$(GOARCH) ./cmd/jaeger-clickhouse/main.go
 14 | 
 15 | .PHONY: build-linux-amd64
 16 | build-linux-amd64:
 17 | 	GOOS=linux GOARCH=amd64 $(MAKE) build
 18 | 
 19 | .PHONY: build-linux-arm64
 20 | build-linux-arm64:
 21 | 	GOOS=linux GOARCH=arm64 $(MAKE) build
 22 | 
 23 | .PHONY: build-darwin-amd64
 24 | build-darwin-amd64:
 25 | 	GOOS=darwin GOARCH=amd64 $(MAKE) build
 26 | 
 27 | .PHONY: build-darwin-arm64
 28 | build-darwin-arm64:
 29 | 	GOOS=darwin GOARCH=arm64 $(MAKE) build
 30 | 
 31 | .PHONY: build-all-platforms
 32 | build-all-platforms: build-linux-amd64 build-linux-arm64 build-darwin-amd64 build-darwin-arm64
 33 | 
 34 | .PHONY: e2e-tests
 35 | e2e-tests:
 36 | 	GOOS=linux GOARCH=amd64 $(MAKE) build
 37 | 	E2E_TEST=true go test ./e2etests... -v
 38 | 
 39 | .PHONY: run
 40 | run:
 41 | 	docker run --rm --name jaeger -e JAEGER_DISABLED=false --link some-clickhouse-server -it -u ${shell id -u} -p16686:16686 -p14250:14250 -p14268:14268 -p6831:6831/udp -v "${PWD}:/data" -e SPAN_STORAGE_TYPE=grpc-plugin jaegertracing/all-in-one:${JAEGER_VERSION} --query.ui-config=/data/jaeger-ui.json --grpc-storage-plugin.binary=/data/jaeger-clickhouse-$(GOOS)-$(GOARCH) --grpc-storage-plugin.configuration-file=/data/config.yaml --grpc-storage-plugin.log-level=debug
 42 | 
 43 | .PHONY: run-hotrod
 44 | run-hotrod:
 45 | 	docker run --rm --link jaeger --env JAEGER_AGENT_HOST=jaeger --env JAEGER_AGENT_PORT=6831 -p8080:8080 jaegertracing/example-hotrod:${JAEGER_VERSION} all
 46 | 
 47 | .PHONY: fmt
 48 | fmt:
 49 | 	go fmt ./...
 50 | 	goimports -w -local github.com/jaegertracing/jaeger-clickhouse ./
 51 | 
 52 | .PHONY: lint
 53 | lint:
 54 | 	golangci-lint -v run --allow-parallel-runners ./...
 55 | 
 56 | .PHONY: test
 57 | test:
 58 | 	go test ./...
 59 | 
 60 | .PHONY: integration-test
 61 | integration-test: build
 62 | 	STORAGE=grpc-plugin \
 63 | 	PLUGIN_BINARY_PATH=$(PWD)/jaeger-clickhouse-linux-amd64 \
 64 | 	PLUGIN_CONFIG_PATH=$(PWD)/integration/config-local.yaml \
 65 | 	go test ./integration
 66 | 
 67 | .PHONY: tar
 68 | tar:
 69 | 	tar -czvf jaeger-clickhouse-$(GOOS)-$(GOARCH).tar.gz  jaeger-clickhouse-$(GOOS)-$(GOARCH) config.yaml
 70 | 
 71 | .PHONY: tar-linux-amd64
 72 | tar-linux-amd64:
 73 | 	GOOS=linux GOARCH=amd64 $(MAKE) tar
 74 | 
 75 | .PHONY: tar-linux-arm64
 76 | tar-linux-arm64:
 77 | 	GOOS=linux GOARCH=arm64 $(MAKE) tar
 78 | 
 79 | .PHONY: tar-darwin-amd64
 80 | tar-darwin-amd64:
 81 | 	GOOS=darwin GOARCH=amd64 $(MAKE) tar
 82 | 
 83 | .PHONY: tar-darwin-arm64
 84 | tar-darwin-arm64:
 85 | 	GOOS=darwin GOARCH=arm64 $(MAKE) tar
 86 | 
 87 | .PHONY: tar-all-platforms
 88 | tar-all-platforms: tar-linux-amd64 tar-linux-arm64 tar-darwin-amd64 tar-darwin-arm64
 89 | 
 90 | .PHONY: docker
 91 | docker: build
 92 | 	docker build -t ${DOCKER_REPO}:${DOCKER_TAG} -f Dockerfile .
 93 | 
 94 | .PHONY: docker-push
 95 | docker-push: build
 96 | 	docker push ${DOCKER_REPO}:${DOCKER_TAG}
 97 | 
 98 | .PHONY: install-tools
 99 | install-tools:
100 | 	cd $(TOOLS_MOD_DIR) && go install golang.org/x/tools/cmd/goimports
101 | 	cd $(TOOLS_MOD_DIR) && go install github.com/golangci/golangci-lint/cmd/golangci-lint
102 | 
103 | 


--------------------------------------------------------------------------------
/integration/grpc_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2019 The Jaeger Authors.
  2 | // Copyright (c) 2018 Uber Technologies, Inc.
  3 | //
  4 | // Licensed under the Apache License, Version 2.0 (the "License");
  5 | // you may not use this file except in compliance with the License.
  6 | // You may obtain a copy of the License at
  7 | //
  8 | // http://www.apache.org/licenses/LICENSE-2.0
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software
 11 | // distributed under the License is distributed on an "AS IS" BASIS,
 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | // See the License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | package integration
 17 | 
 18 | import (
 19 | 	"os"
 20 | 	"testing"
 21 | 
 22 | 	"github.com/jaegertracing/jaeger/pkg/config"
 23 | 	"github.com/jaegertracing/jaeger/pkg/metrics"
 24 | 	"github.com/jaegertracing/jaeger/pkg/testutils"
 25 | 	"github.com/jaegertracing/jaeger/plugin/storage/grpc"
 26 | 	"github.com/jaegertracing/jaeger/plugin/storage/integration"
 27 | 	"github.com/stretchr/testify/require"
 28 | 	"go.uber.org/zap"
 29 | )
 30 | 
 31 | const defaultPluginBinaryPath = "../../../examples/memstore-plugin/memstore-plugin"
 32 | 
 33 | type GRPCStorageIntegrationTestSuite struct {
 34 | 	integration.StorageIntegration
 35 | 	logger           *zap.Logger
 36 | 	pluginBinaryPath string
 37 | 	pluginConfigPath string
 38 | }
 39 | 
 40 | func (s *GRPCStorageIntegrationTestSuite) initialize() error {
 41 | 	s.logger, _ = testutils.NewLogger()
 42 | 
 43 | 	f := grpc.NewFactory()
 44 | 	v, command := config.Viperize(f.AddFlags)
 45 | 	flags := []string{
 46 | 		"--grpc-storage-plugin.binary",
 47 | 		s.pluginBinaryPath,
 48 | 		"--grpc-storage-plugin.log-level",
 49 | 		"debug",
 50 | 	}
 51 | 	if s.pluginConfigPath != "" {
 52 | 		flags = append(flags,
 53 | 			"--grpc-storage-plugin.configuration-file",
 54 | 			s.pluginConfigPath,
 55 | 		)
 56 | 	}
 57 | 	err := command.ParseFlags(flags)
 58 | 	if err != nil {
 59 | 		return err
 60 | 	}
 61 | 	f.InitFromViper(v, zap.NewNop())
 62 | 	if err = f.Initialize(metrics.NullFactory, s.logger); err != nil {
 63 | 		return err
 64 | 	}
 65 | 
 66 | 	if s.SpanWriter, err = f.CreateSpanWriter(); err != nil {
 67 | 		return err
 68 | 	}
 69 | 	if s.SpanReader, err = f.CreateSpanReader(); err != nil {
 70 | 		return err
 71 | 	}
 72 | 
 73 | 	// TODO DependencyWriter is not implemented in grpc store
 74 | 
 75 | 	s.Refresh = s.refresh
 76 | 	s.CleanUp = s.cleanUp
 77 | 	return nil
 78 | }
 79 | 
 80 | func (s *GRPCStorageIntegrationTestSuite) refresh() error {
 81 | 	return nil
 82 | }
 83 | 
 84 | func (s *GRPCStorageIntegrationTestSuite) cleanUp() error {
 85 | 	return s.initialize()
 86 | }
 87 | 
 88 | func TestGRPCStorage(t *testing.T) {
 89 | 	if os.Getenv("STORAGE") != "grpc-plugin" {
 90 | 		t.Skip("Integration test against grpc skipped; set STORAGE env var to grpc-plugin to run this")
 91 | 	}
 92 | 	binaryPath := os.Getenv("PLUGIN_BINARY_PATH")
 93 | 	if binaryPath == "" {
 94 | 		t.Logf("PLUGIN_BINARY_PATH env var not set, using %s", defaultPluginBinaryPath)
 95 | 		binaryPath = defaultPluginBinaryPath
 96 | 	}
 97 | 	configPath := os.Getenv("PLUGIN_CONFIG_PATH")
 98 | 	if configPath == "" {
 99 | 		t.Log("PLUGIN_CONFIG_PATH env var not set")
100 | 	}
101 | 	s := &GRPCStorageIntegrationTestSuite{
102 | 		pluginBinaryPath: binaryPath,
103 | 		pluginConfigPath: configPath,
104 | 	}
105 | 	require.NoError(t, s.initialize())
106 | 	s.IntegrationTestAll(t)
107 | }
108 | 


--------------------------------------------------------------------------------
/blog/post1.md:
--------------------------------------------------------------------------------
 1 | # Jaeger ClickHouse
 2 | This is an implementation of Jaeger's [storage plugin](https://github.com/jaegertracing/jaeger/tree/master/plugin/storage/grpc) for ClickHouse.
 3 | See also [jaegertracing/jaeger/issues/1438](https://github.com/jaegertracing/jaeger/issues/1438) for historical discussion regarding the ClickHouse plugin.
 4 | 
 5 | ## Project status
 6 | 
 7 | Jaeger ClickHouse is a community-driven project, and we would love to hear your feature requests.
 8 | Pull requests are also greatly appreciated.
 9 | 
10 | ## Why use ClickHouse for Jaeger?
11 | 
12 | [ClickHouse](https://github.com/clickhouse/clickhouse) is an analytical column-oriented database management system. It is designed to analyze streams of clicks, which closely resemble spans. It's open-source, optimized for performance, and actively developed.
13 | 
14 | ## How does it work?
15 | 
16 | Jaeger spans are stored in 2 tables. The first one contains the whole span encoded either in JSON or Protobuf.
17 | The second stores key information about spans for searching. This table is indexed by span duration and tags.
18 | Also, info about operations is stored in the materialized view. There are no indexes for archived spans.
19 | Storing data in replicated local tables with distributed global tables is natively supported. Spans are buffered.
20 | Span buffers are flushed to DB either by timer or after reaching max batch size.
21 | Timer interval and batch size can be set in [config file](../config.yaml).
22 | 
23 | ![Picture of tables](post1_pics/tables.png)
24 | 
25 | ## Benchmarks
26 | 
27 | 10^8 traces were flushed using [jaeger-tracegen](https://www.jaegertracing.io/docs/1.25/tools/) to ClickHouse and Elasticsearch servers.
28 | The ClickHouse server consisted of 3 shards, 2 hosts in each, and 3 Zookeeper hosts. The Elasticsearch server consisted of 6 hosts,
29 | with 5 shards for the primary index and 1 replica. All hosts were identical (8 vCPU, 32 GiB RAM, 20 GiB SSD).
30 | 
31 | ### General stats
32 | 
33 | Cpu usage, [% of 1 host CPU]
34 | ![CPU usage](post1_pics/cpu-usage.png)
35 | 
36 | Memory usage, [bytes]
37 | ![Memory usage](post1_pics/memory-usage.png)
38 | 
39 | IO write, [operations]
40 | ![IO write](post1_pics/io-write.png)
41 | 
42 | Disk usage, [bytes]
43 | ![Disk usage](post1_pics/disk-usage.png)
44 | 
45 | ### Recorded
46 | 
47 | #### ClickHouse
48 | 
49 | ```sql
50 | SELECT count()
51 | FROM jaeger_index
52 | WHERE service = 'tracegen'
53 | 
54 | ┌──count()─┐
55 | │ 57026426 │
56 | └──────────┘
57 | ```
58 | 
59 | #### Elasticsearch
60 | 
61 | ![Elasticsearch span count](post1_pics/elastic-count.png?raw=true)
62 | 
63 | # How to start using Jaeger over ClickHouse
64 | 
65 | ## Documentation
66 | 
67 | Refer to the [config.yaml](../config.yaml) for all supported configuration options.
68 | 
69 | * [Kubernetes deployment](../guide-kubernetes.md)
70 | * [Sharding and replication](../guide-sharding-and-replication.md)
71 | * [Multi-tenancy](../guide-multitenancy.md)
72 | 
73 | ## Build & Run
74 | 
75 | ### Docker database example
76 | 
77 | ```bash
78 | docker run --rm -it -p9000:9000 --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server:22
79 | GOOS=linux make build run
80 | make run-hotrod
81 | ```
82 | 
83 | Open [localhost:16686](http://localhost:16686) and [localhost:8080](http://localhost:8080).
84 | 
85 | ### Custom database
86 | 
87 | You need to specify connection options in config.yaml file, then you can run
88 | 
89 | ```bash
90 | make build
91 | SPAN_STORAGE_TYPE=grpc-plugin {Jaeger binary address} --query.ui-config=jaeger-ui.json --grpc-storage-plugin.binary=./{name of built binary} --grpc-storage-plugin.configuration-file=config.yaml --grpc-storage-plugin.log-level=debug
92 | ```
93 | 
94 | ## Credits
95 | 
96 | This project is based on https://github.com/bobrik/jaeger/tree/ivan/clickhouse/plugin/storage/clickhouse.
97 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/writer.go:
--------------------------------------------------------------------------------
  1 | package clickhousespanstore
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"database/sql"
  6 | 	"sync"
  7 | 	"time"
  8 | 
  9 | 	hclog "github.com/hashicorp/go-hclog"
 10 | 	"github.com/jaegertracing/jaeger/model"
 11 | 	"github.com/jaegertracing/jaeger/storage/spanstore"
 12 | 	"github.com/prometheus/client_golang/prometheus"
 13 | )
 14 | 
 15 | type Encoding string
 16 | 
 17 | const (
 18 | 	// EncodingJSON is used for spans encoded as JSON.
 19 | 	EncodingJSON Encoding = "json"
 20 | 	// EncodingProto is used for spans encoded as Protobuf.
 21 | 	EncodingProto Encoding = "protobuf"
 22 | )
 23 | 
 24 | var (
 25 | 	numWritesWithBatchSize = prometheus.NewCounter(prometheus.CounterOpts{
 26 | 		Name: "jaeger_clickhouse_writes_with_batch_size_total",
 27 | 		Help: "Number of clickhouse writes due to batch size criteria",
 28 | 	})
 29 | 	numWritesWithFlushInterval = prometheus.NewCounter(prometheus.CounterOpts{
 30 | 		Name: "jaeger_clickhouse_writes_with_flush_interval_total",
 31 | 		Help: "Number of clickhouse writes due to flush interval criteria",
 32 | 	})
 33 | )
 34 | 
 35 | // SpanWriter for writing spans to ClickHouse
 36 | type SpanWriter struct {
 37 | 	workerParams WorkerParams
 38 | 
 39 | 	size   int64
 40 | 	spans  chan *model.Span
 41 | 	finish chan bool
 42 | 	done   sync.WaitGroup
 43 | }
 44 | 
 45 | var registerWriterMetrics sync.Once
 46 | var _ spanstore.Writer = (*SpanWriter)(nil)
 47 | 
 48 | // NewSpanWriter returns a SpanWriter for the database
 49 | func NewSpanWriter(
 50 | 	logger hclog.Logger,
 51 | 	db *sql.DB,
 52 | 	indexTable,
 53 | 	spansTable TableName,
 54 | 	tenant string,
 55 | 	encoding Encoding,
 56 | 	delay time.Duration,
 57 | 	size int64,
 58 | 	maxSpanCount int,
 59 | ) *SpanWriter {
 60 | 	writer := &SpanWriter{
 61 | 		workerParams: WorkerParams{
 62 | 			logger:     logger,
 63 | 			db:         db,
 64 | 			indexTable: indexTable,
 65 | 			spansTable: spansTable,
 66 | 			tenant:     tenant,
 67 | 			encoding:   encoding,
 68 | 			delay:      delay,
 69 | 		},
 70 | 		size:   size,
 71 | 		spans:  make(chan *model.Span, size),
 72 | 		finish: make(chan bool),
 73 | 	}
 74 | 
 75 | 	writer.registerMetrics()
 76 | 	go writer.backgroundWriter(maxSpanCount)
 77 | 
 78 | 	return writer
 79 | }
 80 | 
 81 | func (w *SpanWriter) registerMetrics() {
 82 | 	registerWriterMetrics.Do(func() {
 83 | 		prometheus.MustRegister(numWritesWithBatchSize)
 84 | 		prometheus.MustRegister(numWritesWithFlushInterval)
 85 | 	})
 86 | }
 87 | 
 88 | func (w *SpanWriter) backgroundWriter(maxSpanCount int) {
 89 | 	pool := NewWorkerPool(&w.workerParams, maxSpanCount)
 90 | 	go pool.Work()
 91 | 	batch := make([]*model.Span, 0, w.size)
 92 | 
 93 | 	timer := time.After(w.workerParams.delay)
 94 | 	last := time.Now()
 95 | 
 96 | 	for {
 97 | 		w.done.Add(1)
 98 | 
 99 | 		flush := false
100 | 		finish := false
101 | 
102 | 		select {
103 | 		case span := <-w.spans:
104 | 			batch = append(batch, span)
105 | 			flush = len(batch) == cap(batch)
106 | 			if flush {
107 | 				w.workerParams.logger.Debug("Flush due to batch size", "size", len(batch))
108 | 				numWritesWithBatchSize.Inc()
109 | 			}
110 | 		case <-timer:
111 | 			timer = time.After(w.workerParams.delay)
112 | 			flush = time.Since(last) > w.workerParams.delay && len(batch) > 0
113 | 			if flush {
114 | 				w.workerParams.logger.Debug("Flush due to timer")
115 | 				numWritesWithFlushInterval.Inc()
116 | 			}
117 | 		case <-w.finish:
118 | 			finish = true
119 | 			flush = len(batch) > 0
120 | 			w.workerParams.logger.Debug("Finish channel")
121 | 		}
122 | 
123 | 		if flush {
124 | 			pool.WriteBatch(batch)
125 | 
126 | 			batch = make([]*model.Span, 0, w.size)
127 | 			last = time.Now()
128 | 		}
129 | 
130 | 		if finish {
131 | 			pool.Close()
132 | 		}
133 | 		w.done.Done()
134 | 
135 | 		if finish {
136 | 			break
137 | 		}
138 | 	}
139 | }
140 | 
141 | // WriteSpan writes the encoded span
142 | func (w *SpanWriter) WriteSpan(_ context.Context, span *model.Span) error {
143 | 	w.spans <- span
144 | 	return nil
145 | }
146 | 
147 | // Close Implements io.Closer and closes the underlying storage
148 | func (w *SpanWriter) Close() error {
149 | 	w.finish <- true
150 | 	w.done.Wait()
151 | 	return nil
152 | }
153 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/pool.go:
--------------------------------------------------------------------------------
  1 | package clickhousespanstore
  2 | 
  3 | import (
  4 | 	"math"
  5 | 	"sync"
  6 | 
  7 | 	"github.com/jaegertracing/jaeger/model"
  8 | 	"github.com/prometheus/client_golang/prometheus"
  9 | )
 10 | 
 11 | var (
 12 | 	numDiscardedSpans = prometheus.NewCounter(prometheus.CounterOpts{
 13 | 		Name: "jaeger_clickhouse_discarded_spans",
 14 | 		Help: "Count of spans that have been discarded due to pending writes exceeding max_span_count",
 15 | 	})
 16 | 	numPendingSpans = prometheus.NewGauge(prometheus.GaugeOpts{
 17 | 		Name: "jaeger_clickhouse_pending_spans",
 18 | 		Help: "Number of spans that are currently pending, counts against max_span_count",
 19 | 	})
 20 | )
 21 | 
 22 | // WriteWorkerPool is a worker pool for writing batches of spans.
 23 | // Given a new batch, WriteWorkerPool creates a new WriteWorker.
 24 | // If the number of currently processed spans if more than maxSpanCount, then the oldest worker is removed.
 25 | type WriteWorkerPool struct {
 26 | 	params *WorkerParams
 27 | 
 28 | 	finish  chan bool
 29 | 	done    sync.WaitGroup
 30 | 	batches chan []*model.Span
 31 | 
 32 | 	maxSpanCount int
 33 | 	mutex        sync.Mutex
 34 | 	workers      workerHeap
 35 | 	workerDone   chan *WriteWorker
 36 | }
 37 | 
 38 | var registerPoolMetrics sync.Once
 39 | 
 40 | func NewWorkerPool(params *WorkerParams, maxSpanCount int) WriteWorkerPool {
 41 | 	registerPoolMetrics.Do(func() {
 42 | 		prometheus.MustRegister(numDiscardedSpans, numPendingSpans)
 43 | 	})
 44 | 
 45 | 	return WriteWorkerPool{
 46 | 		params:  params,
 47 | 		finish:  make(chan bool),
 48 | 		done:    sync.WaitGroup{},
 49 | 		batches: make(chan []*model.Span),
 50 | 
 51 | 		mutex:      sync.Mutex{},
 52 | 		workers:    newWorkerHeap(100),
 53 | 		workerDone: make(chan *WriteWorker),
 54 | 
 55 | 		maxSpanCount: maxSpanCount,
 56 | 	}
 57 | }
 58 | 
 59 | func (pool *WriteWorkerPool) Work() {
 60 | 	finish := false
 61 | 	nextWorkerID := int32(1)
 62 | 	pendingSpanCount := 0
 63 | 	for {
 64 | 		// Initialize to zero, or update value from previous loop
 65 | 		numPendingSpans.Set(float64(pendingSpanCount))
 66 | 
 67 | 		pool.done.Add(1)
 68 | 		select {
 69 | 		case batch := <-pool.batches:
 70 | 			batchSize := len(batch)
 71 | 			if pool.checkLimit(pendingSpanCount, batchSize) {
 72 | 				// Limit disabled or batch fits within limit, write the batch.
 73 | 				worker := WriteWorker{
 74 | 					workerID: nextWorkerID,
 75 | 
 76 | 					params: pool.params,
 77 | 					batch:  batch,
 78 | 
 79 | 					finish:     make(chan bool),
 80 | 					workerDone: pool.workerDone,
 81 | 					done:       sync.WaitGroup{},
 82 | 				}
 83 | 				if nextWorkerID == math.MaxInt32 {
 84 | 					nextWorkerID = 1
 85 | 				} else {
 86 | 					nextWorkerID++
 87 | 				}
 88 | 				pool.workers.AddWorker(&worker)
 89 | 				pendingSpanCount += batchSize
 90 | 				go worker.Work()
 91 | 			} else {
 92 | 				// Limit exceeded, complain
 93 | 				numDiscardedSpans.Add(float64(batchSize))
 94 | 				pool.params.logger.Error("Discarding batch of spans due to exceeding pending span count", "batch_size", batchSize, "pending_span_count", pendingSpanCount, "max_span_count", pool.maxSpanCount)
 95 | 			}
 96 | 		case worker := <-pool.workerDone:
 97 | 			// The worker has finished, subtract its work from the count and clean it from the heap.
 98 | 			pendingSpanCount -= len(worker.batch)
 99 | 			if err := pool.workers.RemoveWorker(worker); err != nil {
100 | 				pool.params.logger.Error("could not remove worker", "worker", worker, "error", err)
101 | 			}
102 | 		case <-pool.finish:
103 | 			pool.workers.CloseWorkers()
104 | 			finish = true
105 | 		}
106 | 		pool.done.Done()
107 | 
108 | 		if finish {
109 | 			break
110 | 		}
111 | 	}
112 | }
113 | 
114 | func (pool *WriteWorkerPool) WriteBatch(batch []*model.Span) {
115 | 	pool.batches <- batch
116 | }
117 | 
118 | func (pool *WriteWorkerPool) Close() {
119 | 	pool.finish <- true
120 | 	pool.done.Wait()
121 | }
122 | 
123 | // checkLimit returns whether batchSize fits within the maxSpanCount
124 | func (pool *WriteWorkerPool) checkLimit(pendingSpanCount int, batchSize int) bool {
125 | 	if pool.maxSpanCount <= 0 {
126 | 		return true
127 | 	}
128 | 
129 | 	// Check limit, add batchSize if within limit
130 | 	return pendingSpanCount+batchSize <= pool.maxSpanCount
131 | }
132 | 


--------------------------------------------------------------------------------
/storage/config_test.go:
--------------------------------------------------------------------------------
  1 | package storage
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"testing"
  6 | 
  7 | 	"github.com/stretchr/testify/assert"
  8 | 
  9 | 	"github.com/jaegertracing/jaeger-clickhouse/storage/clickhousespanstore"
 10 | )
 11 | 
 12 | func TestSetDefaults(t *testing.T) {
 13 | 	tests := map[string]struct {
 14 | 		replication bool
 15 | 		getField    func(Configuration) interface{}
 16 | 		expected    interface{}
 17 | 	}{
 18 | 		"username": {
 19 | 			getField: func(config Configuration) interface{} { return config.Username },
 20 | 			expected: defaultUsername,
 21 | 		},
 22 | 		"database name": {
 23 | 			getField: func(config Configuration) interface{} { return config.Database },
 24 | 			expected: defaultDatabaseName,
 25 | 		},
 26 | 		"encoding": {
 27 | 			getField: func(config Configuration) interface{} { return config.Encoding },
 28 | 			expected: defaultEncoding,
 29 | 		},
 30 | 		"batch write size": {
 31 | 			getField: func(config Configuration) interface{} { return config.BatchWriteSize },
 32 | 			expected: defaultBatchSize,
 33 | 		},
 34 | 		"batch flush interval": {
 35 | 			getField: func(config Configuration) interface{} { return config.BatchFlushInterval },
 36 | 			expected: defaultBatchDelay,
 37 | 		},
 38 | 		"max span count": {
 39 | 			getField: func(config Configuration) interface{} { return config.MaxSpanCount },
 40 | 			expected: defaultMaxSpanCount,
 41 | 		},
 42 | 		"metrics endpoint": {
 43 | 			getField: func(config Configuration) interface{} { return config.MetricsEndpoint },
 44 | 			expected: defaultMetricsEndpoint,
 45 | 		},
 46 | 		"spans table name local": {
 47 | 			getField: func(config Configuration) interface{} { return config.SpansTable },
 48 | 			expected: defaultSpansTable.ToLocal(),
 49 | 		},
 50 | 		"spans table name replication": {
 51 | 			replication: true,
 52 | 			getField:    func(config Configuration) interface{} { return config.SpansTable },
 53 | 			expected:    defaultSpansTable,
 54 | 		},
 55 | 		"index table name local": {
 56 | 			getField: func(config Configuration) interface{} { return config.SpansIndexTable },
 57 | 			expected: defaultSpansIndexTable.ToLocal(),
 58 | 		},
 59 | 		"index table name replication": {
 60 | 			replication: true,
 61 | 			getField:    func(config Configuration) interface{} { return config.SpansIndexTable },
 62 | 			expected:    defaultSpansIndexTable,
 63 | 		},
 64 | 		"operations table name local": {
 65 | 			getField: func(config Configuration) interface{} { return config.OperationsTable },
 66 | 			expected: defaultOperationsTable.ToLocal(),
 67 | 		},
 68 | 		"operations table name replication": {
 69 | 			replication: true,
 70 | 			getField:    func(config Configuration) interface{} { return config.OperationsTable },
 71 | 			expected:    defaultOperationsTable,
 72 | 		},
 73 | 		"max number spans": {
 74 | 			getField: func(config Configuration) interface{} { return config.MaxNumSpans },
 75 | 			expected: defaultMaxNumSpans,
 76 | 		},
 77 | 	}
 78 | 
 79 | 	for name, test := range tests {
 80 | 		t.Run(fmt.Sprintf("default %s", name), func(t *testing.T) {
 81 | 			config := Configuration{Replication: test.replication}
 82 | 			config.setDefaults()
 83 | 			assert.EqualValues(t, test.expected, test.getField(config))
 84 | 		})
 85 | 	}
 86 | }
 87 | 
 88 | func TestConfiguration_GetSpansArchiveTable(t *testing.T) {
 89 | 	tests := map[string]struct {
 90 | 		config                        Configuration
 91 | 		expectedSpansArchiveTableName clickhousespanstore.TableName
 92 | 	}{
 93 | 		"default_config_local":       {config: Configuration{}, expectedSpansArchiveTableName: (defaultSpansTable + "_archive").ToLocal()},
 94 | 		"default_config_replication": {config: Configuration{Replication: true}, expectedSpansArchiveTableName: defaultSpansTable + "_archive"},
 95 | 		"custom_spans_table":         {config: Configuration{SpansTable: "custom_table_name"}, expectedSpansArchiveTableName: "custom_table_name_archive"},
 96 | 	}
 97 | 
 98 | 	for name, test := range tests {
 99 | 		t.Run(name, func(t *testing.T) {
100 | 			test.config.setDefaults()
101 | 			assert.Equal(t, test.expectedSpansArchiveTableName, test.config.GetSpansArchiveTable())
102 | 		})
103 | 	}
104 | }
105 | 
106 | func TestConfiguration_InitTables(test *testing.T) {
107 | 	// for pointers below
108 | 	t := true
109 | 	f := false
110 | 	tests := map[string]struct {
111 | 		config             Configuration
112 | 		expectedInitTables bool
113 | 	}{
114 | 		"scriptsempty_initnil":      {config: Configuration{}, expectedInitTables: true},
115 | 		"scriptsprovided_initnil":   {config: Configuration{InitSQLScriptsDir: "hello"}, expectedInitTables: false},
116 | 		"scriptsempty_inittrue":     {config: Configuration{InitTables: &t}, expectedInitTables: true},
117 | 		"scriptsprovided_inittrue":  {config: Configuration{InitSQLScriptsDir: "hello", InitTables: &t}, expectedInitTables: true},
118 | 		"scriptsempty_initfalse":    {config: Configuration{InitTables: &f}, expectedInitTables: false},
119 | 		"scriptsprovided_initfalse": {config: Configuration{InitSQLScriptsDir: "hello", InitTables: &f}, expectedInitTables: false},
120 | 	}
121 | 
122 | 	for name, testcase := range tests {
123 | 		test.Run(name, func(t *testing.T) {
124 | 			testcase.config.setDefaults()
125 | 			assert.Equal(t, testcase.expectedInitTables, *(testcase.config.InitTables))
126 | 		})
127 | 	}
128 | }
129 | 


--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
  1 | # options for analysis running
  2 | run:
  3 |   # default concurrency is the number of available CPUs
  4 |   concurrency: 4
  5 | 
  6 |   # timeout for analysis, e.g. 30s, 5m, default is 1m
  7 |   timeout: 10m
  8 | 
  9 |   # exit code when at least one issue was found, default is 1
 10 |   issues-exit-code: 1
 11 | 
 12 |   # include test files or not, default is true
 13 |   tests: true
 14 | 
 15 |   # which dirs to skip: issues from them won't be reported;
 16 |   # can use regexp here: generated.*, regexp is applied on full path;
 17 |   # default value is empty list, but default dirs are skipped independently
 18 |   # from this option's value (see skip-dirs-use-default).
 19 |   skip-dirs:
 20 | 
 21 |   # default is true. Enables skipping of directories:
 22 |   #   vendor$, third_party$, testdata$, examples$, Godeps$, builtin$
 23 |   skip-dirs-use-default: false
 24 | 
 25 |   # which files to skip: they will be analyzed, but issues from them
 26 |   # won't be reported. Default value is empty list, but there is
 27 |   # no need to include all autogenerated files, we confidently recognize
 28 |   # autogenerated files. If it's not please let us know.
 29 |   skip-files:
 30 | 
 31 |   # by default isn't set. If set we pass it to "go list -mod={option}". From "go help modules":
 32 |   # If invoked with -mod=readonly, the go command is disallowed from the implicit
 33 |   # automatic updating of go.mod described above. Instead, it fails when any changes
 34 |   # to go.mod are needed. This setting is most useful to check that go.mod does
 35 |   # not need updates, such as in a continuous integration and testing system.
 36 |   # If invoked with -mod=vendor, the go command assumes that the vendor
 37 |   # directory holds the correct copies of dependencies and ignores
 38 |   # the dependency descriptions in go.mod.
 39 |   modules-download-mode: readonly
 40 | 
 41 | # output configuration options
 42 | output:
 43 |   # colored-line-number|line-number|json|tab|checkstyle|code-climate, default is "colored-line-number"
 44 |   format: colored-line-number
 45 | 
 46 |   # print lines of code with issue, default is true
 47 |   print-issued-lines: true
 48 | 
 49 |   # print linter name in the end of issue text, default is true
 50 |   print-linter-name: true
 51 | 
 52 | # all available settings of specific linters
 53 | linters-settings:
 54 |   govet:
 55 |     # report about shadowed variables
 56 |     check-shadowing: true
 57 | 
 58 |     # settings per analyzer
 59 |     settings:
 60 |       printf: # analyzer name, run `go tool vet help` to see all analyzers
 61 |         funcs: # run `go tool vet help printf` to see available settings for `printf` analyzer
 62 |           - (github.com/golangci/golangci-lint/pkg/logutils.Log).Infof
 63 |           - (github.com/golangci/golangci-lint/pkg/logutils.Log).Warnf
 64 |           - (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf
 65 |           - (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf
 66 | 
 67 |     enable-all: true
 68 |     # TODO: Enable this and fix the alignment issues.
 69 |     disable:
 70 |       - fieldalignment
 71 | 
 72 |   revive:
 73 |     # minimal confidence for issues, default is 0.8
 74 |     min-confidence: 0.8
 75 | 
 76 |   gofmt:
 77 |     # simplify code: gofmt with `-s` option, true by default
 78 |     simplify: true
 79 | 
 80 |   goimports:
 81 |     # put imports beginning with prefix after 3rd-party packages;
 82 |     # it's a comma-separated list of prefixes
 83 |     local-prefixes: go.opentelemetry.io/collector
 84 | 
 85 |   misspell:
 86 |     # Correct spellings using locale preferences for US or UK.
 87 |     # Default is to use a neutral variety of English.
 88 |     # Setting locale to US will correct the British spelling of 'colour' to 'color'.
 89 |     locale: US
 90 |     ignore-words:
 91 |       - cancelled
 92 |       - metre
 93 |       - meter
 94 |       - metres
 95 |       - kilometre
 96 |       - kilometres
 97 | 
 98 | linters:
 99 |   disable:
100 |     - errcheck
101 |   enable:
102 |     - exportloopref
103 |     - gocritic
104 |     - gofmt
105 |     - goimports
106 |     - gosec
107 |     - govet
108 |     - misspell
109 |     - revive
110 |     - staticcheck
111 |     - unconvert
112 |     - unparam
113 | 
114 | issues:
115 |   # Excluding configuration per-path, per-linter, per-text and per-source
116 |   exclude-rules:
117 |     # Exclude some linters from running on tests files.
118 |     - path: otlp_test.go
119 |       linters:
120 |         # See https://github.com/golangci/golangci-lint/issues/537#issuecomment-545170007
121 |         - structcheck
122 |     - text: "G404:"
123 |       linters:
124 |         - gosec
125 |     - text: "G402:"
126 |       linters:
127 |         - gosec
128 |     - path: grpc_test.go
129 |       linters:
130 |         # See https://github.com/golangci/golangci-lint/issues/2286
131 |         - typecheck
132 | 
133 |   # The list of ids of default excludes to include or disable. By default it's empty.
134 |   # See the list of default excludes here https://golangci-lint.run/usage/configuration.
135 |   include:
136 |     - EXC0001
137 |     - EXC0002
138 |     - EXC0003
139 |     - EXC0004
140 |     - EXC0005
141 |     - EXC0006
142 |     - EXC0007
143 |     # - EXC0008 - Duplicated errcheck checks
144 |     - EXC0009
145 |     - EXC0010
146 |     - EXC0011
147 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
  1 | module github.com/jaegertracing/jaeger-clickhouse
  2 | 
  3 | go 1.19
  4 | 
  5 | require (
  6 | 	github.com/ClickHouse/clickhouse-go/v2 v2.3.0
  7 | 	github.com/DATA-DOG/go-sqlmock v1.5.0
  8 | 	github.com/ecodia/golang-awaitility v0.0.0-20180710094957-fb55e59708c7
  9 | 	github.com/gogo/protobuf v1.3.2
 10 | 	github.com/hashicorp/go-hclog v1.3.1
 11 | 	github.com/jaegertracing/jaeger v1.38.2-0.20221007043206-b4c88ddf6cdd
 12 | 	github.com/opentracing/opentracing-go v1.2.0
 13 | 	github.com/prometheus/client_golang v1.13.0
 14 | 	github.com/stretchr/testify v1.8.0
 15 | 	github.com/testcontainers/testcontainers-go v0.11.1
 16 | 	go.uber.org/zap v1.23.0
 17 | 	gopkg.in/yaml.v3 v3.0.1
 18 | )
 19 | 
 20 | require (
 21 | 	github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect
 22 | 	github.com/ClickHouse/ch-go v0.47.3 // indirect
 23 | 	github.com/Microsoft/go-winio v0.4.17-0.20210211115548-6eac466e5fa3 // indirect
 24 | 	github.com/Microsoft/hcsshim v0.8.16 // indirect
 25 | 	github.com/andybalholm/brotli v1.0.4 // indirect
 26 | 	github.com/benbjohnson/clock v1.3.0 // indirect
 27 | 	github.com/beorn7/perks v1.0.1 // indirect
 28 | 	github.com/cenkalti/backoff v2.2.1+incompatible // indirect
 29 | 	github.com/cespare/xxhash/v2 v2.1.2 // indirect
 30 | 	github.com/containerd/cgroups v0.0.0-20210114181951-8a68de567b68 // indirect
 31 | 	github.com/containerd/containerd v1.5.0-beta.4 // indirect
 32 | 	github.com/davecgh/go-spew v1.1.1 // indirect
 33 | 	github.com/docker/distribution v2.7.1+incompatible // indirect
 34 | 	github.com/docker/docker v20.10.7+incompatible // indirect
 35 | 	github.com/docker/go-connections v0.4.0 // indirect
 36 | 	github.com/docker/go-units v0.4.0 // indirect
 37 | 	github.com/fatih/color v1.13.0 // indirect
 38 | 	github.com/fsnotify/fsnotify v1.5.4 // indirect
 39 | 	github.com/go-faster/city v1.0.1 // indirect
 40 | 	github.com/go-faster/errors v0.6.1 // indirect
 41 | 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
 42 | 	github.com/golang/protobuf v1.5.2 // indirect
 43 | 	github.com/google/uuid v1.3.0 // indirect
 44 | 	github.com/grpc-ecosystem/grpc-opentracing v0.0.0-20180507213350-8e809c8a8645 // indirect
 45 | 	github.com/hashicorp/go-plugin v1.4.5 // indirect
 46 | 	github.com/hashicorp/hcl v1.0.0 // indirect
 47 | 	github.com/hashicorp/yamux v0.0.0-20211028200310-0bc27b27de87 // indirect
 48 | 	github.com/inconshreveable/mousetrap v1.0.0 // indirect
 49 | 	github.com/klauspost/compress v1.15.10 // indirect
 50 | 	github.com/kr/pretty v0.3.0 // indirect
 51 | 	github.com/kr/text v0.2.0 // indirect
 52 | 	github.com/magiconair/properties v1.8.6 // indirect
 53 | 	github.com/mattn/go-colorable v0.1.12 // indirect
 54 | 	github.com/mattn/go-isatty v0.0.14 // indirect
 55 | 	github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
 56 | 	github.com/mitchellh/go-testing-interface v1.14.1 // indirect
 57 | 	github.com/mitchellh/mapstructure v1.5.0 // indirect
 58 | 	github.com/moby/sys/mount v0.2.0 // indirect
 59 | 	github.com/moby/sys/mountinfo v0.4.1 // indirect
 60 | 	github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 // indirect
 61 | 	github.com/morikuni/aec v0.0.0-20170113033406-39771216ff4c // indirect
 62 | 	github.com/oklog/run v1.1.0 // indirect
 63 | 	github.com/opencontainers/go-digest v1.0.0 // indirect
 64 | 	github.com/opencontainers/image-spec v1.0.1 // indirect
 65 | 	github.com/opencontainers/runc v1.0.0-rc93 // indirect
 66 | 	github.com/paulmach/orb v0.7.1 // indirect
 67 | 	github.com/pelletier/go-toml v1.9.5 // indirect
 68 | 	github.com/pelletier/go-toml/v2 v2.0.5 // indirect
 69 | 	github.com/pierrec/lz4/v4 v4.1.15 // indirect
 70 | 	github.com/pkg/errors v0.9.1 // indirect
 71 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
 72 | 	github.com/prometheus/client_model v0.2.0 // indirect
 73 | 	github.com/prometheus/common v0.37.0 // indirect
 74 | 	github.com/prometheus/procfs v0.8.0 // indirect
 75 | 	github.com/rogpeppe/go-internal v1.8.1 // indirect
 76 | 	github.com/segmentio/asm v1.2.0 // indirect
 77 | 	github.com/shopspring/decimal v1.3.1 // indirect
 78 | 	github.com/sirupsen/logrus v1.8.1 // indirect
 79 | 	github.com/spf13/afero v1.8.2 // indirect
 80 | 	github.com/spf13/cast v1.5.0 // indirect
 81 | 	github.com/spf13/cobra v1.5.0 // indirect
 82 | 	github.com/spf13/jwalterweatherman v1.1.0 // indirect
 83 | 	github.com/spf13/pflag v1.0.5 // indirect
 84 | 	github.com/spf13/viper v1.13.0 // indirect
 85 | 	github.com/subosito/gotenv v1.4.1 // indirect
 86 | 	github.com/uber/jaeger-client-go v2.30.0+incompatible // indirect
 87 | 	github.com/uber/jaeger-lib v2.4.1+incompatible // indirect
 88 | 	go.opencensus.io v0.23.0 // indirect
 89 | 	go.opentelemetry.io/otel v1.10.0 // indirect
 90 | 	go.opentelemetry.io/otel/trace v1.10.0 // indirect
 91 | 	go.uber.org/atomic v1.10.0 // indirect
 92 | 	go.uber.org/multierr v1.8.0 // indirect
 93 | 	golang.org/x/net v0.0.0-20221002022538-bcab6841153b // indirect
 94 | 	golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec // indirect
 95 | 	golang.org/x/text v0.3.7 // indirect
 96 | 	google.golang.org/genproto v0.0.0-20220822174746-9e6da59bd2fc // indirect
 97 | 	google.golang.org/grpc v1.50.0 // indirect
 98 | 	google.golang.org/protobuf v1.28.1 // indirect
 99 | 	gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
100 | 	gopkg.in/ini.v1 v1.67.0 // indirect
101 | 	gopkg.in/yaml.v2 v2.4.0 // indirect
102 | )
103 | 


--------------------------------------------------------------------------------
/storage/store_test.go:
--------------------------------------------------------------------------------
  1 | package storage
  2 | 
  3 | import (
  4 | 	"database/sql"
  5 | 	"fmt"
  6 | 	"testing"
  7 | 
  8 | 	sqlmock "github.com/DATA-DOG/go-sqlmock"
  9 | 	hclog "github.com/hashicorp/go-hclog"
 10 | 	"github.com/stretchr/testify/assert"
 11 | 	"github.com/stretchr/testify/require"
 12 | 
 13 | 	"github.com/jaegertracing/jaeger-clickhouse/storage/clickhousedependencystore"
 14 | 	"github.com/jaegertracing/jaeger-clickhouse/storage/clickhousespanstore"
 15 | 	"github.com/jaegertracing/jaeger-clickhouse/storage/clickhousespanstore/mocks"
 16 | )
 17 | 
 18 | const (
 19 | 	testIndexTable        = "test_index_table"
 20 | 	testSpansTable        = "test_spans_table"
 21 | 	testOperationsTable   = "test_operation_table"
 22 | 	testSpansArchiveTable = "test_spans_archive_table"
 23 | )
 24 | 
 25 | var errorMock = fmt.Errorf("error mock")
 26 | 
 27 | func TestStore_SpanWriter(t *testing.T) {
 28 | 	writer := clickhousespanstore.SpanWriter{}
 29 | 	store := Store{
 30 | 		writer: &writer,
 31 | 	}
 32 | 	assert.Equal(t, &writer, store.SpanWriter())
 33 | }
 34 | 
 35 | func TestStore_ArchiveSpanWriter(t *testing.T) {
 36 | 	writer := clickhousespanstore.SpanWriter{}
 37 | 	store := Store{
 38 | 		archiveWriter: &writer,
 39 | 	}
 40 | 	assert.Equal(t, &writer, store.ArchiveSpanWriter())
 41 | }
 42 | 
 43 | func TestStore_SpanReader(t *testing.T) {
 44 | 	reader := clickhousespanstore.TraceReader{}
 45 | 	store := Store{
 46 | 		reader: &reader,
 47 | 	}
 48 | 	assert.Equal(t, &reader, store.SpanReader())
 49 | }
 50 | 
 51 | func TestStore_ArchiveSpanReader(t *testing.T) {
 52 | 	reader := clickhousespanstore.TraceReader{}
 53 | 	store := Store{
 54 | 		archiveReader: &reader,
 55 | 	}
 56 | 	assert.Equal(t, &reader, store.ArchiveSpanReader())
 57 | }
 58 | 
 59 | func TestStore_DependencyReader(t *testing.T) {
 60 | 	store := Store{}
 61 | 	assert.Equal(t, &clickhousedependencystore.DependencyStore{}, store.DependencyReader())
 62 | }
 63 | 
 64 | func TestStore_Close(t *testing.T) {
 65 | 	db, mock, err := mocks.GetDbMock()
 66 | 	require.NoError(t, err)
 67 | 	defer db.Close()
 68 | 
 69 | 	logger := mocks.NewSpyLogger()
 70 | 	store := newStore(db, logger)
 71 | 
 72 | 	mock.ExpectClose()
 73 | 	require.NoError(t, store.Close())
 74 | 	assert.NoError(t, mock.ExpectationsWereMet())
 75 | 	logger.AssertLogsEmpty(t)
 76 | }
 77 | 
 78 | func newStore(db *sql.DB, logger mocks.SpyLogger) Store {
 79 | 	return Store{
 80 | 		db: db,
 81 | 		writer: clickhousespanstore.NewSpanWriter(
 82 | 			logger,
 83 | 			db,
 84 | 			testIndexTable,
 85 | 			testSpansTable,
 86 | 			"",
 87 | 			clickhousespanstore.EncodingJSON,
 88 | 			0,
 89 | 			0,
 90 | 			0,
 91 | 		),
 92 | 		reader: clickhousespanstore.NewTraceReader(
 93 | 			db,
 94 | 			testOperationsTable,
 95 | 			testIndexTable,
 96 | 			testSpansTable,
 97 | 			"",
 98 | 			0,
 99 | 		),
100 | 		archiveWriter: clickhousespanstore.NewSpanWriter(
101 | 			logger,
102 | 			db,
103 | 			testIndexTable,
104 | 			testSpansArchiveTable,
105 | 			"",
106 | 			clickhousespanstore.EncodingJSON,
107 | 			0,
108 | 			0,
109 | 			0,
110 | 		),
111 | 		archiveReader: clickhousespanstore.NewTraceReader(
112 | 			db,
113 | 			testOperationsTable,
114 | 			testIndexTable,
115 | 			testSpansArchiveTable,
116 | 			"",
117 | 			0,
118 | 		),
119 | 	}
120 | }
121 | 
122 | func TestStore_executeScripts(t *testing.T) {
123 | 	db, mock, err := mocks.GetDbMock()
124 | 	require.NoError(t, err)
125 | 	defer db.Close()
126 | 
127 | 	spyLogger := mocks.NewSpyLogger()
128 | 	scripts := []string{
129 | 		"first SQL script",
130 | 		"second_SQL_script",
131 | 	}
132 | 
133 | 	mock.ExpectBegin()
134 | 	for _, script := range scripts {
135 | 		mock.ExpectExec(script).WillReturnResult(sqlmock.NewResult(1, 1))
136 | 	}
137 | 	mock.ExpectCommit()
138 | 	err = executeScripts(spyLogger, scripts, db)
139 | 	require.NoError(t, err)
140 | 	assert.NoError(t, mock.ExpectationsWereMet())
141 | 	spyLogger.AssertLogsOfLevelEqual(t, hclog.Debug, func() []mocks.LogMock {
142 | 		res := make([]mocks.LogMock, len(scripts))
143 | 		for i, script := range scripts {
144 | 			res[i] = mocks.LogMock{Msg: "Running SQL statement", Args: []interface{}{"statement", script}}
145 | 		}
146 | 		return res
147 | 	}())
148 | }
149 | 
150 | func TestStore_executeScriptsExecuteError(t *testing.T) {
151 | 	db, mock, err := mocks.GetDbMock()
152 | 	require.NoError(t, err)
153 | 	defer db.Close()
154 | 
155 | 	spyLogger := mocks.NewSpyLogger()
156 | 	scripts := []string{
157 | 		"first SQL script",
158 | 		"second_SQL_script",
159 | 	}
160 | 
161 | 	mock.ExpectBegin()
162 | 	mock.ExpectExec(scripts[0]).WillReturnError(errorMock)
163 | 	mock.ExpectRollback()
164 | 	err = executeScripts(spyLogger, scripts, db)
165 | 	assert.EqualError(t, err, fmt.Sprintf("could not run sql %q: %q", scripts[0], errorMock))
166 | 	spyLogger.AssertLogsOfLevelEqual(
167 | 		t,
168 | 		hclog.Debug,
169 | 		[]mocks.LogMock{{Msg: "Running SQL statement", Args: []interface{}{"statement", scripts[0]}}},
170 | 	)
171 | }
172 | 
173 | func TestStore_executeScriptBeginError(t *testing.T) {
174 | 	db, mock, err := mocks.GetDbMock()
175 | 	require.NoError(t, err)
176 | 	defer db.Close()
177 | 
178 | 	spyLogger := mocks.NewSpyLogger()
179 | 	scripts := []string{
180 | 		"first SQL script",
181 | 		"second_SQL_script",
182 | 	}
183 | 
184 | 	mock.ExpectBegin().WillReturnError(errorMock)
185 | 	err = executeScripts(spyLogger, scripts, db)
186 | 	assert.EqualError(t, err, errorMock.Error())
187 | }
188 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/mocks/spylogger_test.go:
--------------------------------------------------------------------------------
  1 | package mocks
  2 | 
  3 | import (
  4 | 	"math/rand"
  5 | 	"strconv"
  6 | 	"testing"
  7 | 
  8 | 	hclog "github.com/hashicorp/go-hclog"
  9 | 	"github.com/stretchr/testify/assert"
 10 | )
 11 | 
 12 | const (
 13 | 	maxLogCount = 80
 14 | 	maxArgCount = 10
 15 | )
 16 | 
 17 | func TestSpyLogger_AssertLogsEmpty(t *testing.T) {
 18 | 	logger := NewSpyLogger()
 19 | 	logger.AssertLogsEmpty(t)
 20 | }
 21 | 
 22 | func TestSpyLogger_AssertLogsOfLevelEqualNoArgs(t *testing.T) {
 23 | 	logger := NewSpyLogger()
 24 | 	var logs = make([][]LogMock, levelCount)
 25 | 	for level, levelLogs := range logs {
 26 | 		logsCount := rand.Intn(maxLogCount)
 27 | 		for i := 0; i < logsCount; i++ {
 28 | 			msg := "msg" + strconv.FormatUint(rand.Uint64(), 10)
 29 | 			levelLogs = append(levelLogs, LogMock{Msg: msg})
 30 | 			logger.Log(hclog.Level(level+1), msg)
 31 | 		}
 32 | 		logs[level] = levelLogs
 33 | 	}
 34 | 
 35 | 	for level, levelLogs := range logs {
 36 | 		logger.AssertLogsOfLevelEqual(t, hclog.Level(level+1), levelLogs)
 37 | 	}
 38 | }
 39 | 
 40 | func TestSpyLogger_AssertLogsOfLevelEqualArgs(t *testing.T) {
 41 | 	logger := NewSpyLogger()
 42 | 	var logs = make([][]LogMock, levelCount)
 43 | 	for level, levelLogs := range logs {
 44 | 		logsCount := rand.Intn(maxLogCount)
 45 | 		for i := 0; i < logsCount; i++ {
 46 | 			msg := "msg" + strconv.FormatUint(rand.Uint64(), 10)
 47 | 			args := generateArgs(rand.Intn(maxArgCount))
 48 | 			levelLogs = append(levelLogs, LogMock{Msg: msg, Args: args})
 49 | 			logger.Log(hclog.Level(level+1), msg, args...)
 50 | 		}
 51 | 		logs[level] = levelLogs
 52 | 	}
 53 | 
 54 | 	for level, levelLogs := range logs {
 55 | 		logger.AssertLogsOfLevelEqual(t, hclog.Level(level+1), levelLogs)
 56 | 	}
 57 | }
 58 | 
 59 | func TestSpyLogger_Trace(t *testing.T) {
 60 | 	logger := NewSpyLogger()
 61 | 	logsCount := rand.Intn(maxLogCount)
 62 | 	logs := make([]LogMock, 0, logsCount)
 63 | 	for i := 0; i < logsCount; i++ {
 64 | 		msg := "msg" + strconv.FormatUint(rand.Uint64(), 10)
 65 | 		args := generateArgs(rand.Intn(maxArgCount))
 66 | 		logs = append(logs, LogMock{Msg: msg, Args: args})
 67 | 		logger.Trace(msg, args...)
 68 | 	}
 69 | 
 70 | 	logger.AssertLogsOfLevelEqual(t, hclog.Trace, logs)
 71 | }
 72 | 
 73 | func TestSpyLogger_Debug(t *testing.T) {
 74 | 	logger := NewSpyLogger()
 75 | 	logsCount := rand.Intn(maxLogCount)
 76 | 	logs := make([]LogMock, 0, logsCount)
 77 | 	for i := 0; i < logsCount; i++ {
 78 | 		msg := "msg" + strconv.FormatUint(rand.Uint64(), 10)
 79 | 		args := generateArgs(rand.Intn(maxArgCount))
 80 | 		logs = append(logs, LogMock{Msg: msg, Args: args})
 81 | 		logger.Debug(msg, args...)
 82 | 	}
 83 | 
 84 | 	logger.AssertLogsOfLevelEqual(t, hclog.Debug, logs)
 85 | }
 86 | 
 87 | func TestSpyLogger_Info(t *testing.T) {
 88 | 	logger := NewSpyLogger()
 89 | 	logsCount := rand.Intn(maxLogCount)
 90 | 	logs := make([]LogMock, 0, logsCount)
 91 | 	for i := 0; i < logsCount; i++ {
 92 | 		msg := "msg" + strconv.FormatUint(rand.Uint64(), 10)
 93 | 		args := generateArgs(rand.Intn(maxArgCount))
 94 | 		logs = append(logs, LogMock{Msg: msg, Args: args})
 95 | 		logger.Info(msg, args...)
 96 | 	}
 97 | 
 98 | 	logger.AssertLogsOfLevelEqual(t, hclog.Info, logs)
 99 | }
100 | 
101 | func TestSpyLogger_Warn(t *testing.T) {
102 | 	logger := NewSpyLogger()
103 | 	logsCount := rand.Intn(maxLogCount)
104 | 	logs := make([]LogMock, 0, logsCount)
105 | 	for i := 0; i < logsCount; i++ {
106 | 		msg := "msg" + strconv.FormatUint(rand.Uint64(), 10)
107 | 		args := generateArgs(rand.Intn(maxArgCount))
108 | 		logs = append(logs, LogMock{Msg: msg, Args: args})
109 | 		logger.Warn(msg, args...)
110 | 	}
111 | 
112 | 	logger.AssertLogsOfLevelEqual(t, hclog.Warn, logs)
113 | }
114 | 
115 | func TestSpyLogger_Error(t *testing.T) {
116 | 	logger := NewSpyLogger()
117 | 	logsCount := rand.Intn(maxLogCount)
118 | 	logs := make([]LogMock, 0, logsCount)
119 | 	for i := 0; i < logsCount; i++ {
120 | 		msg := "msg" + strconv.FormatUint(rand.Uint64(), 10)
121 | 		args := generateArgs(rand.Intn(maxArgCount))
122 | 		logs = append(logs, LogMock{Msg: msg, Args: args})
123 | 		logger.Error(msg, args...)
124 | 	}
125 | 
126 | 	logger.AssertLogsOfLevelEqual(t, hclog.Error, logs)
127 | }
128 | 
// TestSpyLogger_Name checks that a freshly constructed spy logger reports the
// fixed name "spy logger".
func TestSpyLogger_Name(t *testing.T) {
	assert.Equal(t, "spy logger", NewSpyLogger().Name())
}
132 | 
133 | func TestNotImplemented(t *testing.T) {
134 | 	logger := NewSpyLogger()
135 | 
136 | 	tests := map[string]struct {
137 | 		function assert.PanicTestFunc
138 | 	}{
139 | 		"is_trace":        {function: func() { _ = logger.IsTrace() }},
140 | 		"is_debug":        {function: func() { _ = logger.IsDebug() }},
141 | 		"is_info":         {function: func() { _ = logger.IsInfo() }},
142 | 		"is_warn":         {function: func() { _ = logger.IsWarn() }},
143 | 		"is_error":        {function: func() { _ = logger.IsError() }},
144 | 		"implied_args":    {function: func() { _ = logger.ImpliedArgs() }},
145 | 		"with":            {function: func() { _ = logger.With() }},
146 | 		"named":           {function: func() { _ = logger.Named("") }},
147 | 		"reset_named":     {function: func() { _ = logger.ResetNamed("") }},
148 | 		"set_level":       {function: func() { logger.SetLevel(hclog.NoLevel) }},
149 | 		"standard_logger": {function: func() { _ = logger.StandardLogger(nil) }},
150 | 		"standard_writer": {function: func() { _ = logger.StandardWriter(nil) }},
151 | 	}
152 | 
153 | 	for name, test := range tests {
154 | 		t.Run(name, func(t *testing.T) {
155 | 			assert.Panics(t, test.function, "implement me")
156 | 		})
157 | 	}
158 | }
159 | 
// generateArgs builds a flat list of count random key/value pairs suitable for
// passing as variadic logger arguments. Even indexes hold "key<random>" strings
// and odd indexes hold "value<random>" strings, so the result has 2*count
// elements.
func generateArgs(count int) []interface{} {
	pairs := make([]interface{}, 2*count)
	for i := 0; i < len(pairs); i += 2 {
		pairs[i] = "key" + strconv.FormatUint(rand.Uint64(), 10)
		pairs[i+1] = "value" + strconv.FormatUint(rand.Uint64(), 10)
	}
	return pairs
}
171 | 


--------------------------------------------------------------------------------
/e2etests/e2e_test.go:
--------------------------------------------------------------------------------
  1 | package e2etests
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"encoding/json"
  6 | 	"fmt"
  7 | 	"io/ioutil"
  8 | 	"net/http"
  9 | 	"os"
 10 | 	"testing"
 11 | 	"time"
 12 | 
 13 | 	clickhouse "github.com/ClickHouse/clickhouse-go/v2"
 14 | 	"github.com/ecodia/golang-awaitility/awaitility"
 15 | 	"github.com/stretchr/testify/assert"
 16 | 	"github.com/stretchr/testify/require"
 17 | 	testcontainers "github.com/testcontainers/testcontainers-go"
 18 | 	"github.com/testcontainers/testcontainers-go/wait"
 19 | )
 20 | 
const (
	// Docker images started by the end-to-end test.
	clickHouseImage = "clickhouse/clickhouse-server:22"
	jaegerImage     = "jaegertracing/all-in-one:1.32.0"

	// Docker network shared by the containers, and the container ports the
	// test exposes and maps to the host.
	networkName     = "chi-jaeger-test"
	clickhousePort  = "9000/tcp"
	jaegerQueryPort = "16686/tcp"
	jaegerAdminPort = "14269/tcp"
)
 30 | 
// testCase describes one end-to-end scenario: configs lists the plugin
// configuration files (one Jaeger container is started per entry), and chiconf
// optionally names an extra ClickHouse config file to mount into the
// ClickHouse container (nil means no extra file).
type testCase struct {
	configs []string
	chiconf *string
}
 35 | 
 36 | func TestE2E(t *testing.T) {
 37 | 	if os.Getenv("E2E_TEST") == "" {
 38 | 		t.Skip("Set E2E_TEST=true to run the test")
 39 | 	}
 40 | 
 41 | 	// Minimal additional configuration (config.d) to enable cluster mode
 42 | 	chireplconf := "clickhouse-replicated.xml"
 43 | 
 44 | 	tests := map[string]testCase{
 45 | 		"local-single": {
 46 | 			configs: []string{"config-local-single.yaml"},
 47 | 			chiconf: nil,
 48 | 		},
 49 | 		"local-multi": {
 50 | 			configs: []string{"config-local-multi1.yaml", "config-local-multi2.yaml"},
 51 | 			chiconf: nil,
 52 | 		},
 53 | 		"replication-single": {
 54 | 			configs: []string{"config-replication-single.yaml"},
 55 | 			chiconf: &chireplconf,
 56 | 		},
 57 | 		"replication-multi": {
 58 | 			configs: []string{"config-replication-multi1.yaml", "config-replication-multi2.yaml"},
 59 | 			chiconf: &chireplconf,
 60 | 		},
 61 | 	}
 62 | 	for name, test := range tests {
 63 | 		t.Run(name, func(t *testing.T) {
 64 | 			testE2E(t, test)
 65 | 		})
 66 | 	}
 67 | }
 68 | 
 69 | func testE2E(t *testing.T, test testCase) {
 70 | 	ctx := context.Background()
 71 | 	workingDir, err := os.Getwd()
 72 | 	require.NoError(t, err)
 73 | 
 74 | 	network, err := testcontainers.GenericNetwork(ctx, testcontainers.GenericNetworkRequest{
 75 | 		NetworkRequest: testcontainers.NetworkRequest{Name: networkName},
 76 | 	})
 77 | 	require.NoError(t, err)
 78 | 	defer network.Remove(ctx)
 79 | 
 80 | 	var bindMounts map[string]string
 81 | 	if test.chiconf != nil {
 82 | 		bindMounts = map[string]string{
 83 | 			fmt.Sprintf("%s/%s", workingDir, *test.chiconf): "/etc/clickhouse-server/config.d/testconf.xml",
 84 | 		}
 85 | 	} else {
 86 | 		bindMounts = map[string]string{}
 87 | 	}
 88 | 	chReq := testcontainers.ContainerRequest{
 89 | 		Image:        clickHouseImage,
 90 | 		ExposedPorts: []string{clickhousePort},
 91 | 		WaitingFor:   &clickhouseWaitStrategy{test: t, pollInterval: time.Millisecond * 200, startupTimeout: time.Minute},
 92 | 		Networks:     []string{networkName},
 93 | 		Hostname:     "chi",
 94 | 		BindMounts:   bindMounts,
 95 | 	}
 96 | 	chContainer, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
 97 | 		ContainerRequest: chReq,
 98 | 		Started:          true,
 99 | 	})
100 | 	require.NoError(t, err)
101 | 	defer chContainer.Terminate(ctx)
102 | 
103 | 	jaegerContainers := make([]testcontainers.Container, 0)
104 | 	for _, pluginConfig := range test.configs {
105 | 		jaegerReq := testcontainers.ContainerRequest{
106 | 			Image:        jaegerImage,
107 | 			ExposedPorts: []string{jaegerQueryPort, jaegerAdminPort},
108 | 			WaitingFor:   wait.ForHTTP("/").WithPort(jaegerAdminPort).WithStartupTimeout(time.Second * 10),
109 | 			Env: map[string]string{
110 | 				"SPAN_STORAGE_TYPE": "grpc-plugin",
111 | 			},
112 | 			Cmd: []string{
113 | 				"--grpc-storage-plugin.binary=/project-dir/jaeger-clickhouse-linux-amd64",
114 | 				fmt.Sprintf("--grpc-storage-plugin.configuration-file=/project-dir/e2etests/%s", pluginConfig),
115 | 				"--grpc-storage-plugin.log-level=debug",
116 | 			},
117 | 			BindMounts: map[string]string{
118 | 				workingDir + "/..": "/project-dir",
119 | 			},
120 | 			Networks: []string{networkName},
121 | 		}
122 | 		// Call Start() manually here so that if it fails then we can still access the logs.
123 | 		jaegerContainer, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
124 | 			ContainerRequest: jaegerReq,
125 | 		})
126 | 		require.NoError(t, err)
127 | 		defer func() {
128 | 			logs, errLogs := jaegerContainer.Logs(ctx)
129 | 			require.NoError(t, errLogs)
130 | 			all, errLogs := ioutil.ReadAll(logs)
131 | 			require.NoError(t, errLogs)
132 | 			fmt.Printf("Jaeger logs:\n---->\n%s<----\n\n", string(all))
133 | 			jaegerContainer.Terminate(ctx)
134 | 		}()
135 | 		err = jaegerContainer.Start(ctx)
136 | 		require.NoError(t, err)
137 | 
138 | 		jaegerContainers = append(jaegerContainers, jaegerContainer)
139 | 	}
140 | 
141 | 	for _, jaegerContainer := range jaegerContainers {
142 | 		jaegerQueryPort, err := jaegerContainer.MappedPort(ctx, jaegerQueryPort)
143 | 		require.NoError(t, err)
144 | 
145 | 		err = awaitility.Await(100*time.Millisecond, time.Second*3, func() bool {
146 | 			// Jaeger traces itself so this request generates some spans
147 | 			response, errHTTP := http.Get(fmt.Sprintf("http://localhost:%d/api/services", jaegerQueryPort.Int()))
148 | 			require.NoError(t, errHTTP)
149 | 			body, errHTTP := ioutil.ReadAll(response.Body)
150 | 			require.NoError(t, errHTTP)
151 | 			var r result
152 | 			errHTTP = json.Unmarshal(body, &r)
153 | 			require.NoError(t, errHTTP)
154 | 			return len(r.Data) == 1 && r.Data[0] == "jaeger-query"
155 | 		})
156 | 		assert.NoError(t, err)
157 | 	}
158 | }
159 | 
// result is the JSON envelope decoded from the Jaeger query API response;
// only the "data" string list is used by the test.
type result struct {
	Data []string `json:"data"`
}
163 | 
// clickhouseWaitStrategy is a testcontainers wait strategy that considers the
// ClickHouse container ready once a database ping succeeds. test is used to
// report setup failures, pollInterval is the delay between ping attempts and
// startupTimeout bounds the total waiting time.
type clickhouseWaitStrategy struct {
	test           *testing.T
	pollInterval   time.Duration
	startupTimeout time.Duration
}

// Compile-time check that clickhouseWaitStrategy implements wait.Strategy.
var _ wait.Strategy = (*clickhouseWaitStrategy)(nil)
171 | 
172 | func (c *clickhouseWaitStrategy) WaitUntilReady(ctx context.Context, target wait.StrategyTarget) error {
173 | 	ctx, cancelContext := context.WithTimeout(ctx, c.startupTimeout)
174 | 	defer cancelContext()
175 | 
176 | 	port, err := target.MappedPort(ctx, clickhousePort)
177 | 	require.NoError(c.test, err)
178 | 
179 | 	db := clickhouse.OpenDB(&clickhouse.Options{
180 | 		Addr: []string{
181 | 			fmt.Sprintf("localhost:%d", port.Int()),
182 | 		},
183 | 		Auth: clickhouse.Auth{
184 | 			Database: "default",
185 | 		},
186 | 		Compression: &clickhouse.Compression{
187 | 			Method: clickhouse.CompressionLZ4,
188 | 		},
189 | 	})
190 | 	require.NoError(c.test, err)
191 | 
192 | 	for {
193 | 		select {
194 | 		case <-ctx.Done():
195 | 			return ctx.Err()
196 | 		case <-time.After(c.pollInterval):
197 | 			if err := db.Ping(); err != nil {
198 | 				continue
199 | 			}
200 | 			return nil
201 | 		}
202 | 	}
203 | }
204 | 


--------------------------------------------------------------------------------
/storage/config.go:
--------------------------------------------------------------------------------
  1 | package storage
  2 | 
  3 | import (
  4 | 	"time"
  5 | 
  6 | 	"github.com/jaegertracing/jaeger-clickhouse/storage/clickhousespanstore"
  7 | )
  8 | 
// EncodingType names the serialization format used for stored spans.
type EncodingType string

const (
	// Supported encodings and the fallback values applied by
	// Configuration.setDefaults when a field is left at its zero value.
	defaultEncoding                     = JSONEncoding
	JSONEncoding           EncodingType = "json"
	ProtobufEncoding       EncodingType = "protobuf"
	defaultMaxSpanCount                 = int(1e7)
	defaultBatchSize                    = 10_000
	defaultBatchDelay                   = time.Second * 5
	defaultUsername                     = "default"
	defaultDatabaseName                 = "default"
	defaultMetricsEndpoint              = "localhost:9090"
	defaultMaxNumSpans                  = 0

	// Base table names; setDefaults uses them as-is when replication is
	// enabled and converts them with ToLocal otherwise.
	defaultSpansTable      clickhousespanstore.TableName = "jaeger_spans"
	defaultSpansIndexTable clickhousespanstore.TableName = "jaeger_index"
	defaultOperationsTable clickhousespanstore.TableName = "jaeger_operations"
)
 27 | 
// Configuration holds the plugin settings loaded from YAML. Fields left at
// their zero value are filled in by setDefaults.
type Configuration struct {
	// Batch write size. Default is 10_000.
	BatchWriteSize int64 `yaml:"batch_write_size"`
	// Batch flush interval. Default is 5s.
	BatchFlushInterval time.Duration `yaml:"batch_flush_interval"`
	// Maximal amount of spans that can be pending writes at a time.
	// New spans exceeding this limit will be discarded,
	// keeping memory in check if there are issues writing to ClickHouse.
	// Check the "jaeger_clickhouse_discarded_spans" metric to keep track of discards.
	// Default 10_000_000, or disable the limit entirely by setting to 0.
	// NOTE(review): setDefaults replaces a value of 0 with the default
	// (10_000_000), so 0 does not survive setDefaults — confirm how the limit
	// is meant to be disabled.
	MaxSpanCount int `yaml:"max_span_count"`
	// Encoding either json or protobuf. Default is json.
	Encoding EncodingType `yaml:"encoding"`
	// ClickHouse address e.g. localhost:9000.
	Address string `yaml:"address"`
	// Directory with .sql files to run at plugin startup, mainly for integration tests.
	// Depending on the value of init_tables, this can be run as a
	// replacement or supplement to creating default tables for span storage.
	// If init_tables is also enabled, the scripts in this directory will be run first.
	InitSQLScriptsDir string `yaml:"init_sql_scripts_dir"`
	// Whether to automatically attempt to create tables in ClickHouse.
	// By default, this is enabled if init_sql_scripts_dir is empty,
	// or disabled if init_sql_scripts_dir is provided.
	InitTables *bool `yaml:"init_tables"`
	// Indicates location of TLS certificate used to connect to database.
	CaFile string `yaml:"ca_file"`
	// Username for connection to database. Default is "default".
	Username string `yaml:"username"`
	// Password for connection to database.
	Password string `yaml:"password"`
	// Database name. Default is "default"
	Database string `yaml:"database"`
	// Endpoint for scraping prometheus metrics e.g. localhost:9090.
	MetricsEndpoint string `yaml:"metrics_endpoint"`
	// Whether to use SQL scripts supporting replication and sharding. Default false.
	Replication bool `yaml:"replication"`
	// If non-empty, enables multitenancy in SQL scripts, and assigns the tenant name for this instance.
	Tenant string `yaml:"tenant"`
	// Table with spans. Default "jaeger_spans_local" or "jaeger_spans" when replication is enabled.
	SpansTable clickhousespanstore.TableName `yaml:"spans_table"`
	// Span index table. Default "jaeger_index_local" or "jaeger_index" when replication is enabled.
	SpansIndexTable clickhousespanstore.TableName `yaml:"spans_index_table"`
	// Operations table. Default "jaeger_operations_local" or "jaeger_operations" when replication is enabled.
	// The unexported spansArchiveTable below is not read from YAML; it is
	// derived from the spans table name in setDefaults.
	OperationsTable   clickhousespanstore.TableName `yaml:"operations_table"`
	spansArchiveTable clickhousespanstore.TableName
	// TTL for data in tables in days. If 0, no TTL is set. Default 0.
	TTLDays uint `yaml:"ttl"`
	// The maximum number of spans to fetch per trace. If 0, no limits is set. Default 0.
	MaxNumSpans uint `yaml:"max_num_spans"`
	// The maximum number of open connections to the database. Default is unlimited (see: https://pkg.go.dev/database/sql#DB.SetMaxOpenConns)
	MaxOpenConns *uint `yaml:"max_open_conns"`
	// The maximum number of database connections in the idle connection pool. Default 2. (see: https://pkg.go.dev/database/sql#DB.SetMaxIdleConns)
	MaxIdleConns *uint `yaml:"max_idle_conns"`
	// The maximum amount of milliseconds a database connection may be reused. Default = connections are never closed due to age (see: https://pkg.go.dev/database/sql#DB.SetConnMaxLifetime)
	ConnMaxLifetimeMillis *uint `yaml:"conn_max_lifetime_millis"`
	// The maximum amount of milliseconds a database connection may be idle. Default = connections are never closed due to idle time (see: https://pkg.go.dev/database/sql#DB.SetConnMaxIdleTime)
	ConnMaxIdleTimeMillis *uint `yaml:"conn_max_idle_time_millis"`
}
 86 | 
 87 | func (cfg *Configuration) setDefaults() {
 88 | 	if cfg.BatchWriteSize == 0 {
 89 | 		cfg.BatchWriteSize = defaultBatchSize
 90 | 	}
 91 | 	if cfg.BatchFlushInterval == 0 {
 92 | 		cfg.BatchFlushInterval = defaultBatchDelay
 93 | 	}
 94 | 	if cfg.MaxSpanCount == 0 {
 95 | 		cfg.MaxSpanCount = defaultMaxSpanCount
 96 | 	}
 97 | 	if cfg.Encoding == "" {
 98 | 		cfg.Encoding = defaultEncoding
 99 | 	}
100 | 	if cfg.InitTables == nil {
101 | 		// Decide whether to init tables based on whether a custom script path was provided
102 | 		var defaultInitTables bool
103 | 		if cfg.InitSQLScriptsDir == "" {
104 | 			defaultInitTables = true
105 | 		} else {
106 | 			defaultInitTables = false
107 | 		}
108 | 		cfg.InitTables = &defaultInitTables
109 | 	}
110 | 	if cfg.Username == "" {
111 | 		cfg.Username = defaultUsername
112 | 	}
113 | 	if cfg.Database == "" {
114 | 		cfg.Database = defaultDatabaseName
115 | 	}
116 | 	if cfg.MetricsEndpoint == "" {
117 | 		cfg.MetricsEndpoint = defaultMetricsEndpoint
118 | 	}
119 | 	if cfg.MaxNumSpans == 0 {
120 | 		cfg.MaxNumSpans = defaultMaxNumSpans
121 | 	}
122 | 	if cfg.SpansTable == "" {
123 | 		if cfg.Replication {
124 | 			cfg.SpansTable = defaultSpansTable
125 | 			cfg.spansArchiveTable = defaultSpansTable + "_archive"
126 | 		} else {
127 | 			cfg.SpansTable = defaultSpansTable.ToLocal()
128 | 			cfg.spansArchiveTable = (defaultSpansTable + "_archive").ToLocal()
129 | 		}
130 | 	} else {
131 | 		cfg.spansArchiveTable = cfg.SpansTable + "_archive"
132 | 	}
133 | 	if cfg.SpansIndexTable == "" {
134 | 		if cfg.Replication {
135 | 			cfg.SpansIndexTable = defaultSpansIndexTable
136 | 		} else {
137 | 			cfg.SpansIndexTable = defaultSpansIndexTable.ToLocal()
138 | 		}
139 | 	}
140 | 	if cfg.OperationsTable == "" {
141 | 		if cfg.Replication {
142 | 			cfg.OperationsTable = defaultOperationsTable
143 | 		} else {
144 | 			cfg.OperationsTable = defaultOperationsTable.ToLocal()
145 | 		}
146 | 	}
147 | }
148 | 
// GetSpansArchiveTable returns the archive spans table name. The value is
// assigned by setDefaults, so it is empty until setDefaults has run.
func (cfg *Configuration) GetSpansArchiveTable() clickhousespanstore.TableName {
	return cfg.spansArchiveTable
}
152 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/worker.go:
--------------------------------------------------------------------------------
  1 | package clickhousespanstore
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"fmt"
  6 | 	"sort"
  7 | 	"strings"
  8 | 	"sync"
  9 | 	"time"
 10 | 
 11 | 	"github.com/gogo/protobuf/proto"
 12 | 	"github.com/jaegertracing/jaeger/model"
 13 | )
 14 | 
// delays holds the multipliers applied to the configured base delay for
// successive write retries; once exhausted, the last multiplier keeps being used.
var delays = []int{2, 3, 5, 8}
 16 | 
// WriteWorker writes spans to ClickHouse.
// Given a batch of spans, WriteWorker attempts to write them to database.
// Interval in seconds between attempts changes due to delays slice, then it remains the same as the last value in delays.
//
// batch is the set of spans this worker writes. finish receives a value from
// Close to make Work stop retrying. workerDone receives the worker itself when
// Work is about to return. done is held by Work while it runs and is what
// Close waits on.
type WriteWorker struct {
	// workerID is an arbitrary identifier for keeping track of this worker in logs
	workerID   int32
	params     *WorkerParams
	batch      []*model.Span
	finish     chan bool
	workerDone chan *WriteWorker
	done       sync.WaitGroup
}
 29 | 
 30 | func (worker *WriteWorker) Work() {
 31 | 	worker.done.Add(1)
 32 | 
 33 | 	defer worker.done.Done()
 34 | 
 35 | 	// TODO: look for specific error(connection refused | database error)
 36 | 	if err := worker.writeBatch(worker.batch); err != nil {
 37 | 		worker.params.logger.Error("Could not write a batch of spans", "error", err, "worker_id", worker.workerID)
 38 | 	} else {
 39 | 		worker.close()
 40 | 		return
 41 | 	}
 42 | 	attempt := 0
 43 | 	for {
 44 | 		currentDelay := worker.getCurrentDelay(&attempt, worker.params.delay)
 45 | 		timer := time.After(currentDelay)
 46 | 		select {
 47 | 		case <-worker.finish:
 48 | 			worker.close()
 49 | 			return
 50 | 		case <-timer:
 51 | 			if err := worker.writeBatch(worker.batch); err != nil {
 52 | 				worker.params.logger.Error("Could not write a batch of spans", "error", err, "worker_id", worker.workerID)
 53 | 			} else {
 54 | 				worker.close()
 55 | 				return
 56 | 			}
 57 | 		}
 58 | 	}
 59 | }
 60 | 
// Close asks the worker to stop retrying by sending on the finish channel and
// then blocks until Work has returned.
// NOTE(review): Work only receives from finish while waiting between retries;
// whether the send can block indefinitely depends on how finish is created,
// which is not visible here — confirm against the pool code.
func (worker *WriteWorker) Close() {
	worker.finish <- true
	worker.done.Wait()
}
 65 | 
 66 | func (worker *WriteWorker) getCurrentDelay(attempt *int, delay time.Duration) time.Duration {
 67 | 	if *attempt < len(delays) {
 68 | 		*attempt++
 69 | 	}
 70 | 	return time.Duration(int64(delays[*attempt-1]) * delay.Nanoseconds())
 71 | }
 72 | 
// close reports this worker as finished by sending it on the workerDone channel.
func (worker *WriteWorker) close() {
	worker.workerDone <- worker
}
 76 | 
 77 | func (worker *WriteWorker) writeBatch(batch []*model.Span) error {
 78 | 	worker.params.logger.Debug("Writing spans", "size", len(batch))
 79 | 	if err := worker.writeModelBatch(batch); err != nil {
 80 | 		return err
 81 | 	}
 82 | 
 83 | 	if worker.params.indexTable != "" {
 84 | 		if err := worker.writeIndexBatch(batch); err != nil {
 85 | 			return err
 86 | 		}
 87 | 	}
 88 | 
 89 | 	return nil
 90 | }
 91 | 
 92 | func (worker *WriteWorker) writeModelBatch(batch []*model.Span) error {
 93 | 	tx, err := worker.params.db.Begin()
 94 | 	if err != nil {
 95 | 		return err
 96 | 	}
 97 | 
 98 | 	committed := false
 99 | 
100 | 	defer func() {
101 | 		if !committed {
102 | 			// Clickhouse does not support real rollback
103 | 			_ = tx.Rollback()
104 | 		}
105 | 	}()
106 | 
107 | 	var query string
108 | 	if worker.params.tenant == "" {
109 | 		query = fmt.Sprintf("INSERT INTO %s (timestamp, traceID, model) VALUES (?, ?, ?)", worker.params.spansTable)
110 | 	} else {
111 | 		query = fmt.Sprintf("INSERT INTO %s (tenant, timestamp, traceID, model) VALUES (?, ?, ?, ?)", worker.params.spansTable)
112 | 	}
113 | 
114 | 	statement, err := tx.Prepare(query)
115 | 	if err != nil {
116 | 		return err
117 | 	}
118 | 
119 | 	defer statement.Close()
120 | 
121 | 	for _, span := range batch {
122 | 		var serialized []byte
123 | 
124 | 		if worker.params.encoding == EncodingJSON {
125 | 			serialized, err = json.Marshal(span)
126 | 		} else {
127 | 			serialized, err = proto.Marshal(span)
128 | 		}
129 | 
130 | 		if err != nil {
131 | 			return err
132 | 		}
133 | 
134 | 		if worker.params.tenant == "" {
135 | 			_, err = statement.Exec(span.StartTime, span.TraceID.String(), serialized)
136 | 		} else {
137 | 			_, err = statement.Exec(worker.params.tenant, span.StartTime, span.TraceID.String(), serialized)
138 | 		}
139 | 		if err != nil {
140 | 			return err
141 | 		}
142 | 	}
143 | 
144 | 	committed = true
145 | 
146 | 	return tx.Commit()
147 | }
148 | 
149 | func (worker *WriteWorker) writeIndexBatch(batch []*model.Span) error {
150 | 	tx, err := worker.params.db.Begin()
151 | 	if err != nil {
152 | 		return err
153 | 	}
154 | 
155 | 	committed := false
156 | 
157 | 	defer func() {
158 | 		if !committed {
159 | 			// Clickhouse does not support real rollback
160 | 			_ = tx.Rollback()
161 | 		}
162 | 	}()
163 | 
164 | 	var query string
165 | 	if worker.params.tenant == "" {
166 | 		query = fmt.Sprintf(
167 | 			"INSERT INTO %s (timestamp, traceID, service, operation, durationUs, tags.key, tags.value) VALUES (?, ?, ?, ?, ?, ?, ?)",
168 | 			worker.params.indexTable,
169 | 		)
170 | 	} else {
171 | 		query = fmt.Sprintf(
172 | 			"INSERT INTO %s (tenant, timestamp, traceID, service, operation, durationUs, tags.key, tags.value) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
173 | 			worker.params.indexTable,
174 | 		)
175 | 	}
176 | 
177 | 	statement, err := tx.Prepare(query)
178 | 	if err != nil {
179 | 		return err
180 | 	}
181 | 
182 | 	defer statement.Close()
183 | 
184 | 	for _, span := range batch {
185 | 		keys, values := uniqueTagsForSpan(span)
186 | 		if worker.params.tenant == "" {
187 | 			_, err = statement.Exec(
188 | 				span.StartTime,
189 | 				span.TraceID.String(),
190 | 				span.Process.ServiceName,
191 | 				span.OperationName,
192 | 				uint64(span.Duration.Microseconds()),
193 | 				keys,
194 | 				values,
195 | 			)
196 | 		} else {
197 | 			_, err = statement.Exec(
198 | 				worker.params.tenant,
199 | 				span.StartTime,
200 | 				span.TraceID.String(),
201 | 				span.Process.ServiceName,
202 | 				span.OperationName,
203 | 				uint64(span.Duration.Microseconds()),
204 | 				keys,
205 | 				values,
206 | 			)
207 | 		}
208 | 		if err != nil {
209 | 			return err
210 | 		}
211 | 	}
212 | 
213 | 	committed = true
214 | 
215 | 	return tx.Commit()
216 | }
217 | 
218 | func uniqueTagsForSpan(span *model.Span) (keys, values []string) {
219 | 	uniqueTags := make(map[string][]string, len(span.Tags)+len(span.Process.Tags))
220 | 
221 | 	for i := range span.Tags {
222 | 		key := tagKey(&span.GetTags()[i])
223 | 		uniqueTags[key] = append(uniqueTags[key], tagValue(&span.GetTags()[i]))
224 | 	}
225 | 
226 | 	for i := range span.Process.Tags {
227 | 		key := tagKey(&span.GetProcess().GetTags()[i])
228 | 		uniqueTags[key] = append(uniqueTags[key], tagValue(&span.GetProcess().GetTags()[i]))
229 | 	}
230 | 
231 | 	for _, event := range span.Logs {
232 | 		for i := range event.Fields {
233 | 			key := tagKey(&event.GetFields()[i])
234 | 			uniqueTags[key] = append(uniqueTags[key], tagValue(&event.GetFields()[i]))
235 | 		}
236 | 	}
237 | 
238 | 	keys = make([]string, 0, len(uniqueTags))
239 | 	for k := range uniqueTags {
240 | 		keys = append(keys, k)
241 | 	}
242 | 	sort.Strings(keys)
243 | 
244 | 	values = make([]string, 0, len(uniqueTags))
245 | 	for _, key := range keys {
246 | 		values = append(values, strings.Join(unique(uniqueTags[key]), ","))
247 | 	}
248 | 
249 | 	return keys, values
250 | }
251 | 
// tagKey returns the key of the given key/value pair.
func tagKey(kv *model.KeyValue) string {
	return kv.Key
}
255 | 
// tagValue returns the value of the given key/value pair rendered as a string
// via its AsString method.
func tagValue(kv *model.KeyValue) string {
	return kv.AsString()
}
259 | 
// unique returns the distinct strings of slice in order of first appearance.
// A one-element input is returned as-is; any other input yields a newly
// allocated, non-nil slice.
func unique(slice []string) []string {
	if len(slice) == 1 {
		return slice
	}

	seen := make(map[string]struct{}, len(slice))
	distinct := make([]string, 0, len(slice))
	for _, s := range slice {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		distinct = append(distinct, s)
	}
	return distinct
}
275 | 


--------------------------------------------------------------------------------
/guide-sharding-and-replication.md:
--------------------------------------------------------------------------------
  1 | # Sharding and Replication
  2 | 
This guide describes how to set up sharding and replication for Jaeger data.
  4 | This guide uses [clickhouse-operator](https://github.com/Altinity/clickhouse-operator) to deploy
  5 | the storage.
  6 | 
Note that the Jaeger ClickHouse plugin supports creating a replicated schema out of the box. Therefore,
this guide is not necessary for setting up the default replicated deployment. Also note that the
ClickHouse operator uses the `Ordinary` database engine by default, which does not work with the
embedded replication scripts in Jaeger.
Refer to `config.yaml` for how to set up a replicated deployment.
 12 | 
 13 | ## Sharding
 14 | 
 15 | Sharding is a feature that allows splitting the data into multiple Clickhouse nodes to
 16 | increase throughput and decrease latency.
 17 | The sharding feature uses `Distributed` engine that is backed by local tables.
 18 | The distributed engine is a "virtual" table that does not store any data. It is used as
 19 | an interface to insert and query data.
 20 | 
To set up sharding, run the following statements on all nodes in the cluster.
 22 | The "local" tables have to be created on the nodes before the distributed table.
 23 | 
 24 | ```sql
 25 | CREATE DATABASE jaeger ENGINE=Atomic;
 26 | USE jaeger;
 27 | 
 28 | CREATE TABLE IF NOT EXISTS jaeger_spans AS jaeger_spans_local ENGINE = Distributed('{cluster}', default, jaeger_spans_local, cityHash64(traceID));
 29 | CREATE TABLE IF NOT EXISTS jaeger_index AS jaeger_index_local ENGINE = Distributed('{cluster}', default, jaeger_index_local, cityHash64(traceID));
 30 | CREATE TABLE IF NOT EXISTS jaeger_operations AS jaeger_operations_local ENGINE = Distributed('{cluster}', default, jaeger_operations_local, rand());
 31 | ```
 32 | 
* The `AS <table-name>` clause creates the table with the same schema as the specified one.
* The `Distributed` engine takes the cluster, database, table name and sharding key as parameters.
 35 | 
If the distributed table is not created on all Clickhouse nodes, the Jaeger query fails to get the data from the storage.
 37 | 
 38 | ### Deploy Clickhouse
 39 | 
 40 | Deploy Clickhouse with 2 shards:
 41 | 
 42 | ```yaml
 43 | cat <<EOF | kubectl apply -f -
 44 | apiVersion: clickhouse.altinity.com/v1
 45 | kind: ClickHouseInstallation
 46 | metadata:
 47 |   name: jaeger
 48 | spec:
 49 |   configuration:
 50 |     clusters:
 51 |       - name: cluster1
 52 |         layout:
 53 |           shardsCount: 2
 54 | EOF
 55 | ```
 56 | 
 57 | Use the following command to run `clickhouse-client` on Clickhouse nodes and create the distributed tables:
 58 | ```bash
 59 | kubectl exec -it statefulset.apps/chi-jaeger-cluster1-0-0 -- clickhouse-client
 60 | ```
 61 | 
 62 | ### Plugin configuration
 63 | 
The plugin has to be configured to write and read data from the global tables:
 65 | 
 66 | ```yaml
 67 | address: clickhouse-jaeger:9000
 68 | # database: jaeger
 69 | spans_table: jaeger_spans
 70 | spans_index_table: jaeger_index
 71 | operations_table: jaeger_operations
 72 | ```
 73 | 
 74 | ## Replication
 75 | 
Replication, as the name suggests, automatically replicates the data across multiple Clickhouse nodes.
 77 | It is used to accomplish high availability, load scaling and migration/updates.
 78 | 
 79 | The replication uses Zookeeper. Refer to the Clickhouse operator how to deploy Zookeeper.
 80 | 
Zookeeper allows us to use `ON CLUSTER` to automatically replicate table creation on all nodes.
Therefore, the following commands need to be run on only a single Clickhouse node:
 83 | 
 84 | ```sql
 85 | CREATE DATABASE IF NOT EXISTS jaeger ON CLUSTER '{cluster}' ENGINE=Atomic;
 86 | USE jaeger;
 87 | 
 88 | CREATE TABLE IF NOT EXISTS jaeger_spans_local ON CLUSTER '{cluster}' (
 89 |     timestamp DateTime CODEC(Delta, ZSTD(1)),
 90 |     traceID String CODEC(ZSTD(1)),
 91 |     model String CODEC(ZSTD(3))
 92 | ) ENGINE ReplicatedMergeTree
 93 | PARTITION BY toDate(timestamp)
 94 | ORDER BY traceID
 95 | SETTINGS index_granularity=1024;
 96 | 
 97 | CREATE TABLE IF NOT EXISTS jaeger_index_local ON CLUSTER '{cluster}' (
 98 |     timestamp DateTime CODEC(Delta, ZSTD(1)),
 99 |     traceID String CODEC(ZSTD(1)),
100 |     service LowCardinality(String) CODEC(ZSTD(1)),
101 |     operation LowCardinality(String) CODEC(ZSTD(1)),
102 |     durationUs UInt64 CODEC(ZSTD(1)),
103 |     tags Array(String) CODEC(ZSTD(1)),
104 |     INDEX idx_tags tags TYPE bloom_filter(0.01) GRANULARITY 64,
105 |     INDEX idx_duration durationUs TYPE minmax GRANULARITY 1
106 | ) ENGINE ReplicatedMergeTree
107 | PARTITION BY toDate(timestamp)
108 | ORDER BY (service, -toUnixTimestamp(timestamp))
109 | SETTINGS index_granularity=1024;
110 | 
111 | CREATE MATERIALIZED VIEW IF NOT EXISTS jaeger_operations_local ON CLUSTER '{cluster}'
112 | ENGINE ReplicatedMergeTree
113 | PARTITION BY toYYYYMM(date) ORDER BY (date, service, operation)
114 | SETTINGS index_granularity=32
115 | POPULATE
116 | AS SELECT
117 |     toDate(timestamp) AS date,
118 |     service,
119 |     operation,
120 | count() as count
121 | FROM jaeger.jaeger_index_local
122 | GROUP BY date, service, operation;
123 | 
124 | 
125 | CREATE TABLE IF NOT EXISTS jaeger_spans ON CLUSTER '{cluster}' AS jaeger.jaeger_spans_local ENGINE = Distributed('{cluster}', jaeger, jaeger_spans_local, cityHash64(traceID));
126 | CREATE TABLE IF NOT EXISTS jaeger_index ON CLUSTER '{cluster}' AS jaeger.jaeger_index_local ENGINE = Distributed('{cluster}', jaeger, jaeger_index_local, cityHash64(traceID));
127 | CREATE TABLE IF NOT EXISTS jaeger_operations on CLUSTER '{cluster}' AS jaeger.jaeger_operations_local ENGINE = Distributed('{cluster}', jaeger, jaeger_operations_local, rand());
128 | ```
129 | 
130 | ### Deploy Clickhouse
131 | 
132 | Before deploying Clickhouse make sure Zookeeper is running in `zoo1ns` namespace.
133 | 
134 | Deploy Clickhouse with 3 shards and 2 replicas. In total Clickhouse operator will deploy 6 pods:
135 | 
136 | ```yaml
137 | cat <<EOF | kubectl apply -f -
138 | apiVersion: clickhouse.altinity.com/v1
139 | kind: ClickHouseInstallation
140 | metadata:
141 |   name: jaeger
142 | spec:
143 |   defaults:
144 |     templates:
145 |       dataVolumeClaimTemplate: data-volume-template
146 |       logVolumeClaimTemplate: log-volume-template
147 |   configuration:
148 |     zookeeper:
149 |       nodes:
150 |         - host: zookeeper.zoo1ns
151 |     clusters:
152 |       - name: cluster1
153 |         layout:
154 |           shardsCount: 3
155 |           replicasCount: 2
156 |   templates:
157 |     volumeClaimTemplates:
158 |       - name: data-volume-template
159 |         spec:
160 |           accessModes:
161 |             - ReadWriteOnce
162 |           resources:
163 |             requests:
164 |               storage: 1Gi
165 |       - name: log-volume-template
166 |         spec:
167 |           accessModes:
168 |             - ReadWriteOnce
169 |           resources:
170 |             requests:
171 |               storage: 100Mi
172 | EOF
173 | ```
174 | 
175 | The Clickhouse deployment will look like this:
176 | ```bash
kubectl get statefulsets
178 | NAME                      READY   AGE
179 | chi-jaeger-cluster1-0-0   1/1     17m    # shard 0
180 | chi-jaeger-cluster1-0-1   1/1     17m    # shard 0, replica 1
181 | chi-jaeger-cluster1-1-0   1/1     16m    # shard 1
182 | chi-jaeger-cluster1-1-1   1/1     16m    # shard 1, replica 1
183 | chi-jaeger-cluster1-2-0   1/1     7m43s  # shard 2
184 | chi-jaeger-cluster1-2-1   1/1     7m26s  # shard 2, replica 1
185 | ```
186 | 
187 | #### Scaling up
188 | 
Just increase the `shardsCount` value and a new Clickhouse node will come up. It will have the Jaeger tables
initialized, so no other steps are required. Note that old data is not re-balanced; only new writes take the
new node into account.
192 | 
193 | ## Useful Commands
194 | 
195 | ### SQL
196 | 
197 | ```sql
198 | show tables;
199 | select count() from jaeger_spans;
200 | ```
201 | 
202 | ### Kubectl
203 | 
204 | ```bash
205 | kubectl get chi -o wide
206 | kubectl port-forward service/clickhouse-jaeger 9000:9000
207 | kubectl delete chi jaeger
208 | ```
209 | 


--------------------------------------------------------------------------------
/internal/tools/go.mod:
--------------------------------------------------------------------------------
  1 | module github.com/jaegertracing/jaeger-clickhouse/internal/tools
  2 | 
  3 | go 1.19
  4 | 
  5 | require (
  6 | 	github.com/golangci/golangci-lint v1.41.1
  7 | 	golang.org/x/tools v0.1.5
  8 | )
  9 | 
 10 | require (
 11 | 	4d63.com/gochecknoglobals v0.0.0-20201008074935-acfc0b28355a // indirect
 12 | 	github.com/BurntSushi/toml v0.3.1 // indirect
 13 | 	github.com/Djarvur/go-err113 v0.0.0-20210108212216-aea10b59be24 // indirect
 14 | 	github.com/Masterminds/semver v1.5.0 // indirect
 15 | 	github.com/OpenPeeDeeP/depguard v1.0.1 // indirect
 16 | 	github.com/alexkohler/prealloc v1.0.0 // indirect
 17 | 	github.com/ashanbrown/forbidigo v1.2.0 // indirect
 18 | 	github.com/ashanbrown/makezero v0.0.0-20210520155254-b6261585ddde // indirect
 19 | 	github.com/beorn7/perks v1.0.1 // indirect
 20 | 	github.com/bkielbasa/cyclop v1.2.0 // indirect
 21 | 	github.com/bombsimon/wsl/v3 v3.3.0 // indirect
 22 | 	github.com/cespare/xxhash/v2 v2.1.1 // indirect
 23 | 	github.com/charithe/durationcheck v0.0.8 // indirect
 24 | 	github.com/chavacava/garif v0.0.0-20210405164556-e8a0a408d6af // indirect
 25 | 	github.com/daixiang0/gci v0.2.8 // indirect
 26 | 	github.com/davecgh/go-spew v1.1.1 // indirect
 27 | 	github.com/denis-tingajkin/go-header v0.4.2 // indirect
 28 | 	github.com/esimonov/ifshort v1.0.2 // indirect
 29 | 	github.com/ettle/strcase v0.1.1 // indirect
 30 | 	github.com/fatih/color v1.12.0 // indirect
 31 | 	github.com/fatih/structtag v1.2.0 // indirect
 32 | 	github.com/fsnotify/fsnotify v1.4.9 // indirect
 33 | 	github.com/fzipp/gocyclo v0.3.1 // indirect
 34 | 	github.com/go-critic/go-critic v0.5.6 // indirect
 35 | 	github.com/go-toolsmith/astcast v1.0.0 // indirect
 36 | 	github.com/go-toolsmith/astcopy v1.0.0 // indirect
 37 | 	github.com/go-toolsmith/astequal v1.0.0 // indirect
 38 | 	github.com/go-toolsmith/astfmt v1.0.0 // indirect
 39 | 	github.com/go-toolsmith/astp v1.0.0 // indirect
 40 | 	github.com/go-toolsmith/strparse v1.0.0 // indirect
 41 | 	github.com/go-toolsmith/typep v1.0.2 // indirect
 42 | 	github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b // indirect
 43 | 	github.com/gobwas/glob v0.2.3 // indirect
 44 | 	github.com/gofrs/flock v0.8.0 // indirect
 45 | 	github.com/golang/protobuf v1.4.3 // indirect
 46 | 	github.com/golangci/check v0.0.0-20180506172741-cfe4005ccda2 // indirect
 47 | 	github.com/golangci/dupl v0.0.0-20180902072040-3e9179ac440a // indirect
 48 | 	github.com/golangci/go-misc v0.0.0-20180628070357-927a3d87b613 // indirect
 49 | 	github.com/golangci/gofmt v0.0.0-20190930125516-244bba706f1a // indirect
 50 | 	github.com/golangci/lint-1 v0.0.0-20191013205115-297bf364a8e0 // indirect
 51 | 	github.com/golangci/maligned v0.0.0-20180506175553-b1d89398deca // indirect
 52 | 	github.com/golangci/misspell v0.3.5 // indirect
 53 | 	github.com/golangci/revgrep v0.0.0-20210208091834-cd28932614b5 // indirect
 54 | 	github.com/golangci/unconvert v0.0.0-20180507085042-28b1c447d1f4 // indirect
 55 | 	github.com/google/go-cmp v0.5.4 // indirect
 56 | 	github.com/gordonklaus/ineffassign v0.0.0-20210225214923-2e10b2664254 // indirect
 57 | 	github.com/gostaticanalysis/analysisutil v0.4.1 // indirect
 58 | 	github.com/gostaticanalysis/comment v1.4.1 // indirect
 59 | 	github.com/gostaticanalysis/forcetypeassert v0.0.0-20200621232751-01d4955beaa5 // indirect
 60 | 	github.com/gostaticanalysis/nilerr v0.1.1 // indirect
 61 | 	github.com/hashicorp/errwrap v1.0.0 // indirect
 62 | 	github.com/hashicorp/go-multierror v1.1.1 // indirect
 63 | 	github.com/hashicorp/hcl v1.0.0 // indirect
 64 | 	github.com/inconshreveable/mousetrap v1.0.0 // indirect
 65 | 	github.com/jgautheron/goconst v1.5.1 // indirect
 66 | 	github.com/jingyugao/rowserrcheck v1.1.0 // indirect
 67 | 	github.com/jirfag/go-printf-func-name v0.0.0-20200119135958-7558a9eaa5af // indirect
 68 | 	github.com/julz/importas v0.0.0-20210419104244-841f0c0fe66d // indirect
 69 | 	github.com/kisielk/errcheck v1.6.0 // indirect
 70 | 	github.com/kisielk/gotool v1.0.0 // indirect
 71 | 	github.com/kulti/thelper v0.4.0 // indirect
 72 | 	github.com/kunwardeep/paralleltest v1.0.2 // indirect
 73 | 	github.com/kyoh86/exportloopref v0.1.8 // indirect
 74 | 	github.com/ldez/gomoddirectives v0.2.1 // indirect
 75 | 	github.com/ldez/tagliatelle v0.2.0 // indirect
 76 | 	github.com/magiconair/properties v1.8.1 // indirect
 77 | 	github.com/maratori/testpackage v1.0.1 // indirect
 78 | 	github.com/matoous/godox v0.0.0-20210227103229-6504466cf951 // indirect
 79 | 	github.com/mattn/go-colorable v0.1.8 // indirect
 80 | 	github.com/mattn/go-isatty v0.0.12 // indirect
 81 | 	github.com/mattn/go-runewidth v0.0.9 // indirect
 82 | 	github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
 83 | 	github.com/mbilski/exhaustivestruct v1.2.0 // indirect
 84 | 	github.com/mgechev/dots v0.0.0-20190921121421-c36f7dcfbb81 // indirect
 85 | 	github.com/mgechev/revive v1.0.7 // indirect
 86 | 	github.com/mitchellh/go-homedir v1.1.0 // indirect
 87 | 	github.com/mitchellh/mapstructure v1.1.2 // indirect
 88 | 	github.com/moricho/tparallel v0.2.1 // indirect
 89 | 	github.com/nakabonne/nestif v0.3.0 // indirect
 90 | 	github.com/nbutton23/zxcvbn-go v0.0.0-20210217022336-fa2cb2858354 // indirect
 91 | 	github.com/nishanths/exhaustive v0.1.0 // indirect
 92 | 	github.com/nishanths/predeclared v0.2.1 // indirect
 93 | 	github.com/olekukonko/tablewriter v0.0.5 // indirect
 94 | 	github.com/pelletier/go-toml v1.2.0 // indirect
 95 | 	github.com/phayes/checkstyle v0.0.0-20170904204023-bfd46e6a821d // indirect
 96 | 	github.com/pkg/errors v0.9.1 // indirect
 97 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
 98 | 	github.com/polyfloyd/go-errorlint v0.0.0-20210510181950-ab96adb96fea // indirect
 99 | 	github.com/prometheus/client_golang v1.7.1 // indirect
100 | 	github.com/prometheus/client_model v0.2.0 // indirect
101 | 	github.com/prometheus/common v0.10.0 // indirect
102 | 	github.com/prometheus/procfs v0.1.3 // indirect
103 | 	github.com/quasilyte/go-ruleguard v0.3.4 // indirect
104 | 	github.com/quasilyte/regex/syntax v0.0.0-20200407221936-30656e2c4a95 // indirect
105 | 	github.com/ryancurrah/gomodguard v1.2.2 // indirect
106 | 	github.com/ryanrolds/sqlclosecheck v0.3.0 // indirect
107 | 	github.com/sanposhiho/wastedassign/v2 v2.0.6 // indirect
108 | 	github.com/securego/gosec/v2 v2.8.0 // indirect
109 | 	github.com/shazow/go-diff v0.0.0-20160112020656-b6b7b6733b8c // indirect
110 | 	github.com/sirupsen/logrus v1.8.1 // indirect
111 | 	github.com/sonatard/noctx v0.0.1 // indirect
112 | 	github.com/sourcegraph/go-diff v0.6.1 // indirect
113 | 	github.com/spf13/afero v1.1.2 // indirect
114 | 	github.com/spf13/cast v1.3.0 // indirect
115 | 	github.com/spf13/cobra v1.1.3 // indirect
116 | 	github.com/spf13/jwalterweatherman v1.0.0 // indirect
117 | 	github.com/spf13/pflag v1.0.5 // indirect
118 | 	github.com/spf13/viper v1.7.1 // indirect
119 | 	github.com/ssgreg/nlreturn/v2 v2.1.0 // indirect
120 | 	github.com/stretchr/objx v0.1.1 // indirect
121 | 	github.com/stretchr/testify v1.7.0 // indirect
122 | 	github.com/subosito/gotenv v1.2.0 // indirect
123 | 	github.com/tdakkota/asciicheck v0.0.0-20200416200610-e657995f937b // indirect
124 | 	github.com/tetafro/godot v1.4.7 // indirect
125 | 	github.com/timakin/bodyclose v0.0.0-20200424151742-cb6215831a94 // indirect
126 | 	github.com/tomarrell/wrapcheck/v2 v2.1.0 // indirect
127 | 	github.com/tommy-muehle/go-mnd/v2 v2.4.0 // indirect
128 | 	github.com/ultraware/funlen v0.0.3 // indirect
129 | 	github.com/ultraware/whitespace v0.0.4 // indirect
130 | 	github.com/uudashr/gocognit v1.0.1 // indirect
131 | 	github.com/yeya24/promlinter v0.1.0 // indirect
132 | 	golang.org/x/mod v0.4.2 // indirect
133 | 	golang.org/x/sys v0.0.0-20210510120138-977fb7262007 // indirect
134 | 	golang.org/x/text v0.3.5 // indirect
135 | 	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
136 | 	google.golang.org/protobuf v1.25.0 // indirect
137 | 	gopkg.in/ini.v1 v1.51.0 // indirect
138 | 	gopkg.in/yaml.v2 v2.4.0 // indirect
139 | 	gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
140 | 	honnef.co/go/tools v0.2.0 // indirect
141 | 	mvdan.cc/gofumpt v0.1.1 // indirect
142 | 	mvdan.cc/interfacer v0.0.0-20180901003855-c20040233aed // indirect
143 | 	mvdan.cc/lint v0.0.0-20170908181259-adc824a0674b // indirect
144 | 	mvdan.cc/unparam v0.0.0-20210104141923-aac4ce9116a7 // indirect
145 | )
146 | 


--------------------------------------------------------------------------------
/storage/store.go:
--------------------------------------------------------------------------------
  1 | package storage
  2 | 
  3 | import (
  4 | 	"crypto/tls"
  5 | 	"crypto/x509"
  6 | 	"database/sql"
  7 | 	"fmt"
  8 | 	"io"
  9 | 	"os"
 10 | 	"path/filepath"
 11 | 	"sort"
 12 | 	"strings"
 13 | 	"text/template"
 14 | 	"time"
 15 | 
 16 | 	clickhouse "github.com/ClickHouse/clickhouse-go/v2"
 17 | 	hclog "github.com/hashicorp/go-hclog"
 18 | 	"github.com/jaegertracing/jaeger/plugin/storage/grpc/shared"
 19 | 	"github.com/jaegertracing/jaeger/storage/dependencystore"
 20 | 	"github.com/jaegertracing/jaeger/storage/spanstore"
 21 | 
 22 | 	jaegerclickhouse "github.com/jaegertracing/jaeger-clickhouse"
 23 | 	"github.com/jaegertracing/jaeger-clickhouse/storage/clickhousedependencystore"
 24 | 	"github.com/jaegertracing/jaeger-clickhouse/storage/clickhousespanstore"
 25 | )
 26 | 
// Store bundles the database handle with the span readers and writers that
// NewStore builds on top of it, and exposes them through the storage plugin
// accessor methods defined below.
type Store struct {
	// db is the connection handle shared by all readers and writers; Close closes it.
	db            *sql.DB
	// writer and reader serve the primary span tables.
	writer        spanstore.Writer
	reader        spanstore.Reader
	// archiveWriter and archiveReader serve the archive span table.
	archiveWriter spanstore.Writer
	archiveReader spanstore.Reader
}
 34 | 
// Compile-time assertions that *Store satisfies every interface it is
// expected to implement; a missing method fails the build here.
var (
	_ shared.StoragePlugin             = (*Store)(nil)
	_ shared.ArchiveStoragePlugin      = (*Store)(nil)
	_ shared.StreamingSpanWriterPlugin = (*Store)(nil)
	_ io.Closer                        = (*Store)(nil)
)
 41 | 
 42 | func NewStore(logger hclog.Logger, cfg Configuration) (*Store, error) {
 43 | 	cfg.setDefaults()
 44 | 	db, err := connector(cfg)
 45 | 	if err != nil {
 46 | 		return nil, fmt.Errorf("could not connect to database: %q", err)
 47 | 	}
 48 | 
 49 | 	if err := runInitScripts(logger, db, cfg); err != nil {
 50 | 		_ = db.Close()
 51 | 		return nil, err
 52 | 	}
 53 | 	if cfg.Replication {
 54 | 		return &Store{
 55 | 			db: db,
 56 | 			writer: clickhousespanstore.NewSpanWriter(
 57 | 				logger,
 58 | 				db,
 59 | 				cfg.SpansIndexTable,
 60 | 				cfg.SpansTable,
 61 | 				cfg.Tenant,
 62 | 				clickhousespanstore.Encoding(cfg.Encoding),
 63 | 				cfg.BatchFlushInterval,
 64 | 				cfg.BatchWriteSize,
 65 | 				cfg.MaxSpanCount,
 66 | 			),
 67 | 			reader: clickhousespanstore.NewTraceReader(
 68 | 				db,
 69 | 				cfg.OperationsTable,
 70 | 				cfg.SpansIndexTable,
 71 | 				cfg.SpansTable,
 72 | 				cfg.Tenant,
 73 | 				cfg.MaxNumSpans,
 74 | 			),
 75 | 			archiveWriter: clickhousespanstore.NewSpanWriter(
 76 | 				logger,
 77 | 				db,
 78 | 				"",
 79 | 				cfg.GetSpansArchiveTable(),
 80 | 				cfg.Tenant,
 81 | 				clickhousespanstore.Encoding(cfg.Encoding),
 82 | 				cfg.BatchFlushInterval,
 83 | 				cfg.BatchWriteSize,
 84 | 				cfg.MaxSpanCount,
 85 | 			),
 86 | 			archiveReader: clickhousespanstore.NewTraceReader(
 87 | 				db,
 88 | 				"",
 89 | 				"",
 90 | 				cfg.GetSpansArchiveTable(),
 91 | 				cfg.Tenant,
 92 | 				cfg.MaxNumSpans,
 93 | 			),
 94 | 		}, nil
 95 | 	}
 96 | 	return &Store{
 97 | 		db: db,
 98 | 		writer: clickhousespanstore.NewSpanWriter(
 99 | 			logger,
100 | 			db,
101 | 			cfg.SpansIndexTable,
102 | 			cfg.SpansTable,
103 | 			cfg.Tenant,
104 | 			clickhousespanstore.Encoding(cfg.Encoding),
105 | 			cfg.BatchFlushInterval,
106 | 			cfg.BatchWriteSize,
107 | 			cfg.MaxSpanCount,
108 | 		),
109 | 		reader: clickhousespanstore.NewTraceReader(
110 | 			db,
111 | 			cfg.OperationsTable,
112 | 			cfg.SpansIndexTable,
113 | 			cfg.SpansTable,
114 | 			cfg.Tenant,
115 | 			cfg.MaxNumSpans,
116 | 		),
117 | 		archiveWriter: clickhousespanstore.NewSpanWriter(
118 | 			logger,
119 | 			db,
120 | 			"",
121 | 			cfg.GetSpansArchiveTable(),
122 | 			cfg.Tenant,
123 | 			clickhousespanstore.Encoding(cfg.Encoding),
124 | 			cfg.BatchFlushInterval,
125 | 			cfg.BatchWriteSize,
126 | 			cfg.MaxSpanCount,
127 | 		),
128 | 		archiveReader: clickhousespanstore.NewTraceReader(
129 | 			db,
130 | 			"",
131 | 			"",
132 | 			cfg.GetSpansArchiveTable(),
133 | 			cfg.Tenant,
134 | 			cfg.MaxNumSpans,
135 | 		),
136 | 	}, nil
137 | }
138 | 
139 | func connector(cfg Configuration) (*sql.DB, error) {
140 | 	var conn *sql.DB
141 | 
142 | 	options := clickhouse.Options{
143 | 		Addr: []string{sanitize(cfg.Address)},
144 | 		Auth: clickhouse.Auth{
145 | 			Database: cfg.Database,
146 | 			Username: cfg.Username,
147 | 			Password: cfg.Password,
148 | 		},
149 | 		Compression: &clickhouse.Compression{
150 | 			Method: clickhouse.CompressionLZ4,
151 | 		},
152 | 	}
153 | 
154 | 	if cfg.CaFile != "" {
155 | 		caCert, err := os.ReadFile(cfg.CaFile)
156 | 		if err != nil {
157 | 			return nil, err
158 | 		}
159 | 		caCertPool := x509.NewCertPool()
160 | 		caCertPool.AppendCertsFromPEM(caCert)
161 | 		options.TLS = &tls.Config{
162 | 			RootCAs: caCertPool,
163 | 		}
164 | 	}
165 | 	conn = clickhouse.OpenDB(&options)
166 | 
167 | 	if cfg.MaxOpenConns != nil {
168 | 		conn.SetMaxIdleConns(int(*cfg.MaxOpenConns))
169 | 	}
170 | 	if cfg.MaxIdleConns != nil {
171 | 		conn.SetMaxIdleConns(int(*cfg.MaxIdleConns))
172 | 	}
173 | 	if cfg.ConnMaxLifetimeMillis != nil {
174 | 		conn.SetConnMaxLifetime(time.Millisecond * time.Duration(*cfg.ConnMaxLifetimeMillis))
175 | 	}
176 | 	if cfg.ConnMaxIdleTimeMillis != nil {
177 | 		conn.SetConnMaxIdleTime(time.Millisecond * time.Duration(*cfg.ConnMaxIdleTimeMillis))
178 | 	}
179 | 
180 | 	if err := conn.Ping(); err != nil {
181 | 		return nil, err
182 | 	}
183 | 	return conn, nil
184 | }
185 | 
// tableArgs is the data passed by runInitScripts to the table-creation SQL
// templates (index, operations, spans and spans-archive).
type tableArgs struct {
	// Database is taken from the configuration's Database value.
	Database string

	// Table names; runInitScripts converts them with ToLocal when
	// replication is enabled.
	SpansIndexTable   clickhousespanstore.TableName
	SpansTable        clickhousespanstore.TableName
	OperationsTable   clickhousespanstore.TableName
	SpansArchiveTable clickhousespanstore.TableName

	// TTL clauses built by runInitScripts; left empty unless the configured
	// TTLDays is greater than zero.
	TTLTimestamp string
	TTLDate      string

	// Multitenant is true when a tenant is configured (non-empty).
	Multitenant bool
	// Replication mirrors the configuration's Replication flag.
	Replication bool
}
200 | 
// distributedTableArgs is the data passed by runInitScripts to the
// "distributed-table.tmpl.sql" template, once per table, when replication is
// enabled.
type distributedTableArgs struct {
	// Database is taken from the configuration's Database value.
	Database string
	// Table is the table name without the "_local" suffix.
	Table    clickhousespanstore.TableName
	// Hash is a SQL expression such as "cityHash64(traceID)" or "rand()".
	Hash     string
}
206 | 
// render executes the template called filename from templates with args and
// returns the produced text. It panics if template execution reports an
// error (for example when no template of that name exists).
func render(templates *template.Template, filename string, args interface{}) string {
	out := new(strings.Builder)
	if execErr := templates.ExecuteTemplate(out, filename, args); execErr != nil {
		panic(execErr)
	}
	return out.String()
}
215 | 
216 | func runInitScripts(logger hclog.Logger, db *sql.DB, cfg Configuration) error {
217 | 	var (
218 | 		sqlStatements []string
219 | 		ttlTimestamp  string
220 | 		ttlDate       string
221 | 	)
222 | 	if cfg.TTLDays > 0 {
223 | 		ttlTimestamp = fmt.Sprintf("TTL timestamp + INTERVAL %d DAY DELETE", cfg.TTLDays)
224 | 		ttlDate = fmt.Sprintf("TTL date + INTERVAL %d DAY DELETE", cfg.TTLDays)
225 | 	}
226 | 	if cfg.InitSQLScriptsDir != "" {
227 | 		filePaths, err := walkMatch(cfg.InitSQLScriptsDir, "*.sql")
228 | 		if err != nil {
229 | 			return fmt.Errorf("could not list sql files: %q", err)
230 | 		}
231 | 		sort.Strings(filePaths)
232 | 		for _, f := range filePaths {
233 | 			sqlStatement, err := os.ReadFile(filepath.Clean(f))
234 | 			if err != nil {
235 | 				return err
236 | 			}
237 | 			sqlStatements = append(sqlStatements, string(sqlStatement))
238 | 		}
239 | 	}
240 | 	if *cfg.InitTables {
241 | 		templates := template.Must(template.ParseFS(jaegerclickhouse.SQLScripts, "sqlscripts/*.tmpl.sql"))
242 | 
243 | 		args := tableArgs{
244 | 			Database: cfg.Database,
245 | 
246 | 			SpansIndexTable:   cfg.SpansIndexTable,
247 | 			SpansTable:        cfg.SpansTable,
248 | 			OperationsTable:   cfg.OperationsTable,
249 | 			SpansArchiveTable: cfg.GetSpansArchiveTable(),
250 | 
251 | 			TTLTimestamp: ttlTimestamp,
252 | 			TTLDate:      ttlDate,
253 | 
254 | 			Multitenant: cfg.Tenant != "",
255 | 			Replication: cfg.Replication,
256 | 		}
257 | 
258 | 		if cfg.Replication {
259 | 			// Add "_local" to the local table names, and omit it from the distributed tables below
260 | 			args.SpansIndexTable = args.SpansIndexTable.ToLocal()
261 | 			args.SpansTable = args.SpansTable.ToLocal()
262 | 			args.OperationsTable = args.OperationsTable.ToLocal()
263 | 			args.SpansArchiveTable = args.SpansArchiveTable.ToLocal()
264 | 		}
265 | 
266 | 		sqlStatements = append(sqlStatements, render(templates, "jaeger-index.tmpl.sql", args))
267 | 		sqlStatements = append(sqlStatements, render(templates, "jaeger-operations.tmpl.sql", args))
268 | 		sqlStatements = append(sqlStatements, render(templates, "jaeger-spans.tmpl.sql", args))
269 | 		sqlStatements = append(sqlStatements, render(templates, "jaeger-spans-archive.tmpl.sql", args))
270 | 
271 | 		if cfg.Replication {
272 | 			// Now these tables omit the "_local" suffix
273 | 			distargs := distributedTableArgs{
274 | 				Table:    cfg.SpansTable,
275 | 				Database: cfg.Database,
276 | 				Hash:     "cityHash64(traceID)",
277 | 			}
278 | 			sqlStatements = append(sqlStatements, render(templates, "distributed-table.tmpl.sql", distargs))
279 | 
280 | 			distargs.Table = cfg.SpansIndexTable
281 | 			sqlStatements = append(sqlStatements, render(templates, "distributed-table.tmpl.sql", distargs))
282 | 
283 | 			distargs.Table = cfg.GetSpansArchiveTable()
284 | 			sqlStatements = append(sqlStatements, render(templates, "distributed-table.tmpl.sql", distargs))
285 | 
286 | 			distargs.Table = cfg.OperationsTable
287 | 			distargs.Hash = "rand()"
288 | 			sqlStatements = append(sqlStatements, render(templates, "distributed-table.tmpl.sql", distargs))
289 | 		}
290 | 	}
291 | 	return executeScripts(logger, sqlStatements, db)
292 | }
293 | 
// SpanReader returns the reader for the primary span tables.
func (s *Store) SpanReader() spanstore.Reader {
	return s.reader
}
297 | 
// SpanWriter returns the writer for the primary span tables.
func (s *Store) SpanWriter() spanstore.Writer {
	return s.writer
}
301 | 
// DependencyReader returns a freshly constructed dependency store on every
// call; it does not use any state held by s.
func (s *Store) DependencyReader() dependencystore.Reader {
	return clickhousedependencystore.NewDependencyStore()
}
305 | 
// ArchiveSpanReader returns the reader for the archive span table.
func (s *Store) ArchiveSpanReader() spanstore.Reader {
	return s.archiveReader
}
309 | 
// ArchiveSpanWriter returns the writer for the archive span table.
func (s *Store) ArchiveSpanWriter() spanstore.Writer {
	return s.archiveWriter
}
313 | 
// StreamingSpanWriter returns the same writer instance as SpanWriter.
func (s *Store) StreamingSpanWriter() spanstore.Writer {
	return s.writer
}
317 | 
// Close closes the underlying database handle and returns its error, if any.
func (s *Store) Close() error {
	return s.db.Close()
}
321 | 
322 | func executeScripts(logger hclog.Logger, sqlStatements []string, db *sql.DB) error {
323 | 	tx, err := db.Begin()
324 | 	if err != nil {
325 | 		return err
326 | 	}
327 | 	committed := false
328 | 	defer func() {
329 | 		if !committed {
330 | 			_ = tx.Rollback()
331 | 		}
332 | 	}()
333 | 
334 | 	for _, statement := range sqlStatements {
335 | 		logger.Debug("Running SQL statement", "statement", statement)
336 | 		_, err = tx.Exec(statement)
337 | 		if err != nil {
338 | 			return fmt.Errorf("could not run sql %q: %q", statement, err)
339 | 		}
340 | 	}
341 | 	committed = true
342 | 	return tx.Commit()
343 | }
344 | 
// walkMatch walks the file tree rooted at root and returns the paths of all
// non-directory entries whose base name matches pattern (filepath.Match
// syntax). Any error from the walk or from pattern matching stops the walk
// and is returned with a nil slice.
func walkMatch(root, pattern string) ([]string, error) {
	var found []string

	visit := func(path string, info os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case info.IsDir():
			// Directories are never reported, only descended into.
			return nil
		}

		ok, matchErr := filepath.Match(pattern, filepath.Base(path))
		if matchErr != nil {
			return matchErr
		}
		if ok {
			found = append(found, path)
		}
		return nil
	}

	if err := filepath.Walk(root, visit); err != nil {
		return nil, err
	}
	return found, nil
}
366 | 
// sanitize strips a leading "tcp://" scheme from addr, if present.
//
// Older clickhouse-go releases expected addresses as tcp://host:port while
// newer ones expect plain host:port, so the scheme is removed to keep
// existing configurations working.
func sanitize(addr string) string {
	const legacyScheme = "tcp://"
	if strings.HasPrefix(addr, legacyScheme) {
		return addr[len(legacyScheme):]
	}
	return addr
}
372 | }
373 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/reader.go:
--------------------------------------------------------------------------------
  1 | package clickhousespanstore
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"database/sql"
  6 | 	"encoding/json"
  7 | 	"errors"
  8 | 	"fmt"
  9 | 	"strings"
 10 | 	"time"
 11 | 
 12 | 	"github.com/gogo/protobuf/proto"
 13 | 	"github.com/jaegertracing/jaeger/model"
 14 | 	"github.com/jaegertracing/jaeger/storage/spanstore"
 15 | 	opentracing "github.com/opentracing/opentracing-go"
 16 | )
 17 | 
// Tuning constants for the progressive search in FindTraceIDs.
//
// minTimespanForProgressiveSearch is the lower bound applied to the first
// search window. Together with minTimespanForProgressiveSearchMargin it also
// forms the threshold below which the whole range is searched with a single
// query. maxProgressiveSteps is both the number of times the full range is
// halved to size the first window and the maximum number of windows queried.
const (
	minTimespanForProgressiveSearch       = time.Hour
	minTimespanForProgressiveSearchMargin = time.Minute
	maxProgressiveSteps                   = 4
)
 23 | 
// Sentinel errors returned by TraceReader.
//
// errNoOperationsTable is returned by GetServices and GetOperations when the
// reader has an empty operations table name; errNoIndexTable is returned by
// findTraceIDsInRange when the index table name is empty; errStartTimeRequired
// is returned by FindTraceIDs when the query has a zero StartTimeMin.
var (
	errNoOperationsTable = errors.New("no operations table supplied")
	errNoIndexTable      = errors.New("no index table supplied")
	errStartTimeRequired = errors.New("start time is required for search queries")
)
 29 | 
// TraceReader for reading spans from ClickHouse.
//
// db is the handle every query is issued on. operationsTable backs
// GetServices and GetOperations, indexTable backs trace ID searches and
// spansTable holds the serialized span models read by getTraces. When tenant
// is non-empty, every query is additionally filtered by "tenant = ?". When
// maxNumSpans is greater than zero, getTraces limits the number of spans
// fetched per trace with "LIMIT n BY traceID".
type TraceReader struct {
	db              *sql.DB
	operationsTable TableName
	indexTable      TableName
	spansTable      TableName
	tenant          string
	maxNumSpans     uint
}

// Compile-time check that *TraceReader implements spanstore.Reader.
var _ spanstore.Reader = (*TraceReader)(nil)
 41 | 
 42 | // NewTraceReader returns a TraceReader for the database
 43 | func NewTraceReader(db *sql.DB, operationsTable, indexTable, spansTable TableName, tenant string, maxNumSpans uint) *TraceReader {
 44 | 	return &TraceReader{
 45 | 		db:              db,
 46 | 		operationsTable: operationsTable,
 47 | 		indexTable:      indexTable,
 48 | 		spansTable:      spansTable,
 49 | 		tenant:          tenant,
 50 | 		maxNumSpans:     maxNumSpans,
 51 | 	}
 52 | }
 53 | 
 54 | func (r *TraceReader) getTraces(ctx context.Context, traceIDs []model.TraceID) ([]*model.Trace, error) {
 55 | 	returning := make([]*model.Trace, 0, len(traceIDs))
 56 | 
 57 | 	if len(traceIDs) == 0 {
 58 | 		return returning, nil
 59 | 	}
 60 | 
 61 | 	span, _ := opentracing.StartSpanFromContext(ctx, "getTraces")
 62 | 	defer span.Finish()
 63 | 
 64 | 	args := make([]interface{}, len(traceIDs))
 65 | 	for i, traceID := range traceIDs {
 66 | 		args[i] = traceID.String()
 67 | 	}
 68 | 
 69 | 	// It's more efficient to do PREWHERE on traceID to the only read needed models:
 70 | 	// * https://clickhouse.tech/docs/en/sql-reference/statements/select/prewhere/
 71 | 	//nolint:gosec  , G201: SQL string formatting
 72 | 	query := fmt.Sprintf("SELECT model FROM %s PREWHERE traceID IN (%s)", r.spansTable, "?"+strings.Repeat(",?", len(traceIDs)-1))
 73 | 
 74 | 	if r.tenant != "" {
 75 | 		query += " AND tenant = ?"
 76 | 		args = append(args, r.tenant)
 77 | 	}
 78 | 
 79 | 	if r.maxNumSpans > 0 {
 80 | 		query += fmt.Sprintf(" ORDER BY timestamp LIMIT %d BY traceID", r.maxNumSpans)
 81 | 	}
 82 | 
 83 | 	span.SetTag("db.statement", query)
 84 | 	span.SetTag("db.args", args)
 85 | 
 86 | 	rows, err := r.db.QueryContext(ctx, query, args...)
 87 | 	if err != nil {
 88 | 		return nil, err
 89 | 	}
 90 | 
 91 | 	defer rows.Close()
 92 | 
 93 | 	traces := map[model.TraceID]*model.Trace{}
 94 | 
 95 | 	for rows.Next() {
 96 | 		var serialized string
 97 | 
 98 | 		err = rows.Scan(&serialized)
 99 | 		if err != nil {
100 | 			return nil, err
101 | 		}
102 | 
103 | 		span := model.Span{}
104 | 
105 | 		if serialized[0] == '{' {
106 | 			err = json.Unmarshal([]byte(serialized), &span)
107 | 		} else {
108 | 			err = proto.Unmarshal([]byte(serialized), &span)
109 | 		}
110 | 
111 | 		if err != nil {
112 | 			return nil, err
113 | 		}
114 | 
115 | 		if _, ok := traces[span.TraceID]; !ok {
116 | 			traces[span.TraceID] = &model.Trace{}
117 | 		}
118 | 
119 | 		traces[span.TraceID].Spans = append(traces[span.TraceID].Spans, &span)
120 | 	}
121 | 
122 | 	if err := rows.Err(); err != nil {
123 | 		return nil, err
124 | 	}
125 | 
126 | 	for _, traceID := range traceIDs {
127 | 		if trace, ok := traces[traceID]; ok {
128 | 			returning = append(returning, trace)
129 | 		}
130 | 	}
131 | 
132 | 	return returning, nil
133 | }
134 | 
135 | // GetTrace takes a traceID and returns a Trace associated with that traceID
136 | func (r *TraceReader) GetTrace(ctx context.Context, traceID model.TraceID) (*model.Trace, error) {
137 | 	span, ctx := opentracing.StartSpanFromContext(ctx, "GetTrace")
138 | 	defer span.Finish()
139 | 
140 | 	traces, err := r.getTraces(ctx, []model.TraceID{traceID})
141 | 	if err != nil {
142 | 		return nil, err
143 | 	}
144 | 
145 | 	if len(traces) == 0 {
146 | 		return nil, spanstore.ErrTraceNotFound
147 | 	}
148 | 
149 | 	return traces[0], nil
150 | }
151 | 
152 | func (r *TraceReader) getStrings(ctx context.Context, sql string, args ...interface{}) ([]string, error) {
153 | 	rows, err := r.db.QueryContext(ctx, sql, args...)
154 | 	if err != nil {
155 | 		return nil, err
156 | 	}
157 | 
158 | 	defer rows.Close()
159 | 
160 | 	values := make([]string, 0)
161 | 
162 | 	for rows.Next() {
163 | 		var value string
164 | 		if err := rows.Scan(&value); err != nil {
165 | 			return nil, err
166 | 		}
167 | 		values = append(values, value)
168 | 	}
169 | 
170 | 	if err := rows.Err(); err != nil {
171 | 		return nil, err
172 | 	}
173 | 
174 | 	return values, nil
175 | }
176 | 
177 | // GetServices fetches the sorted service list that have not expired
178 | func (r *TraceReader) GetServices(ctx context.Context) ([]string, error) {
179 | 	span, ctx := opentracing.StartSpanFromContext(ctx, "GetServices")
180 | 	defer span.Finish()
181 | 
182 | 	if r.operationsTable == "" {
183 | 		return nil, errNoOperationsTable
184 | 	}
185 | 
186 | 	query := fmt.Sprintf("SELECT service FROM %s", r.operationsTable)
187 | 	args := make([]interface{}, 0)
188 | 
189 | 	if r.tenant != "" {
190 | 		query += " WHERE tenant = ?"
191 | 		args = append(args, r.tenant)
192 | 	}
193 | 
194 | 	query += " GROUP BY service"
195 | 	span.SetTag("db.statement", query)
196 | 	span.SetTag("db.args", args)
197 | 
198 | 	return r.getStrings(ctx, query, args...)
199 | }
200 | 
201 | // GetOperations fetches operations in the service and empty slice if service does not exists
202 | func (r *TraceReader) GetOperations(
203 | 	ctx context.Context,
204 | 	params spanstore.OperationQueryParameters,
205 | ) ([]spanstore.Operation, error) {
206 | 	span, ctx := opentracing.StartSpanFromContext(ctx, "GetOperations")
207 | 	defer span.Finish()
208 | 
209 | 	if r.operationsTable == "" {
210 | 		return nil, errNoOperationsTable
211 | 	}
212 | 
213 | 	//nolint:gosec  , G201: SQL string formatting
214 | 	query := fmt.Sprintf("SELECT operation, spankind FROM %s WHERE", r.operationsTable)
215 | 	args := make([]interface{}, 0)
216 | 
217 | 	if r.tenant != "" {
218 | 		query += " tenant = ? AND"
219 | 		args = append(args, r.tenant)
220 | 	}
221 | 
222 | 	query += " service = ? GROUP BY operation, spankind ORDER BY operation"
223 | 	args = append(args, params.ServiceName)
224 | 
225 | 	span.SetTag("db.statement", query)
226 | 	span.SetTag("db.args", args)
227 | 
228 | 	rows, err := r.db.QueryContext(ctx, query, args...)
229 | 	if err != nil {
230 | 		return nil, err
231 | 	}
232 | 
233 | 	defer rows.Close()
234 | 
235 | 	operations := make([]spanstore.Operation, 0)
236 | 
237 | 	for rows.Next() {
238 | 		var name, spanKind string
239 | 		if err := rows.Scan(&name, &spanKind); err != nil {
240 | 			return nil, err
241 | 		}
242 | 		operation := spanstore.Operation{Name: name}
243 | 		if spanKind != "" {
244 | 			operation.SpanKind = spanKind
245 | 		}
246 | 		operations = append(operations, operation)
247 | 	}
248 | 
249 | 	if err := rows.Err(); err != nil {
250 | 		return nil, err
251 | 	}
252 | 
253 | 	return operations, nil
254 | }
255 | 
256 | // FindTraces retrieves traces that match the traceQuery
257 | func (r *TraceReader) FindTraces(ctx context.Context, query *spanstore.TraceQueryParameters) ([]*model.Trace, error) {
258 | 	span, ctx := opentracing.StartSpanFromContext(ctx, "FindTraces")
259 | 	defer span.Finish()
260 | 
261 | 	traceIDs, err := r.FindTraceIDs(ctx, query)
262 | 	if err != nil {
263 | 		return nil, err
264 | 	}
265 | 
266 | 	return r.getTraces(ctx, traceIDs)
267 | }
268 | 
269 | // FindTraceIDs retrieves only the TraceIDs that match the traceQuery, but not the trace data
270 | func (r *TraceReader) FindTraceIDs(ctx context.Context, params *spanstore.TraceQueryParameters) ([]model.TraceID, error) {
271 | 	span, ctx := opentracing.StartSpanFromContext(ctx, "FindTraceIDs")
272 | 	defer span.Finish()
273 | 
274 | 	if params.StartTimeMin.IsZero() {
275 | 		return nil, errStartTimeRequired
276 | 	}
277 | 
278 | 	end := params.StartTimeMax
279 | 	if end.IsZero() {
280 | 		end = time.Now()
281 | 	}
282 | 
283 | 	fullTimeSpan := end.Sub(params.StartTimeMin)
284 | 
285 | 	if fullTimeSpan < minTimespanForProgressiveSearch+minTimespanForProgressiveSearchMargin {
286 | 		return r.findTraceIDsInRange(ctx, params, params.StartTimeMin, end, nil)
287 | 	}
288 | 
289 | 	timeSpan := fullTimeSpan
290 | 	for step := 0; step < maxProgressiveSteps; step++ {
291 | 		timeSpan /= 2
292 | 	}
293 | 
294 | 	if timeSpan < minTimespanForProgressiveSearch {
295 | 		timeSpan = minTimespanForProgressiveSearch
296 | 	}
297 | 
298 | 	found := make([]model.TraceID, 0)
299 | 
300 | 	for step := 0; step < maxProgressiveSteps; step++ {
301 | 		if len(found) >= params.NumTraces {
302 | 			break
303 | 		}
304 | 
305 | 		// last step has to take care of the whole remainder
306 | 		if step == maxProgressiveSteps-1 {
307 | 			timeSpan = fullTimeSpan
308 | 		}
309 | 
310 | 		start := end.Add(-timeSpan)
311 | 		if start.Before(params.StartTimeMin) {
312 | 			start = params.StartTimeMin
313 | 		}
314 | 
315 | 		if start.After(end) {
316 | 			break
317 | 		}
318 | 
319 | 		foundInRange, err := r.findTraceIDsInRange(ctx, params, start, end, found)
320 | 		if err != nil {
321 | 			return nil, err
322 | 		}
323 | 
324 | 		found = append(found, foundInRange...)
325 | 
326 | 		end = start
327 | 		timeSpan *= 2
328 | 	}
329 | 
330 | 	return found, nil
331 | }
332 | 
333 | func (r *TraceReader) findTraceIDsInRange(ctx context.Context, params *spanstore.TraceQueryParameters, start, end time.Time, skip []model.TraceID) ([]model.TraceID, error) {
334 | 	span, ctx := opentracing.StartSpanFromContext(ctx, "findTraceIDsInRange")
335 | 	defer span.Finish()
336 | 
337 | 	if end.Before(start) || end == start {
338 | 		return []model.TraceID{}, nil
339 | 	}
340 | 
341 | 	span.SetTag("range", end.Sub(start).String())
342 | 
343 | 	if r.indexTable == "" {
344 | 		return nil, errNoIndexTable
345 | 	}
346 | 
347 | 	query := fmt.Sprintf("SELECT DISTINCT traceID FROM %s WHERE service = ?", r.indexTable)
348 | 	args := []interface{}{params.ServiceName}
349 | 
350 | 	if r.tenant != "" {
351 | 		query += " AND tenant = ?"
352 | 		args = append(args, r.tenant)
353 | 	}
354 | 
355 | 	if params.OperationName != "" {
356 | 		query += " AND operation = ?"
357 | 		args = append(args, params.OperationName)
358 | 	}
359 | 
360 | 	query += " AND timestamp >= ? AND timestamp <= ?"
361 | 	args = append(args, start, end)
362 | 
363 | 	if params.DurationMin != 0 {
364 | 		query += " AND durationUs >= ?"
365 | 		args = append(args, params.DurationMin.Microseconds())
366 | 	}
367 | 
368 | 	if params.DurationMax != 0 {
369 | 		query += " AND durationUs <= ?"
370 | 		args = append(args, params.DurationMax.Microseconds())
371 | 	}
372 | 
373 | 	for key, value := range params.Tags {
374 | 		query += " AND has(tags.key, ?) AND has(splitByChar(',', tags.value[indexOf(tags.key, ?)]), ?)"
375 | 		args = append(args, key, key, value)
376 | 	}
377 | 
378 | 	if len(skip) > 0 {
379 | 		query += fmt.Sprintf(" AND traceID NOT IN (%s)", "?"+strings.Repeat(",?", len(skip)-1))
380 | 		for _, traceID := range skip {
381 | 			args = append(args, traceID.String())
382 | 		}
383 | 	}
384 | 
385 | 	// Sorting by service is required for early termination of primary key scan:
386 | 	// * https://github.com/ClickHouse/ClickHouse/issues/7102
387 | 	query += " ORDER BY service, timestamp DESC LIMIT ?"
388 | 	args = append(args, params.NumTraces-len(skip))
389 | 
390 | 	span.SetTag("db.statement", query)
391 | 	span.SetTag("db.args", args)
392 | 
393 | 	traceIDStrings, err := r.getStrings(ctx, query, args...)
394 | 	if err != nil {
395 | 		return nil, err
396 | 	}
397 | 
398 | 	traceIDs := make([]model.TraceID, len(traceIDStrings))
399 | 	for i, traceIDString := range traceIDStrings {
400 | 		traceID, err := model.TraceIDFromString(traceIDString)
401 | 		if err != nil {
402 | 			return nil, err
403 | 		}
404 | 		traceIDs[i] = traceID
405 | 	}
406 | 
407 | 	return traceIDs, nil
408 | }
409 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/worker_test.go:
--------------------------------------------------------------------------------
  1 | package clickhousespanstore
  2 | 
  3 | import (
  4 | 	"database/sql"
  5 | 	"database/sql/driver"
  6 | 	"encoding/json"
  7 | 	"fmt"
  8 | 	"math/rand"
  9 | 	"strconv"
 10 | 	"testing"
 11 | 	"time"
 12 | 
 13 | 	sqlmock "github.com/DATA-DOG/go-sqlmock"
 14 | 	"github.com/gogo/protobuf/proto"
 15 | 	hclog "github.com/hashicorp/go-hclog"
 16 | 	"github.com/jaegertracing/jaeger/model"
 17 | 	"github.com/stretchr/testify/assert"
 18 | 	"github.com/stretchr/testify/require"
 19 | 
 20 | 	"github.com/jaegertracing/jaeger-clickhouse/storage/clickhousespanstore/mocks"
 21 | )
 22 | 
// Shared constants for the write-worker tests: three fixture sizes plus the
// index table, spans table and tenant names used when building workers and
// expected INSERT statements.
// NOTE(review): the three *Count constants are not used in the visible part of
// this file — presumably consumed by span generators elsewhere; confirm.
const (
	testTagCount      = 10
	testLogCount      = 5
	testLogFieldCount = 5
	testIndexTable    = "test_index_table"
	testSpansTable    = "test_spans_table"
	testTenant        = "test_tenant"
)
 31 | 
// expectation describes one prepared statement a test expects the worker to
// issue: preparation is the SQL text passed to the mock's ExpectPrepare and
// execArgs holds one argument list per expected Exec call on that statement.
type expectation struct {
	preparation string
	execArgs    [][]driver.Value
}
 36 | 
// Package-level fixtures shared by the write-worker tests.
//
// errorMock is the error injected into the SQL mock. process and testSpan
// describe the single span written by the tests, and testSpans wraps it in the
// slice form the worker methods take. keys and values are the tag columns
// derived from testSpan by uniqueTagsForSpan. indexWriteExpectation and
// indexWriteExpectationTenant are the expected index-table INSERTs without and
// with the leading tenant column. writeBatchLogs is the debug log output
// expected from writeBatch.
var (
	errorMock = fmt.Errorf("error mock")
	process   = model.NewProcess("test_service", []model.KeyValue{model.String("test_process_key", "test_process_value")})
	testSpan  = model.Span{
		TraceID:       model.NewTraceID(1, 2),
		SpanID:        model.NewSpanID(3),
		OperationName: "GET /unit_test",
		StartTime:     testStartTime,
		Process:       process,
		Tags:          []model.KeyValue{model.String("test_string_key", "test_string_value"), model.Int64("test_int64_key", 4)},
		Logs:          []model.Log{{Timestamp: testStartTime, Fields: []model.KeyValue{model.String("test_log_key", "test_log_value")}}},
		Duration:      time.Minute,
	}
	testSpans             = []*model.Span{&testSpan}
	keys, values          = uniqueTagsForSpan(&testSpan)
	indexWriteExpectation = expectation{
		preparation: fmt.Sprintf("INSERT INTO %s (timestamp, traceID, service, operation, durationUs, tags.key, tags.value) VALUES (?, ?, ?, ?, ?, ?, ?)", testIndexTable),
		execArgs: [][]driver.Value{{
			testSpan.StartTime,
			testSpan.TraceID.String(),
			testSpan.Process.GetServiceName(),
			testSpan.OperationName,
			uint64(testSpan.Duration.Microseconds()),
			keys,
			values,
		}}}
	indexWriteExpectationTenant = expectation{
		preparation: fmt.Sprintf("INSERT INTO %s (tenant, timestamp, traceID, service, operation, durationUs, tags.key, tags.value) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", testIndexTable),
		execArgs: [][]driver.Value{{
			testTenant,
			testSpan.StartTime,
			testSpan.TraceID.String(),
			testSpan.Process.GetServiceName(),
			testSpan.OperationName,
			uint64(testSpan.Duration.Microseconds()),
			keys,
			values,
		}}}
	writeBatchLogs = []mocks.LogMock{{Msg: "Writing spans", Args: []interface{}{"size", len(testSpans)}}}
)
 77 | 
 78 | func TestSpanWriter_TagKeyValue(t *testing.T) {
 79 | 	tests := map[string]struct {
 80 | 		kv       model.KeyValue
 81 | 		expected string
 82 | 	}{
 83 | 		"string value":       {kv: model.String("tag_key", "tag_string_value"), expected: "tag_string_value"},
 84 | 		"true value":         {kv: model.Bool("tag_key", true), expected: "true"},
 85 | 		"false value":        {kv: model.Bool("tag_key", false), expected: "false"},
 86 | 		"positive int value": {kv: model.Int64("tag_key", 1203912), expected: "1203912"},
 87 | 		"negative int value": {kv: model.Int64("tag_key", -1203912), expected: "-1203912"},
 88 | 		"float value":        {kv: model.Float64("tag_key", 0.005009), expected: "0.005009"},
 89 | 	}
 90 | 	for name, test := range tests {
 91 | 		t.Run(name, func(t *testing.T) {
 92 | 			assert.Equal(t, test.expected, tagValue(&test.kv), "Incorrect tag value string")
 93 | 		})
 94 | 	}
 95 | }
 96 | 
 97 | func TestSpanWriter_UniqueTagsForSpan(t *testing.T) {
 98 | 	tests := map[string]struct {
 99 | 		tags           []model.KeyValue
100 | 		processTags    []model.KeyValue
101 | 		logs           []model.Log
102 | 		expectedKeys   []string
103 | 		expectedValues []string
104 | 	}{
105 | 		"default": {
106 | 			tags:           []model.KeyValue{model.String("key2", "value")},
107 | 			processTags:    []model.KeyValue{model.Int64("key3", 412)},
108 | 			logs:           []model.Log{{Fields: []model.KeyValue{model.Float64("key1", .5)}}},
109 | 			expectedKeys:   []string{"key1", "key2", "key3"},
110 | 			expectedValues: []string{"0.5", "value", "412"},
111 | 		},
112 | 		"repeating tags": {
113 | 			tags:           []model.KeyValue{model.String("key2", "value"), model.String("key2", "value")},
114 | 			processTags:    []model.KeyValue{model.Int64("key3", 412)},
115 | 			logs:           []model.Log{{Fields: []model.KeyValue{model.Float64("key1", .5)}}},
116 | 			expectedKeys:   []string{"key1", "key2", "key3"},
117 | 			expectedValues: []string{"0.5", "value", "412"},
118 | 		},
119 | 		"repeating keys": {
120 | 			tags:           []model.KeyValue{model.String("key2", "value_a"), model.String("key2", "value_b")},
121 | 			processTags:    []model.KeyValue{model.Int64("key3", 412)},
122 | 			logs:           []model.Log{{Fields: []model.KeyValue{model.Float64("key1", .5)}}},
123 | 			expectedKeys:   []string{"key1", "key2", "key3"},
124 | 			expectedValues: []string{"0.5", "value_a,value_b", "412"},
125 | 		},
126 | 		"repeating values": {
127 | 			tags:           []model.KeyValue{model.String("key2", "value"), model.Int64("key4", 412)},
128 | 			processTags:    []model.KeyValue{model.Int64("key3", 412)},
129 | 			logs:           []model.Log{{Fields: []model.KeyValue{model.Float64("key1", .5)}}},
130 | 			expectedKeys:   []string{"key1", "key2", "key3", "key4"},
131 | 			expectedValues: []string{"0.5", "value", "412", "412"},
132 | 		},
133 | 	}
134 | 	for name, test := range tests {
135 | 		t.Run(name, func(t *testing.T) {
136 | 			process := model.Process{Tags: test.processTags}
137 | 			span := model.Span{Tags: test.tags, Process: &process, Logs: test.logs}
138 | 			actualKeys, actualValues := uniqueTagsForSpan(&span)
139 | 			assert.Equal(t, test.expectedKeys, actualKeys)
140 | 			assert.Equal(t, test.expectedValues, actualValues)
141 | 		})
142 | 	}
143 | }
144 | 
145 | func TestSpanWriter_General(t *testing.T) {
146 | 	spanJSON, err := json.Marshal(&testSpan)
147 | 	require.NoError(t, err)
148 | 	modelWriteExpectationJSON := getModelWriteExpectation(spanJSON, "")
149 | 	modelWriteExpectationJSONTenant := getModelWriteExpectation(spanJSON, testTenant)
150 | 	spanProto, err := proto.Marshal(&testSpan)
151 | 	require.NoError(t, err)
152 | 	modelWriteExpectationProto := getModelWriteExpectation(spanProto, "")
153 | 	modelWriteExpectationProtoTenant := getModelWriteExpectation(spanProto, testTenant)
154 | 	tests := map[string]struct {
155 | 		encoding     Encoding
156 | 		indexTable   TableName
157 | 		tenant       string
158 | 		spans        []*model.Span
159 | 		expectations []expectation
160 | 		action       func(writeWorker *WriteWorker, spans []*model.Span) error
161 | 		expectedLogs []mocks.LogMock
162 | 	}{
163 | 		"write index batch": {
164 | 			encoding:     EncodingJSON,
165 | 			indexTable:   testIndexTable,
166 | 			spans:        testSpans,
167 | 			expectations: []expectation{indexWriteExpectation},
168 | 			action:       func(writeWorker *WriteWorker, spans []*model.Span) error { return writeWorker.writeIndexBatch(spans) },
169 | 		},
170 | 		"write index tenant batch": {
171 | 			encoding:     EncodingJSON,
172 | 			indexTable:   testIndexTable,
173 | 			tenant:       testTenant,
174 | 			spans:        testSpans,
175 | 			expectations: []expectation{indexWriteExpectationTenant},
176 | 			action:       func(writeWorker *WriteWorker, spans []*model.Span) error { return writeWorker.writeIndexBatch(spans) },
177 | 		},
178 | 		"write model batch JSON": {
179 | 			encoding:     EncodingJSON,
180 | 			indexTable:   testIndexTable,
181 | 			spans:        testSpans,
182 | 			expectations: []expectation{modelWriteExpectationJSON},
183 | 			action:       func(writeWorker *WriteWorker, spans []*model.Span) error { return writeWorker.writeModelBatch(spans) },
184 | 		},
185 | 		"write model tenant batch JSON": {
186 | 			encoding:     EncodingJSON,
187 | 			indexTable:   testIndexTable,
188 | 			tenant:       testTenant,
189 | 			spans:        testSpans,
190 | 			expectations: []expectation{modelWriteExpectationJSONTenant},
191 | 			action:       func(writeWorker *WriteWorker, spans []*model.Span) error { return writeWorker.writeModelBatch(spans) },
192 | 		},
193 | 		"write model batch Proto": {
194 | 			encoding:     EncodingProto,
195 | 			indexTable:   testIndexTable,
196 | 			spans:        testSpans,
197 | 			expectations: []expectation{modelWriteExpectationProto},
198 | 			action:       func(writeWorker *WriteWorker, spans []*model.Span) error { return writeWorker.writeModelBatch(spans) },
199 | 		},
200 | 		"write model tenant batch Proto": {
201 | 			encoding:     EncodingProto,
202 | 			indexTable:   testIndexTable,
203 | 			tenant:       testTenant,
204 | 			spans:        testSpans,
205 | 			expectations: []expectation{modelWriteExpectationProtoTenant},
206 | 			action:       func(writeWorker *WriteWorker, spans []*model.Span) error { return writeWorker.writeModelBatch(spans) },
207 | 		},
208 | 		"write batch no index JSON": {
209 | 			encoding:     EncodingJSON,
210 | 			indexTable:   "",
211 | 			spans:        testSpans,
212 | 			expectations: []expectation{modelWriteExpectationJSON},
213 | 			action:       func(writeWorker *WriteWorker, spans []*model.Span) error { return writeWorker.writeBatch(spans) },
214 | 			expectedLogs: writeBatchLogs,
215 | 		},
216 | 		"write batch no index Proto": {
217 | 			encoding:     EncodingProto,
218 | 			indexTable:   "",
219 | 			spans:        testSpans,
220 | 			expectations: []expectation{modelWriteExpectationProto},
221 | 			action:       func(writeWorker *WriteWorker, spans []*model.Span) error { return writeWorker.writeBatch(spans) },
222 | 			expectedLogs: writeBatchLogs,
223 | 		},
224 | 		"write batch JSON": {
225 | 			encoding:     EncodingJSON,
226 | 			indexTable:   testIndexTable,
227 | 			spans:        testSpans,
228 | 			expectations: []expectation{modelWriteExpectationJSON, indexWriteExpectation},
229 | 			action:       func(writeWorker *WriteWorker, spans []*model.Span) error { return writeWorker.writeBatch(spans) },
230 | 			expectedLogs: writeBatchLogs,
231 | 		},
232 | 		"write batch tenant JSON": {
233 | 			encoding:     EncodingJSON,
234 | 			indexTable:   testIndexTable,
235 | 			tenant:       testTenant,
236 | 			spans:        testSpans,
237 | 			expectations: []expectation{modelWriteExpectationJSONTenant, indexWriteExpectationTenant},
238 | 			action:       func(writeWorker *WriteWorker, spans []*model.Span) error { return writeWorker.writeBatch(spans) },
239 | 			expectedLogs: writeBatchLogs,
240 | 		},
241 | 		"write batch Proto": {
242 | 			encoding:     EncodingProto,
243 | 			indexTable:   testIndexTable,
244 | 			spans:        testSpans,
245 | 			expectations: []expectation{modelWriteExpectationProto, indexWriteExpectation},
246 | 			action:       func(writeWorker *WriteWorker, spans []*model.Span) error { return writeWorker.writeBatch(spans) },
247 | 			expectedLogs: writeBatchLogs,
248 | 		},
249 | 		"write batch tenant Proto": {
250 | 			encoding:     EncodingProto,
251 | 			indexTable:   testIndexTable,
252 | 			tenant:       testTenant,
253 | 			spans:        testSpans,
254 | 			expectations: []expectation{modelWriteExpectationProtoTenant, indexWriteExpectationTenant},
255 | 			action:       func(writeWorker *WriteWorker, spans []*model.Span) error { return writeWorker.writeBatch(spans) },
256 | 			expectedLogs: writeBatchLogs,
257 | 		},
258 | 	}
259 | 
260 | 	for name, test := range tests {
261 | 		t.Run(name, func(t *testing.T) {
262 | 			db, mock, err := mocks.GetDbMock()
263 | 			require.NoError(t, err, "an error was not expected when opening a stub database connection")
264 | 			defer db.Close()
265 | 
266 | 			spyLogger := mocks.NewSpyLogger()
267 | 			worker := getWriteWorker(spyLogger, db, test.encoding, test.indexTable, test.tenant)
268 | 
269 | 			for _, expectation := range test.expectations {
270 | 				mock.ExpectBegin()
271 | 				prep := mock.ExpectPrepare(expectation.preparation)
272 | 				for _, args := range expectation.execArgs {
273 | 					prep.ExpectExec().WithArgs(args...).WillReturnResult(sqlmock.NewResult(1, 1))
274 | 				}
275 | 				mock.ExpectCommit()
276 | 			}
277 | 
278 | 			assert.NoError(t, test.action(&worker, test.spans))
279 | 			assert.NoError(t, mock.ExpectationsWereMet())
280 | 			spyLogger.AssertLogsOfLevelEqual(t, hclog.Debug, test.expectedLogs)
281 | 		})
282 | 	}
283 | }
284 | 
285 | func TestSpanWriter_BeginError(t *testing.T) {
286 | 	tests := map[string]struct {
287 | 		action       func(writeWorker *WriteWorker) error
288 | 		expectedLogs []mocks.LogMock
289 | 	}{
290 | 		"write model batch": {action: func(writeWorker *WriteWorker) error { return writeWorker.writeModelBatch(testSpans) }},
291 | 		"write index batch": {action: func(writeWorker *WriteWorker) error { return writeWorker.writeIndexBatch(testSpans) }},
292 | 		"write batch": {
293 | 			action:       func(writeWorker *WriteWorker) error { return writeWorker.writeBatch(testSpans) },
294 | 			expectedLogs: writeBatchLogs,
295 | 		},
296 | 	}
297 | 
298 | 	for name, test := range tests {
299 | 		t.Run(name, func(t *testing.T) {
300 | 			db, mock, err := mocks.GetDbMock()
301 | 			require.NoError(t, err, "an error was not expected when opening a stub database connection")
302 | 			defer db.Close()
303 | 
304 | 			spyLogger := mocks.NewSpyLogger()
305 | 			writeWorker := getWriteWorker(spyLogger, db, EncodingJSON, testIndexTable, "")
306 | 
307 | 			mock.ExpectBegin().WillReturnError(errorMock)
308 | 
309 | 			assert.ErrorIs(t, test.action(&writeWorker), errorMock)
310 | 			assert.NoError(t, mock.ExpectationsWereMet())
311 | 			spyLogger.AssertLogsOfLevelEqual(t, hclog.Debug, test.expectedLogs)
312 | 		})
313 | 	}
314 | }
315 | 
316 | func TestSpanWriter_PrepareError(t *testing.T) {
317 | 	spanJSON, err := json.Marshal(&testSpan)
318 | 	require.NoError(t, err)
319 | 	modelWriteExpectation := getModelWriteExpectation(spanJSON, "")
320 | 	modelWriteExpectationTenant := getModelWriteExpectation(spanJSON, testTenant)
321 | 
322 | 	tests := map[string]struct {
323 | 		action       func(writeWorker *WriteWorker) error
324 | 		tenant       string
325 | 		expectation  expectation
326 | 		expectedLogs []mocks.LogMock
327 | 	}{
328 | 		"write model batch": {
329 | 			action:      func(writeWorker *WriteWorker) error { return writeWorker.writeModelBatch(testSpans) },
330 | 			expectation: modelWriteExpectation,
331 | 		},
332 | 		"write model tenant batch": {
333 | 			action:      func(writeWorker *WriteWorker) error { return writeWorker.writeModelBatch(testSpans) },
334 | 			tenant:      testTenant,
335 | 			expectation: modelWriteExpectationTenant,
336 | 		},
337 | 		"write index batch": {
338 | 			action:      func(writeWorker *WriteWorker) error { return writeWorker.writeIndexBatch(testSpans) },
339 | 			expectation: indexWriteExpectation,
340 | 		},
341 | 		"write index tenant batch": {
342 | 			action:      func(writeWorker *WriteWorker) error { return writeWorker.writeIndexBatch(testSpans) },
343 | 			tenant:      testTenant,
344 | 			expectation: indexWriteExpectationTenant,
345 | 		},
346 | 		"write batch": {
347 | 			action:       func(writeWorker *WriteWorker) error { return writeWorker.writeBatch(testSpans) },
348 | 			expectation:  modelWriteExpectation,
349 | 			expectedLogs: writeBatchLogs,
350 | 		},
351 | 		"write tenant batch": {
352 | 			action:       func(writeWorker *WriteWorker) error { return writeWorker.writeBatch(testSpans) },
353 | 			tenant:       testTenant,
354 | 			expectation:  modelWriteExpectationTenant,
355 | 			expectedLogs: writeBatchLogs,
356 | 		},
357 | 	}
358 | 
359 | 	for name, test := range tests {
360 | 		t.Run(name, func(t *testing.T) {
361 | 			db, mock, err := mocks.GetDbMock()
362 | 			require.NoError(t, err, "an error was not expected when opening a stub database connection")
363 | 			defer db.Close()
364 | 
365 | 			spyLogger := mocks.NewSpyLogger()
366 | 			spanWriter := getWriteWorker(spyLogger, db, EncodingJSON, testIndexTable, test.tenant)
367 | 
368 | 			mock.ExpectBegin()
369 | 			mock.ExpectPrepare(test.expectation.preparation).WillReturnError(errorMock)
370 | 			mock.ExpectRollback()
371 | 
372 | 			assert.ErrorIs(t, test.action(&spanWriter), errorMock)
373 | 			assert.NoError(t, mock.ExpectationsWereMet())
374 | 			spyLogger.AssertLogsOfLevelEqual(t, hclog.Debug, test.expectedLogs)
375 | 		})
376 | 	}
377 | }
378 | 
379 | func TestSpanWriter_ExecError(t *testing.T) {
380 | 	spanJSON, err := json.Marshal(&testSpan)
381 | 	require.NoError(t, err)
382 | 	modelWriteExpectation := getModelWriteExpectation(spanJSON, "")
383 | 	modelWriteExpectationTenant := getModelWriteExpectation(spanJSON, testTenant)
384 | 	tests := map[string]struct {
385 | 		indexTable   TableName
386 | 		tenant       string
387 | 		expectations []expectation
388 | 		action       func(writer *WriteWorker) error
389 | 		expectedLogs []mocks.LogMock
390 | 	}{
391 | 		"write model batch": {
392 | 			indexTable:   testIndexTable,
393 | 			expectations: []expectation{modelWriteExpectation},
394 | 			action:       func(writer *WriteWorker) error { return writer.writeModelBatch(testSpans) },
395 | 		},
396 | 		"write model tenant batch": {
397 | 			indexTable:   testIndexTable,
398 | 			tenant:       testTenant,
399 | 			expectations: []expectation{modelWriteExpectationTenant},
400 | 			action:       func(writer *WriteWorker) error { return writer.writeModelBatch(testSpans) },
401 | 		},
402 | 		"write index batch": {
403 | 			indexTable:   testIndexTable,
404 | 			expectations: []expectation{indexWriteExpectation},
405 | 			action:       func(writer *WriteWorker) error { return writer.writeIndexBatch(testSpans) },
406 | 		},
407 | 		"write index tenant batch": {
408 | 			indexTable:   testIndexTable,
409 | 			tenant:       testTenant,
410 | 			expectations: []expectation{indexWriteExpectationTenant},
411 | 			action:       func(writer *WriteWorker) error { return writer.writeIndexBatch(testSpans) },
412 | 		},
413 | 		"write batch no index": {
414 | 			indexTable:   "",
415 | 			expectations: []expectation{modelWriteExpectation},
416 | 			action:       func(writer *WriteWorker) error { return writer.writeBatch(testSpans) },
417 | 			expectedLogs: writeBatchLogs,
418 | 		},
419 | 		"write batch": {
420 | 			indexTable:   testIndexTable,
421 | 			expectations: []expectation{modelWriteExpectation, indexWriteExpectation},
422 | 			action:       func(writer *WriteWorker) error { return writer.writeBatch(testSpans) },
423 | 			expectedLogs: writeBatchLogs,
424 | 		},
425 | 		"write tenant batch": {
426 | 			indexTable:   testIndexTable,
427 | 			tenant:       testTenant,
428 | 			expectations: []expectation{modelWriteExpectationTenant, indexWriteExpectationTenant},
429 | 			action:       func(writer *WriteWorker) error { return writer.writeBatch(testSpans) },
430 | 			expectedLogs: writeBatchLogs,
431 | 		},
432 | 	}
433 | 
434 | 	for name, test := range tests {
435 | 		t.Run(name, func(t *testing.T) {
436 | 			db, mock, err := mocks.GetDbMock()
437 | 			require.NoError(t, err, "an error was not expected when opening a stub database connection")
438 | 			defer db.Close()
439 | 
440 | 			spyLogger := mocks.NewSpyLogger()
441 | 			writeWorker := getWriteWorker(spyLogger, db, EncodingJSON, testIndexTable, test.tenant)
442 | 
443 | 			for i, expectation := range test.expectations {
444 | 				mock.ExpectBegin()
445 | 				prep := mock.ExpectPrepare(expectation.preparation)
446 | 				if i < len(test.expectations)-1 {
447 | 					for _, args := range expectation.execArgs {
448 | 						prep.ExpectExec().WithArgs(args...).WillReturnResult(sqlmock.NewResult(1, 1))
449 | 					}
450 | 					mock.ExpectCommit()
451 | 				} else {
452 | 					prep.ExpectExec().WithArgs(expectation.execArgs[0]...).WillReturnError(errorMock)
453 | 					mock.ExpectRollback()
454 | 				}
455 | 			}
456 | 
457 | 			assert.ErrorIs(t, test.action(&writeWorker), errorMock)
458 | 			assert.NoError(t, mock.ExpectationsWereMet())
459 | 			spyLogger.AssertLogsOfLevelEqual(t, hclog.Debug, test.expectedLogs)
460 | 		})
461 | 	}
462 | }
463 | 
464 | func getWriteWorker(spyLogger mocks.SpyLogger, db *sql.DB, encoding Encoding, indexTable TableName, tenant string) WriteWorker {
465 | 	return WriteWorker{
466 | 		params: &WorkerParams{
467 | 			logger:     spyLogger,
468 | 			db:         db,
469 | 			spansTable: testSpansTable,
470 | 			indexTable: indexTable,
471 | 			tenant:     tenant,
472 | 			encoding:   encoding,
473 | 		},
474 | 		workerDone: make(chan *WriteWorker),
475 | 	}
476 | }
477 | 
478 | func generateRandomSpans(count int) []*model.Span {
479 | 	spans := make([]*model.Span, count)
480 | 	for i := 0; i < count; i++ {
481 | 		span := generateRandomSpan()
482 | 		spans[i] = &span
483 | 	}
484 | 	return spans
485 | }
486 | 
487 | func generateRandomSpan() model.Span {
488 | 	processTags := generateRandomKeyValues(testTagCount)
489 | 	process := model.Process{
490 | 		ServiceName: "service" + strconv.FormatUint(rand.Uint64(), 10),
491 | 		Tags:        processTags,
492 | 	}
493 | 	span := model.Span{
494 | 		TraceID:       model.NewTraceID(rand.Uint64(), rand.Uint64()),
495 | 		SpanID:        model.NewSpanID(rand.Uint64()),
496 | 		OperationName: "operation" + strconv.FormatUint(rand.Uint64(), 10),
497 | 		StartTime:     getRandomTime(),
498 | 		Process:       &process,
499 | 		Tags:          generateRandomKeyValues(testTagCount),
500 | 		Logs:          generateRandomLogs(),
501 | 		Duration:      time.Unix(rand.Int63n(1<<32), 0).Sub(time.Unix(0, 0)),
502 | 	}
503 | 	return span
504 | }
505 | 
506 | func generateRandomLogs() []model.Log {
507 | 	logs := make([]model.Log, 0, testLogCount)
508 | 	for i := 0; i < testLogCount; i++ {
509 | 		timestamp := getRandomTime()
510 | 		logs = append(logs, model.Log{Timestamp: timestamp, Fields: generateRandomKeyValues(testLogFieldCount)})
511 | 	}
512 | 	return logs
513 | }
514 | 
// getRandomTime returns a random instant with whole-second precision that is
// at or after the Unix epoch and strictly before the current second.
func getRandomTime() time.Time {
	upperBound := time.Now().Unix()
	seconds := rand.Int63n(upperBound)
	return time.Unix(seconds, 0)
}
518 | 
519 | func generateRandomKeyValues(count int) []model.KeyValue {
520 | 	tags := make([]model.KeyValue, 0, count)
521 | 	for i := 0; i < count; i++ {
522 | 		key := "key" + strconv.FormatUint(rand.Uint64(), 16)
523 | 		value := "key" + strconv.FormatUint(rand.Uint64(), 16)
524 | 		kv := model.KeyValue{Key: key, VType: model.ValueType_STRING, VStr: value}
525 | 		tags = append(tags, kv)
526 | 	}
527 | 
528 | 	return tags
529 | }
530 | 
531 | func getModelWriteExpectation(spanJSON []byte, tenant string) expectation {
532 | 	if tenant == "" {
533 | 		return expectation{
534 | 			preparation: fmt.Sprintf("INSERT INTO %s (timestamp, traceID, model) VALUES (?, ?, ?)", testSpansTable),
535 | 			execArgs: [][]driver.Value{{
536 | 				testSpan.StartTime,
537 | 				testSpan.TraceID.String(),
538 | 				spanJSON,
539 | 			}},
540 | 		}
541 | 	} else {
542 | 		return expectation{
543 | 			preparation: fmt.Sprintf("INSERT INTO %s (tenant, timestamp, traceID, model) VALUES (?, ?, ?, ?)", testSpansTable),
544 | 			execArgs: [][]driver.Value{{
545 | 				tenant,
546 | 				testSpan.StartTime,
547 | 				testSpan.TraceID.String(),
548 | 				spanJSON,
549 | 			}},
550 | 		}
551 | 	}
552 | }
553 | 


--------------------------------------------------------------------------------
/storage/clickhousespanstore/reader_test.go:
--------------------------------------------------------------------------------
   1 | package clickhousespanstore
   2 | 
   3 | import (
   4 | 	"context"
   5 | 	"database/sql/driver"
   6 | 	"encoding/json"
   7 | 	"fmt"
   8 | 	"math"
   9 | 	"strings"
  10 | 	"testing"
  11 | 	"time"
  12 | 
  13 | 	sqlmock "github.com/DATA-DOG/go-sqlmock"
  14 | 	"github.com/gogo/protobuf/proto"
  15 | 	"github.com/jaegertracing/jaeger/model"
  16 | 	"github.com/jaegertracing/jaeger/storage/spanstore"
  17 | 	"github.com/stretchr/testify/assert"
  18 | 	"github.com/stretchr/testify/require"
  19 | 
  20 | 	"github.com/jaegertracing/jaeger-clickhouse/storage/clickhousespanstore/mocks"
  21 | )
  22 | 
const (
	// testOperationsTable is the operations table name handed to NewTraceReader
	// and interpolated into the expected GetServices/GetOperations queries.
	testOperationsTable = "test_operations_table"
	// testNumTraces is the NumTraces value requested in the FindTraceIDs tests
	// and the number of trace IDs generated for them.
	testNumTraces       = 10
	// testSpansInTrace is the number of random spans generated per trace.
	testSpansInTrace    = 2
	// testMaxNumSpans is passed as the last argument of NewTraceReader.
	// NOTE(review): presumably 0 disables the span limit; confirm in reader.go.
	testMaxNumSpans     = 0
)

// testStartTime is the fixed reference instant the time-range tests are built around.
var testStartTime = time.Date(2010, 3, 15, 7, 40, 0, 0, time.UTC)
  31 | 
  32 | func TestTraceReader_FindTraceIDs(t *testing.T) {
  33 | 	service := "service"
  34 | 
  35 | 	tests := map[string]struct {
  36 | 		queryTemplate string
  37 | 		firstArgs     []driver.Value
  38 | 		tenant        string
  39 | 	}{
  40 | 		"default": {
  41 | 			queryTemplate: "SELECT DISTINCT traceID FROM %s WHERE service = ? AND timestamp >= ? AND timestamp <= ?%s ORDER BY service, timestamp DESC LIMIT ?",
  42 | 			firstArgs:     []driver.Value{service},
  43 | 		},
  44 | 		"tenant": {
  45 | 			queryTemplate: "SELECT DISTINCT traceID FROM %s WHERE service = ? AND tenant = ? AND timestamp >= ? AND timestamp <= ?%s ORDER BY service, timestamp DESC LIMIT ?",
  46 | 			firstArgs:     []driver.Value{service, testTenant},
  47 | 			tenant:        testTenant,
  48 | 		},
  49 | 	}
  50 | 
  51 | 	for name, test := range tests {
  52 | 		t.Run(name, func(t *testing.T) {
  53 | 			db, mock, err := mocks.GetDbMock()
  54 | 			require.NoError(t, err, "an error was not expected when opening a stub database connection")
  55 | 			defer db.Close()
  56 | 
  57 | 			traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, test.tenant, testMaxNumSpans)
  58 | 			start := testStartTime
  59 | 			end := start.Add(24 * time.Hour)
  60 | 			fullDuration := end.Sub(start)
  61 | 			duration := fullDuration
  62 | 			for i := 0; i < maxProgressiveSteps; i++ {
  63 | 				duration /= 2
  64 | 			}
  65 | 			params := spanstore.TraceQueryParameters{
  66 | 				ServiceName:  service,
  67 | 				NumTraces:    testNumTraces,
  68 | 				StartTimeMin: start,
  69 | 				StartTimeMax: end,
  70 | 			}
  71 | 
  72 | 			expectedTraceIDs := make([]model.TraceID, testNumTraces)
  73 | 			traceIDValues := make([]driver.Value, testNumTraces)
  74 | 			for i := range expectedTraceIDs {
  75 | 				traceID := model.TraceID{Low: uint64(i)}
  76 | 				expectedTraceIDs[i] = traceID
  77 | 				traceIDValues[i] = traceID.String()
  78 | 			}
  79 | 
  80 | 			found := traceIDValues[:0]
  81 | 			endArg := end
  82 | 			for i := 0; i < maxProgressiveSteps; i++ {
  83 | 				if i == maxProgressiveSteps-1 {
  84 | 					duration = fullDuration
  85 | 				}
  86 | 
  87 | 				startArg := endArg.Add(-duration)
  88 | 				if startArg.Before(start) {
  89 | 					startArg = start
  90 | 				}
  91 | 
  92 | 				// Select how many spans query will return
  93 | 				index := int(math.Min(float64(i*2+1), testNumTraces))
  94 | 				if i == maxProgressiveSteps-1 {
  95 | 					index = testNumTraces
  96 | 				}
  97 | 				args := test.firstArgs
  98 | 				args = append(args, startArg)
  99 | 				args = append(args, endArg)
 100 | 				args = append(args, found...)
 101 | 				args = append(args, testNumTraces-len(found))
 102 | 				mock.
 103 | 					ExpectQuery(fmt.Sprintf(
 104 | 						test.queryTemplate,
 105 | 						testIndexTable,
 106 | 						func() string {
 107 | 							if len(found) == 0 {
 108 | 								return ""
 109 | 							}
 110 | 							return " AND traceID NOT IN (?" + strings.Repeat(",?", len(found)-1) + ")"
 111 | 						}(),
 112 | 					)).
 113 | 					WithArgs(args...).
 114 | 					WillReturnRows(getRows(traceIDValues[len(found):index]))
 115 | 				endArg = startArg
 116 | 				duration *= 2
 117 | 				found = traceIDValues[:index]
 118 | 			}
 119 | 
 120 | 			traceIDs, err := traceReader.FindTraceIDs(context.Background(), &params)
 121 | 			require.NoError(t, err)
 122 | 			assert.Equal(t, expectedTraceIDs, traceIDs)
 123 | 			assert.NoError(t, mock.ExpectationsWereMet())
 124 | 		})
 125 | 	}
 126 | }
 127 | 
 128 | func TestTraceReader_FindTraceIDsShortDurationAfterReduction(t *testing.T) {
 129 | 	db, mock, err := mocks.GetDbMock()
 130 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 131 | 	defer db.Close()
 132 | 
 133 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
 134 | 	service := "service"
 135 | 	start := testStartTime
 136 | 	end := start.Add(8 * time.Hour)
 137 | 	fullDuration := end.Sub(start)
 138 | 	duration := minTimespanForProgressiveSearch
 139 | 	params := spanstore.TraceQueryParameters{
 140 | 		ServiceName:  service,
 141 | 		NumTraces:    testNumTraces,
 142 | 		StartTimeMin: start,
 143 | 		StartTimeMax: end,
 144 | 	}
 145 | 
 146 | 	expectedTraceIDs := make([]model.TraceID, testNumTraces)
 147 | 	traceIDValues := make([]driver.Value, testNumTraces)
 148 | 	for i := range expectedTraceIDs {
 149 | 		traceID := model.TraceID{Low: uint64(i)}
 150 | 		expectedTraceIDs[i] = traceID
 151 | 		traceIDValues[i] = traceID.String()
 152 | 	}
 153 | 
 154 | 	found := traceIDValues[:0]
 155 | 	endArg := end
 156 | 	for i := 0; i < maxProgressiveSteps; i++ {
 157 | 		if i == maxProgressiveSteps-1 {
 158 | 			duration = fullDuration
 159 | 		}
 160 | 
 161 | 		startArg := endArg.Add(-duration)
 162 | 		if startArg.Before(start) {
 163 | 			startArg = start
 164 | 		}
 165 | 
 166 | 		index := func() int {
 167 | 			switch i {
 168 | 			case 0:
 169 | 				return 1
 170 | 			case 1:
 171 | 				return 3
 172 | 			case 2:
 173 | 				return 5
 174 | 			default:
 175 | 				return testNumTraces
 176 | 			}
 177 | 		}()
 178 | 		args := append(
 179 | 			append(
 180 | 				[]driver.Value{
 181 | 					service,
 182 | 					startArg,
 183 | 					endArg,
 184 | 				},
 185 | 				found...),
 186 | 			testNumTraces-len(found))
 187 | 		mock.
 188 | 			ExpectQuery(fmt.Sprintf(
 189 | 				"SELECT DISTINCT traceID FROM %s WHERE service = ? AND timestamp >= ? AND timestamp <= ?%s ORDER BY service, timestamp DESC LIMIT ?",
 190 | 				testIndexTable,
 191 | 				func() string {
 192 | 					if len(found) == 0 {
 193 | 						return ""
 194 | 					}
 195 | 					return " AND traceID NOT IN (?" + strings.Repeat(",?", len(found)-1) + ")"
 196 | 				}(),
 197 | 			)).
 198 | 			WithArgs(args...).
 199 | 			WillReturnRows(getRows(traceIDValues[len(found):index]))
 200 | 		endArg = startArg
 201 | 		duration *= 2
 202 | 		found = traceIDValues[:index]
 203 | 	}
 204 | 
 205 | 	traceIDs, err := traceReader.FindTraceIDs(context.Background(), &params)
 206 | 	require.NoError(t, err)
 207 | 	assert.Equal(t, expectedTraceIDs, traceIDs)
 208 | 	assert.NoError(t, mock.ExpectationsWereMet())
 209 | }
 210 | 
 211 | func TestTraceReader_FindTraceIDsEarlyExit(t *testing.T) {
 212 | 	db, mock, err := mocks.GetDbMock()
 213 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 214 | 	defer db.Close()
 215 | 
 216 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
 217 | 	service := "service"
 218 | 	start := testStartTime
 219 | 	end := start.Add(24 * time.Hour)
 220 | 	duration := end.Sub(start)
 221 | 	for i := 0; i < maxProgressiveSteps; i++ {
 222 | 		duration /= 2
 223 | 	}
 224 | 	params := spanstore.TraceQueryParameters{
 225 | 		ServiceName:  service,
 226 | 		NumTraces:    testNumTraces,
 227 | 		StartTimeMin: start,
 228 | 		StartTimeMax: end,
 229 | 	}
 230 | 
 231 | 	expectedTraceIDs := make([]model.TraceID, testNumTraces)
 232 | 	traceIDValues := make([]driver.Value, testNumTraces)
 233 | 	for i := range expectedTraceIDs {
 234 | 		traceID := model.TraceID{Low: uint64(i)}
 235 | 		expectedTraceIDs[i] = traceID
 236 | 		traceIDValues[i] = traceID.String()
 237 | 	}
 238 | 
 239 | 	endArg := end
 240 | 	startArg := endArg.Add(-duration)
 241 | 	if startArg.Before(start) {
 242 | 		startArg = start
 243 | 	}
 244 | 
 245 | 	mock.
 246 | 		ExpectQuery(fmt.Sprintf(
 247 | 			"SELECT DISTINCT traceID FROM %s WHERE service = ? AND timestamp >= ? AND timestamp <= ? ORDER BY service, timestamp DESC LIMIT ?",
 248 | 			testIndexTable,
 249 | 		)).
 250 | 		WithArgs(
 251 | 			service,
 252 | 			startArg,
 253 | 			endArg,
 254 | 			testNumTraces,
 255 | 		).
 256 | 		WillReturnRows(getRows(traceIDValues))
 257 | 
 258 | 	traceIDs, err := traceReader.FindTraceIDs(context.Background(), &params)
 259 | 	require.NoError(t, err)
 260 | 	assert.Equal(t, expectedTraceIDs, traceIDs)
 261 | 	assert.NoError(t, mock.ExpectationsWereMet())
 262 | }
 263 | 
 264 | func TestTraceReader_FindTraceIDsShortRange(t *testing.T) {
 265 | 	db, mock, err := mocks.GetDbMock()
 266 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 267 | 	defer db.Close()
 268 | 
 269 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
 270 | 	service := "service"
 271 | 	start := testStartTime
 272 | 	end := start.Add(time.Hour)
 273 | 	params := spanstore.TraceQueryParameters{
 274 | 		ServiceName:  service,
 275 | 		NumTraces:    testNumTraces,
 276 | 		StartTimeMin: start,
 277 | 		StartTimeMax: end,
 278 | 	}
 279 | 
 280 | 	expectedTraceIDs := make([]model.TraceID, testNumTraces)
 281 | 	traceIDValues := make([]driver.Value, testNumTraces)
 282 | 	for i := range expectedTraceIDs {
 283 | 		traceID := model.TraceID{Low: uint64(i)}
 284 | 		expectedTraceIDs[i] = traceID
 285 | 		traceIDValues[i] = traceID.String()
 286 | 	}
 287 | 
 288 | 	mock.
 289 | 		ExpectQuery(fmt.Sprintf(
 290 | 			"SELECT DISTINCT traceID FROM %s WHERE service = ? AND timestamp >= ? AND timestamp <= ? ORDER BY service, timestamp DESC LIMIT ?",
 291 | 			testIndexTable,
 292 | 		)).
 293 | 		WithArgs(
 294 | 			service,
 295 | 			start,
 296 | 			end,
 297 | 			testNumTraces,
 298 | 		).
 299 | 		WillReturnRows(getRows(traceIDValues))
 300 | 
 301 | 	traceIDs, err := traceReader.FindTraceIDs(context.Background(), &params)
 302 | 	require.NoError(t, err)
 303 | 	assert.Equal(t, expectedTraceIDs, traceIDs)
 304 | 	assert.NoError(t, mock.ExpectationsWereMet())
 305 | }
 306 | 
 307 | func TestTraceReader_FindTraceIDsQueryError(t *testing.T) {
 308 | 	db, mock, err := mocks.GetDbMock()
 309 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 310 | 	defer db.Close()
 311 | 
 312 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
 313 | 	service := "service"
 314 | 	start := testStartTime
 315 | 	end := start.Add(24 * time.Hour)
 316 | 	duration := end.Sub(start)
 317 | 	for i := 0; i < maxProgressiveSteps; i++ {
 318 | 		duration /= 2
 319 | 	}
 320 | 	params := spanstore.TraceQueryParameters{
 321 | 		ServiceName:  service,
 322 | 		NumTraces:    testNumTraces,
 323 | 		StartTimeMin: start,
 324 | 		StartTimeMax: end,
 325 | 	}
 326 | 
 327 | 	mock.
 328 | 		ExpectQuery(fmt.Sprintf(
 329 | 			"SELECT DISTINCT traceID FROM %s WHERE service = ? AND timestamp >= ? AND timestamp <= ? ORDER BY service, timestamp DESC LIMIT ?",
 330 | 			testIndexTable,
 331 | 		)).
 332 | 		WithArgs(
 333 | 			service,
 334 | 			end.Add(-duration),
 335 | 			end,
 336 | 			testNumTraces,
 337 | 		).
 338 | 		WillReturnError(errorMock)
 339 | 
 340 | 	traceIDs, err := traceReader.FindTraceIDs(context.Background(), &params)
 341 | 	require.ErrorIs(t, err, errorMock)
 342 | 	assert.Equal(t, []model.TraceID(nil), traceIDs)
 343 | 	assert.NoError(t, mock.ExpectationsWereMet())
 344 | }
 345 | 
 346 | func TestTraceReader_FindTraceIDsZeroStartTime(t *testing.T) {
 347 | 	db, mock, err := mocks.GetDbMock()
 348 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 349 | 	defer db.Close()
 350 | 
 351 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
 352 | 	service := "service"
 353 | 	start := time.Time{}
 354 | 	end := testStartTime
 355 | 	params := spanstore.TraceQueryParameters{
 356 | 		ServiceName:  service,
 357 | 		NumTraces:    testNumTraces,
 358 | 		StartTimeMin: start,
 359 | 		StartTimeMax: end,
 360 | 	}
 361 | 
 362 | 	traceIDs, err := traceReader.FindTraceIDs(context.Background(), &params)
 363 | 	require.ErrorIs(t, err, errStartTimeRequired)
 364 | 	assert.Equal(t, []model.TraceID(nil), traceIDs)
 365 | 	assert.NoError(t, mock.ExpectationsWereMet())
 366 | }
 367 | 
 368 | func TestTraceReader_GetServices(t *testing.T) {
 369 | 	tests := map[string]struct {
 370 | 		query  string
 371 | 		args   []driver.Value
 372 | 		tenant string
 373 | 	}{
 374 | 		"default": {
 375 | 			query: fmt.Sprintf("SELECT service FROM %s GROUP BY service", testOperationsTable),
 376 | 			args:  []driver.Value{},
 377 | 		},
 378 | 		"tenant": {
 379 | 			query:  fmt.Sprintf("SELECT service FROM %s WHERE tenant = ? GROUP BY service", testOperationsTable),
 380 | 			args:   []driver.Value{testTenant},
 381 | 			tenant: testTenant,
 382 | 		},
 383 | 	}
 384 | 
 385 | 	for name, test := range tests {
 386 | 		t.Run(name, func(t *testing.T) {
 387 | 			db, mock, err := mocks.GetDbMock()
 388 | 			require.NoError(t, err, "an error was not expected when opening a stub database connection")
 389 | 			defer db.Close()
 390 | 
 391 | 			traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, test.tenant, testMaxNumSpans)
 392 | 			expectedServices := []string{"GET /first", "POST /second", "PUT /third"}
 393 | 			expectedServiceValues := make([]driver.Value, len(expectedServices))
 394 | 			for i := range expectedServices {
 395 | 				expectedServiceValues[i] = expectedServices[i]
 396 | 			}
 397 | 			queryResult := getRows(expectedServiceValues)
 398 | 
 399 | 			mock.ExpectQuery(test.query).WithArgs(test.args...).WillReturnRows(queryResult)
 400 | 
 401 | 			services, err := traceReader.GetServices(context.Background())
 402 | 			require.NoError(t, err)
 403 | 			assert.Equal(t, expectedServices, services)
 404 | 			assert.NoError(t, mock.ExpectationsWereMet())
 405 | 		})
 406 | 	}
 407 | }
 408 | 
 409 | func TestTraceReader_GetServicesQueryError(t *testing.T) {
 410 | 	db, mock, err := mocks.GetDbMock()
 411 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 412 | 	defer db.Close()
 413 | 
 414 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
 415 | 
 416 | 	mock.
 417 | 		ExpectQuery(fmt.Sprintf("SELECT service FROM %s GROUP BY service", testOperationsTable)).
 418 | 		WillReturnError(errorMock)
 419 | 	services, err := traceReader.GetServices(context.Background())
 420 | 	require.ErrorIs(t, err, errorMock)
 421 | 	assert.Equal(t, []string(nil), services)
 422 | 	assert.NoError(t, mock.ExpectationsWereMet())
 423 | }
 424 | 
 425 | func TestTraceReader_GetServicesNoTable(t *testing.T) {
 426 | 	db, _, err := mocks.GetDbMock()
 427 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 428 | 	defer db.Close()
 429 | 
 430 | 	traceReader := NewTraceReader(db, "", testIndexTable, testSpansTable, "", testMaxNumSpans)
 431 | 
 432 | 	services, err := traceReader.GetServices(context.Background())
 433 | 	require.ErrorIs(t, err, errNoOperationsTable)
 434 | 	assert.Equal(t, []string(nil), services)
 435 | }
 436 | 
 437 | func TestTraceReader_GetOperations(t *testing.T) {
 438 | 	db, mock, err := mocks.GetDbMock()
 439 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 440 | 	defer db.Close()
 441 | 
 442 | 	service := "test service"
 443 | 	params := spanstore.OperationQueryParameters{ServiceName: service}
 444 | 	tests := map[string]struct {
 445 | 		tenant   string
 446 | 		query    string
 447 | 		args     []driver.Value
 448 | 		rows     *sqlmock.Rows
 449 | 		expected []spanstore.Operation
 450 | 	}{
 451 | 		"default": {
 452 | 			query: fmt.Sprintf("SELECT operation, spankind FROM %s WHERE service = ? GROUP BY operation, spankind ORDER BY operation", testOperationsTable),
 453 | 			args:  []driver.Value{service},
 454 | 			rows: sqlmock.NewRows([]string{"operation", "spankind"}).
 455 | 				AddRow("operation_1", "client").
 456 | 				AddRow("operation_2", ""),
 457 | 			expected: []spanstore.Operation{{Name: "operation_1", SpanKind: "client"}, {Name: "operation_2"}},
 458 | 		},
 459 | 		"tenant": {
 460 | 			tenant: testTenant,
 461 | 			query:  fmt.Sprintf("SELECT operation, spankind FROM %s WHERE tenant = ? AND service = ? GROUP BY operation, spankind ORDER BY operation", testOperationsTable),
 462 | 			args:   []driver.Value{testTenant, service},
 463 | 			rows: sqlmock.NewRows([]string{"operation", "spankind"}).
 464 | 				AddRow("operation_1", "client").
 465 | 				AddRow("operation_2", ""),
 466 | 			expected: []spanstore.Operation{{Name: "operation_1", SpanKind: "client"}, {Name: "operation_2"}},
 467 | 		},
 468 | 	}
 469 | 	for name, test := range tests {
 470 | 		t.Run(name, func(t *testing.T) {
 471 | 			mock.
 472 | 				ExpectQuery(test.query).
 473 | 				WithArgs(test.args...).
 474 | 				WillReturnRows(test.rows)
 475 | 
 476 | 			traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, test.tenant, testMaxNumSpans)
 477 | 			operations, err := traceReader.GetOperations(context.Background(), params)
 478 | 			require.NoError(t, err)
 479 | 			assert.Equal(t, test.expected, operations)
 480 | 			assert.NoError(t, mock.ExpectationsWereMet())
 481 | 		})
 482 | 	}
 483 | }
 484 | 
 485 | func TestTraceReader_GetOperationsQueryError(t *testing.T) {
 486 | 	db, mock, err := mocks.GetDbMock()
 487 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 488 | 	defer db.Close()
 489 | 
 490 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
 491 | 	service := "test service"
 492 | 	params := spanstore.OperationQueryParameters{ServiceName: service}
 493 | 	mock.
 494 | 		ExpectQuery(fmt.Sprintf("SELECT operation, spankind FROM %s WHERE service = ? GROUP BY operation, spankind ORDER BY operation", testOperationsTable)).
 495 | 		WithArgs(service).
 496 | 		WillReturnError(errorMock)
 497 | 
 498 | 	operations, err := traceReader.GetOperations(context.Background(), params)
 499 | 	assert.ErrorIs(t, err, errorMock)
 500 | 	assert.Equal(t, []spanstore.Operation(nil), operations)
 501 | 	assert.NoError(t, mock.ExpectationsWereMet())
 502 | }
 503 | 
 504 | func TestTraceReader_GetOperationsNoTable(t *testing.T) {
 505 | 	db, _, err := mocks.GetDbMock()
 506 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 507 | 	defer db.Close()
 508 | 
 509 | 	traceReader := NewTraceReader(db, "", testIndexTable, testSpansTable, "", testMaxNumSpans)
 510 | 	service := "test service"
 511 | 	params := spanstore.OperationQueryParameters{ServiceName: service}
 512 | 	operations, err := traceReader.GetOperations(context.Background(), params)
 513 | 	assert.ErrorIs(t, err, errNoOperationsTable)
 514 | 	assert.Equal(t, []spanstore.Operation(nil), operations)
 515 | }
 516 | 
 517 | func TestTraceReader_GetTrace(t *testing.T) {
 518 | 	db, mock, err := mocks.GetDbMock()
 519 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 520 | 	defer db.Close()
 521 | 
 522 | 	traceID := model.TraceID{High: 0, Low: 1}
 523 | 	spanRefs := generateRandomSpans(testSpansInTrace)
 524 | 	trace := model.Trace{}
 525 | 	for _, span := range spanRefs {
 526 | 		span.TraceID = traceID
 527 | 		trace.Spans = append(trace.Spans, span)
 528 | 	}
 529 | 	spans := make([]model.Span, len(spanRefs))
 530 | 	for i := range spanRefs {
 531 | 		spans[i] = *spanRefs[i]
 532 | 	}
 533 | 
 534 | 	tests := map[string]struct {
 535 | 		tenant        string
 536 | 		queryResult   *sqlmock.Rows
 537 | 		expectedTrace *model.Trace
 538 | 		expectedError error
 539 | 	}{
 540 | 		"json": {
 541 | 			queryResult:   getEncodedSpans(spans, func(span *model.Span) ([]byte, error) { return json.Marshal(span) }),
 542 | 			expectedTrace: &trace,
 543 | 			expectedError: nil,
 544 | 		},
 545 | 		"json tenant": {
 546 | 			tenant:        testTenant,
 547 | 			queryResult:   getEncodedSpans(spans, func(span *model.Span) ([]byte, error) { return json.Marshal(span) }),
 548 | 			expectedTrace: &trace,
 549 | 			expectedError: nil,
 550 | 		},
 551 | 		"protobuf": {
 552 | 			queryResult:   getEncodedSpans(spans, func(span *model.Span) ([]byte, error) { return proto.Marshal(span) }),
 553 | 			expectedTrace: &trace,
 554 | 			expectedError: nil,
 555 | 		},
 556 | 		"protobuf tenant": {
 557 | 			tenant:        testTenant,
 558 | 			queryResult:   getEncodedSpans(spans, func(span *model.Span) ([]byte, error) { return proto.Marshal(span) }),
 559 | 			expectedTrace: &trace,
 560 | 			expectedError: nil,
 561 | 		},
 562 | 		"trace not found": {
 563 | 			queryResult:   sqlmock.NewRows([]string{"model"}),
 564 | 			expectedTrace: nil,
 565 | 			expectedError: spanstore.ErrTraceNotFound,
 566 | 		},
 567 | 		"query error": {
 568 | 			queryResult:   getEncodedSpans(spans, func(span *model.Span) ([]byte, error) { return json.Marshal(span) }).RowError(0, errorMock),
 569 | 			expectedTrace: nil,
 570 | 			expectedError: errorMock,
 571 | 		},
 572 | 	}
 573 | 
 574 | 	for name, test := range tests {
 575 | 		t.Run(name, func(t *testing.T) {
 576 | 			if test.tenant == "" {
 577 | 				mock.
 578 | 					ExpectQuery(
 579 | 						fmt.Sprintf("SELECT model FROM %s PREWHERE traceID IN (?)", testSpansTable),
 580 | 					).
 581 | 					WithArgs(traceID).
 582 | 					WillReturnRows(test.queryResult)
 583 | 			} else {
 584 | 				mock.
 585 | 					ExpectQuery(
 586 | 						fmt.Sprintf("SELECT model FROM %s PREWHERE traceID IN (?) AND tenant = ?", testSpansTable),
 587 | 					).
 588 | 					WithArgs(traceID, test.tenant).
 589 | 					WillReturnRows(test.queryResult)
 590 | 			}
 591 | 
 592 | 			traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, test.tenant, testMaxNumSpans)
 593 | 			trace, err := traceReader.GetTrace(context.Background(), traceID)
 594 | 			require.ErrorIs(t, err, test.expectedError)
 595 | 			if trace != nil {
 596 | 				model.SortTrace(trace)
 597 | 			}
 598 | 			if test.expectedTrace != nil {
 599 | 				model.SortTrace(test.expectedTrace)
 600 | 			}
 601 | 			assert.Equal(t, test.expectedTrace, trace)
 602 | 			assert.NoError(t, mock.ExpectationsWereMet())
 603 | 		})
 604 | 	}
 605 | }
 606 | 
 607 | func TestSpanWriter_getTraces(t *testing.T) {
 608 | 	db, mock, err := mocks.GetDbMock()
 609 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 610 | 	defer db.Close()
 611 | 
 612 | 	traceIDs := []model.TraceID{
 613 | 		{High: 0, Low: 1},
 614 | 		{High: 2, Low: 2},
 615 | 		{High: 1, Low: 3},
 616 | 		{High: 0, Low: 4},
 617 | 	}
 618 | 	spans := make([]model.Span, testSpansInTrace*len(traceIDs))
 619 | 	for i := 0; i < testSpansInTrace*len(traceIDs); i++ {
 620 | 		traceID := traceIDs[i%len(traceIDs)]
 621 | 		spans[i] = generateRandomSpan()
 622 | 		spans[i].TraceID = traceID
 623 | 	}
 624 | 
 625 | 	traceIDStrings := make([]driver.Value, 4)
 626 | 	for i, traceID := range traceIDs {
 627 | 		traceIDStrings[i] = traceID.String()
 628 | 	}
 629 | 
 630 | 	defaultQuery := fmt.Sprintf("SELECT model FROM %s PREWHERE traceID IN (?,?,?,?)", testSpansTable)
 631 | 	tenantQuery := fmt.Sprintf("SELECT model FROM %s PREWHERE traceID IN (?,?,?,?) AND tenant = ?", testSpansTable)
 632 | 
 633 | 	tests := map[string]struct {
 634 | 		tenant         string
 635 | 		query          string
 636 | 		args           []driver.Value
 637 | 		queryResult    *sqlmock.Rows
 638 | 		expectedTraces []*model.Trace
 639 | 	}{
 640 | 		"JSON encoded traces one span per trace": {
 641 | 			query:          defaultQuery,
 642 | 			args:           traceIDStrings,
 643 | 			queryResult:    getEncodedSpans(spans[:len(traceIDs)], func(span *model.Span) ([]byte, error) { return json.Marshal(span) }),
 644 | 			expectedTraces: getTracesFromSpans(spans[:len(traceIDs)]),
 645 | 		},
 646 | 		"tenant JSON encoded traces one span per trace": {
 647 | 			tenant:         testTenant,
 648 | 			query:          tenantQuery,
 649 | 			args:           append(traceIDStrings, testTenant),
 650 | 			queryResult:    getEncodedSpans(spans[:len(traceIDs)], func(span *model.Span) ([]byte, error) { return json.Marshal(span) }),
 651 | 			expectedTraces: getTracesFromSpans(spans[:len(traceIDs)]),
 652 | 		},
 653 | 		"Protobuf encoded traces one span per trace": {
 654 | 			query:          defaultQuery,
 655 | 			args:           traceIDStrings,
 656 | 			queryResult:    getEncodedSpans(spans[:len(traceIDs)], func(span *model.Span) ([]byte, error) { return proto.Marshal(span) }),
 657 | 			expectedTraces: getTracesFromSpans(spans[:len(traceIDs)]),
 658 | 		},
 659 | 		"tenant Protobuf encoded traces one span per trace": {
 660 | 			tenant:         testTenant,
 661 | 			query:          tenantQuery,
 662 | 			args:           append(traceIDStrings, testTenant),
 663 | 			queryResult:    getEncodedSpans(spans[:len(traceIDs)], func(span *model.Span) ([]byte, error) { return proto.Marshal(span) }),
 664 | 			expectedTraces: getTracesFromSpans(spans[:len(traceIDs)]),
 665 | 		},
 666 | 		"JSON encoded traces many spans per trace": {
 667 | 			query:          defaultQuery,
 668 | 			args:           traceIDStrings,
 669 | 			queryResult:    getEncodedSpans(spans, func(span *model.Span) ([]byte, error) { return json.Marshal(span) }),
 670 | 			expectedTraces: getTracesFromSpans(spans),
 671 | 		},
 672 | 		"tenant JSON encoded traces many spans per trace": {
 673 | 			tenant:         testTenant,
 674 | 			query:          tenantQuery,
 675 | 			args:           append(traceIDStrings, testTenant),
 676 | 			queryResult:    getEncodedSpans(spans, func(span *model.Span) ([]byte, error) { return json.Marshal(span) }),
 677 | 			expectedTraces: getTracesFromSpans(spans),
 678 | 		},
 679 | 		"Protobuf encoded traces many spans per trace": {
 680 | 			query:          defaultQuery,
 681 | 			args:           traceIDStrings,
 682 | 			queryResult:    getEncodedSpans(spans, func(span *model.Span) ([]byte, error) { return proto.Marshal(span) }),
 683 | 			expectedTraces: getTracesFromSpans(spans),
 684 | 		},
 685 | 		"tenant Protobuf encoded traces many spans per trace": {
 686 | 			tenant:         testTenant,
 687 | 			query:          tenantQuery,
 688 | 			args:           append(traceIDStrings, testTenant),
 689 | 			queryResult:    getEncodedSpans(spans, func(span *model.Span) ([]byte, error) { return proto.Marshal(span) }),
 690 | 			expectedTraces: getTracesFromSpans(spans),
 691 | 		},
 692 | 	}
 693 | 
 694 | 	for name, test := range tests {
 695 | 		t.Run(name, func(t *testing.T) {
 696 | 			mock.
 697 | 				ExpectQuery(test.query).
 698 | 				WithArgs(test.args...).
 699 | 				WillReturnRows(test.queryResult)
 700 | 
 701 | 			traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, test.tenant, testMaxNumSpans)
 702 | 			traces, err := traceReader.getTraces(context.Background(), traceIDs)
 703 | 			require.NoError(t, err)
 704 | 			model.SortTraces(traces)
 705 | 			assert.Equal(t, test.expectedTraces, traces)
 706 | 			assert.NoError(t, mock.ExpectationsWereMet())
 707 | 		})
 708 | 	}
 709 | }
 710 | 
 711 | func TestSpanWriter_getTracesIncorrectData(t *testing.T) {
 712 | 	db, mock, err := mocks.GetDbMock()
 713 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 714 | 	defer db.Close()
 715 | 
 716 | 	traceIDs := []model.TraceID{
 717 | 		{High: 0, Low: 1},
 718 | 		{High: 2, Low: 2},
 719 | 		{High: 1, Low: 3},
 720 | 		{High: 0, Low: 4},
 721 | 	}
 722 | 	spans := make([]model.Span, 2*len(traceIDs))
 723 | 	for i := 0; i < 2*len(traceIDs); i++ {
 724 | 		traceID := traceIDs[i%len(traceIDs)]
 725 | 		spans[i] = generateRandomSpan()
 726 | 		spans[i].TraceID = traceID
 727 | 	}
 728 | 
 729 | 	traceIDStrings := make([]driver.Value, 4)
 730 | 	for i, traceID := range traceIDs {
 731 | 		traceIDStrings[i] = traceID.String()
 732 | 	}
 733 | 
 734 | 	defaultQuery := fmt.Sprintf("SELECT model FROM %s PREWHERE traceID IN (?,?,?,?)", testSpansTable)
 735 | 	tenantQuery := fmt.Sprintf("SELECT model FROM %s PREWHERE traceID IN (?,?,?,?) AND tenant = ?", testSpansTable)
 736 | 
 737 | 	tests := map[string]struct {
 738 | 		tenant         string
 739 | 		query          string
 740 | 		args           []driver.Value
 741 | 		queryResult    *sqlmock.Rows
 742 | 		expectedResult []*model.Trace
 743 | 		expectedError  error
 744 | 	}{
 745 | 		"JSON encoding incorrect data": {
 746 | 			query:          defaultQuery,
 747 | 			args:           traceIDStrings,
 748 | 			queryResult:    getRows([]driver.Value{[]byte{'{', 'n', 'o', 't', '_', 'a', '_', 'k', 'e', 'y', '}'}}),
 749 | 			expectedResult: []*model.Trace(nil),
 750 | 			expectedError:  fmt.Errorf("invalid character 'n' looking for beginning of object key string"),
 751 | 		},
 752 | 		"tenant JSON encoding incorrect data": {
 753 | 			tenant:         testTenant,
 754 | 			query:          tenantQuery,
 755 | 			args:           append(traceIDStrings, testTenant),
 756 | 			queryResult:    getRows([]driver.Value{[]byte{'{', 'n', 'o', 't', '_', 'a', '_', 'k', 'e', 'y', '}'}}),
 757 | 			expectedResult: []*model.Trace(nil),
 758 | 			expectedError:  fmt.Errorf("invalid character 'n' looking for beginning of object key string"),
 759 | 		},
 760 | 		"Protobuf encoding incorrect data": {
 761 | 			query:          defaultQuery,
 762 | 			args:           traceIDStrings,
 763 | 			queryResult:    getRows([]driver.Value{[]byte{'i', 'n', 'c', 'o', 'r', 'r', 'e', 'c', 't'}}),
 764 | 			expectedResult: []*model.Trace{},
 765 | 			expectedError:  nil,
 766 | 		},
 767 | 		"tenant Protobuf encoding incorrect data": {
 768 | 			tenant:         testTenant,
 769 | 			query:          tenantQuery,
 770 | 			args:           append(traceIDStrings, testTenant),
 771 | 			queryResult:    getRows([]driver.Value{[]byte{'i', 'n', 'c', 'o', 'r', 'r', 'e', 'c', 't'}}),
 772 | 			expectedResult: []*model.Trace{},
 773 | 			expectedError:  nil,
 774 | 		},
 775 | 	}
 776 | 
 777 | 	for name, test := range tests {
 778 | 		t.Run(name, func(t *testing.T) {
 779 | 			mock.
 780 | 				ExpectQuery(test.query).
 781 | 				WithArgs(test.args...).
 782 | 				WillReturnRows(test.queryResult)
 783 | 
 784 | 			traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, test.tenant, testMaxNumSpans)
 785 | 			traces, err := traceReader.getTraces(context.Background(), traceIDs)
 786 | 			if test.expectedError == nil {
 787 | 				assert.NoError(t, err)
 788 | 			} else {
 789 | 				assert.EqualError(t, err, test.expectedError.Error())
 790 | 			}
 791 | 			assert.Equal(t, test.expectedResult, traces)
 792 | 			assert.NoError(t, mock.ExpectationsWereMet())
 793 | 		})
 794 | 	}
 795 | }
 796 | 
 797 | func TestSpanWriter_getTracesQueryError(t *testing.T) {
 798 | 	db, mock, err := mocks.GetDbMock()
 799 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 800 | 	defer db.Close()
 801 | 
 802 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
 803 | 	traceIDs := []model.TraceID{
 804 | 		{High: 0, Low: 1},
 805 | 		{High: 2, Low: 2},
 806 | 		{High: 1, Low: 3},
 807 | 		{High: 0, Low: 4},
 808 | 	}
 809 | 
 810 | 	traceIDStrings := make([]driver.Value, 4)
 811 | 	for i, traceID := range traceIDs {
 812 | 		traceIDStrings[i] = traceID.String()
 813 | 	}
 814 | 
 815 | 	mock.
 816 | 		ExpectQuery(
 817 | 			fmt.Sprintf("SELECT model FROM %s PREWHERE traceID IN (?,?,?,?)", testSpansTable),
 818 | 		).
 819 | 		WithArgs(traceIDStrings...).
 820 | 		WillReturnError(errorMock)
 821 | 
 822 | 	traces, err := traceReader.getTraces(context.Background(), traceIDs)
 823 | 	assert.EqualError(t, err, errorMock.Error())
 824 | 	assert.Equal(t, []*model.Trace(nil), traces)
 825 | 	assert.NoError(t, mock.ExpectationsWereMet())
 826 | }
 827 | 
 828 | func TestSpanWriter_getTracesRowsScanError(t *testing.T) {
 829 | 	db, mock, err := mocks.GetDbMock()
 830 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 831 | 	defer db.Close()
 832 | 
 833 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
 834 | 	traceIDs := []model.TraceID{
 835 | 		{High: 0, Low: 1},
 836 | 		{High: 2, Low: 2},
 837 | 		{High: 1, Low: 3},
 838 | 		{High: 0, Low: 4},
 839 | 	}
 840 | 
 841 | 	traceIDStrings := make([]driver.Value, 4)
 842 | 	for i, traceID := range traceIDs {
 843 | 		traceIDStrings[i] = traceID.String()
 844 | 	}
 845 | 	rows := getRows([]driver.Value{"some value"}).RowError(0, errorMock)
 846 | 
 847 | 	mock.
 848 | 		ExpectQuery(
 849 | 			fmt.Sprintf("SELECT model FROM %s PREWHERE traceID IN (?,?,?,?)", testSpansTable),
 850 | 		).
 851 | 		WithArgs(traceIDStrings...).
 852 | 		WillReturnRows(rows)
 853 | 
 854 | 	traces, err := traceReader.getTraces(context.Background(), traceIDs)
 855 | 	assert.EqualError(t, err, errorMock.Error())
 856 | 	assert.Equal(t, []*model.Trace(nil), traces)
 857 | 	assert.NoError(t, mock.ExpectationsWereMet())
 858 | }
 859 | 
 860 | func TestSpanWriter_getTraceNoTraceIDs(t *testing.T) {
 861 | 	db, _, err := mocks.GetDbMock()
 862 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 863 | 	defer db.Close()
 864 | 
 865 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
 866 | 	traceIDs := make([]model.TraceID, 0)
 867 | 
 868 | 	traces, err := traceReader.getTraces(context.Background(), traceIDs)
 869 | 	require.NoError(t, err)
 870 | 	assert.Equal(t, make([]*model.Trace, 0), traces)
 871 | }
 872 | 
 873 | func getEncodedSpans(spans []model.Span, marshal func(span *model.Span) ([]byte, error)) *sqlmock.Rows {
 874 | 	serialized := make([]driver.Value, len(spans))
 875 | 	for i := range spans {
 876 | 		bytes, err := marshal(&spans[i])
 877 | 		if err != nil {
 878 | 			panic(err)
 879 | 		}
 880 | 		serialized[i] = bytes
 881 | 	}
 882 | 	return getRows(serialized)
 883 | }
 884 | 
 885 | func getRows(values []driver.Value) *sqlmock.Rows {
 886 | 	rows := sqlmock.NewRows([]string{"model"})
 887 | 	for _, value := range values {
 888 | 		rows.AddRow(value)
 889 | 	}
 890 | 	return rows
 891 | }
 892 | 
 893 | func getTracesFromSpans(spans []model.Span) []*model.Trace {
 894 | 	traces := make(map[model.TraceID]*model.Trace)
 895 | 	for i, span := range spans {
 896 | 		if _, ok := traces[span.TraceID]; !ok {
 897 | 			traces[span.TraceID] = &model.Trace{}
 898 | 		}
 899 | 		traces[span.TraceID].Spans = append(traces[span.TraceID].Spans, &spans[i])
 900 | 	}
 901 | 
 902 | 	res := make([]*model.Trace, 0, len(traces))
 903 | 	for _, trace := range traces {
 904 | 		res = append(res, trace)
 905 | 	}
 906 | 	model.SortTraces(res)
 907 | 	return res
 908 | }
 909 | 
 910 | func TestSpanWriter_findTraceIDsInRange(t *testing.T) {
 911 | 	db, mock, err := mocks.GetDbMock()
 912 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
 913 | 	defer db.Close()
 914 | 
 915 | 	service := "test_service"
 916 | 	operation := "test_operation"
 917 | 	start := time.Unix(0, 0)
 918 | 	end := time.Now()
 919 | 	minDuration := time.Minute
 920 | 	maxDuration := time.Hour
 921 | 	tags := map[string]string{
 922 | 		"key": "value",
 923 | 	}
 924 | 	skip := []model.TraceID{
 925 | 		{High: 1, Low: 1},
 926 | 		{High: 0, Low: 0},
 927 | 	}
 928 | 	tagArgs := func(tags map[string]string) []model.KeyValue {
 929 | 		res := make([]model.KeyValue, 0, len(tags))
 930 | 		for key, value := range tags {
 931 | 			res = append(res, model.String(key, value))
 932 | 		}
 933 | 		return res
 934 | 	}(tags)
 935 | 	rowValues := []driver.Value{
 936 | 		"1",
 937 | 		"2",
 938 | 		"3",
 939 | 	}
 940 | 	rows := []model.TraceID{
 941 | 		{High: 0, Low: 1},
 942 | 		{High: 0, Low: 2},
 943 | 		{High: 0, Low: 3},
 944 | 	}
 945 | 
 946 | 	tests := map[string]struct {
 947 | 		queryParams   spanstore.TraceQueryParameters
 948 | 		skip          []model.TraceID
 949 | 		tenant        string
 950 | 		expectedQuery string
 951 | 		expectedArgs  []driver.Value
 952 | 	}{
 953 | 		"default": {
 954 | 			queryParams: spanstore.TraceQueryParameters{ServiceName: service, NumTraces: testNumTraces},
 955 | 			skip:        make([]model.TraceID, 0),
 956 | 			expectedQuery: fmt.Sprintf(
 957 | 				"SELECT DISTINCT traceID FROM %s WHERE service = ? AND timestamp >= ? AND timestamp <= ? ORDER BY service, timestamp DESC LIMIT ?",
 958 | 				testIndexTable,
 959 | 			),
 960 | 			expectedArgs: []driver.Value{
 961 | 				service,
 962 | 				start,
 963 | 				end,
 964 | 				testNumTraces,
 965 | 			},
 966 | 		},
 967 | 		"tenant": {
 968 | 			queryParams: spanstore.TraceQueryParameters{ServiceName: service, NumTraces: testNumTraces},
 969 | 			skip:        make([]model.TraceID, 0),
 970 | 			tenant:      testTenant,
 971 | 			expectedQuery: fmt.Sprintf(
 972 | 				"SELECT DISTINCT traceID FROM %s WHERE service = ? AND tenant = ? AND timestamp >= ? AND timestamp <= ? ORDER BY service, timestamp DESC LIMIT ?",
 973 | 				testIndexTable,
 974 | 			),
 975 | 			expectedArgs: []driver.Value{
 976 | 				service,
 977 | 				testTenant,
 978 | 				start,
 979 | 				end,
 980 | 				testNumTraces,
 981 | 			},
 982 | 		},
 983 | 		"maxDuration": {
 984 | 			queryParams: spanstore.TraceQueryParameters{ServiceName: service, NumTraces: testNumTraces, DurationMax: maxDuration},
 985 | 			skip:        make([]model.TraceID, 0),
 986 | 			expectedQuery: fmt.Sprintf(
 987 | 				"SELECT DISTINCT traceID FROM %s WHERE service = ? AND timestamp >= ? AND timestamp <= ? AND durationUs <= ? ORDER BY service, timestamp DESC LIMIT ?",
 988 | 				testIndexTable,
 989 | 			),
 990 | 			expectedArgs: []driver.Value{
 991 | 				service,
 992 | 				start,
 993 | 				end,
 994 | 				maxDuration.Microseconds(),
 995 | 				testNumTraces,
 996 | 			},
 997 | 		},
 998 | 		"minDuration": {
 999 | 			queryParams: spanstore.TraceQueryParameters{ServiceName: service, NumTraces: testNumTraces, DurationMin: minDuration},
1000 | 			skip:        make([]model.TraceID, 0),
1001 | 			expectedQuery: fmt.Sprintf(
1002 | 				"SELECT DISTINCT traceID FROM %s WHERE service = ? AND timestamp >= ? AND timestamp <= ? AND durationUs >= ? ORDER BY service, timestamp DESC LIMIT ?",
1003 | 				testIndexTable,
1004 | 			),
1005 | 			expectedArgs: []driver.Value{
1006 | 				service,
1007 | 				start,
1008 | 				end,
1009 | 				minDuration.Microseconds(),
1010 | 				testNumTraces,
1011 | 			},
1012 | 		},
1013 | 		"tags": {
1014 | 			queryParams: spanstore.TraceQueryParameters{ServiceName: service, NumTraces: testNumTraces, Tags: tags},
1015 | 			skip:        make([]model.TraceID, 0),
1016 | 			expectedQuery: fmt.Sprintf(
1017 | 				"SELECT DISTINCT traceID FROM %s WHERE service = ? AND timestamp >= ? AND timestamp <= ?%s ORDER BY service, timestamp DESC LIMIT ?",
1018 | 				testIndexTable,
1019 | 				strings.Repeat(" AND has(tags.key, ?) AND has(splitByChar(',', tags.value[indexOf(tags.key, ?)]), ?)", len(tags)),
1020 | 			),
1021 | 			expectedArgs: []driver.Value{
1022 | 				service,
1023 | 				start,
1024 | 				end,
1025 | 				tagArgs[0].Key,
1026 | 				tagArgs[0].Key,
1027 | 				tagArgs[0].AsString(),
1028 | 				testNumTraces,
1029 | 			},
1030 | 		},
1031 | 		"skip": {
1032 | 			queryParams: spanstore.TraceQueryParameters{ServiceName: service, NumTraces: testNumTraces},
1033 | 			skip:        skip,
1034 | 			expectedQuery: fmt.Sprintf(
1035 | 				"SELECT DISTINCT traceID FROM %s WHERE service = ? AND timestamp >= ? AND timestamp <= ? AND traceID NOT IN (?,?) ORDER BY service, timestamp DESC LIMIT ?",
1036 | 				testIndexTable,
1037 | 			),
1038 | 			expectedArgs: []driver.Value{
1039 | 				service,
1040 | 				start,
1041 | 				end,
1042 | 				skip[0].String(),
1043 | 				skip[1].String(),
1044 | 				testNumTraces - len(skip),
1045 | 			},
1046 | 		},
1047 | 		"operation": {
1048 | 			queryParams: spanstore.TraceQueryParameters{ServiceName: service, NumTraces: testNumTraces, OperationName: operation},
1049 | 			skip:        make([]model.TraceID, 0),
1050 | 			expectedQuery: fmt.Sprintf(
1051 | 				"SELECT DISTINCT traceID FROM %s WHERE service = ? AND operation = ? AND timestamp >= ? AND timestamp <= ? ORDER BY service, timestamp DESC LIMIT ?",
1052 | 				testIndexTable,
1053 | 			),
1054 | 			expectedArgs: []driver.Value{
1055 | 				service,
1056 | 				operation,
1057 | 				start,
1058 | 				end,
1059 | 				testNumTraces,
1060 | 			},
1061 | 		},
1062 | 	}
1063 | 
1064 | 	for name, test := range tests {
1065 | 		t.Run(name, func(t *testing.T) {
1066 | 			queryResult := sqlmock.NewRows([]string{"traceID"})
1067 | 			for _, row := range rowValues {
1068 | 				queryResult.AddRow(row)
1069 | 			}
1070 | 
1071 | 			mock.
1072 | 				ExpectQuery(test.expectedQuery).
1073 | 				WithArgs(test.expectedArgs...).
1074 | 				WillReturnRows(queryResult)
1075 | 
1076 | 			traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, test.tenant, testMaxNumSpans)
1077 | 			res, err := traceReader.findTraceIDsInRange(
1078 | 				context.Background(),
1079 | 				&test.queryParams,
1080 | 				start,
1081 | 				end,
1082 | 				test.skip)
1083 | 			require.NoError(t, err)
1084 | 			assert.Equal(t, rows, res)
1085 | 			assert.NoError(t, mock.ExpectationsWereMet())
1086 | 		})
1087 | 	}
1088 | }
1089 | 
1090 | func TestSpanReader_findTraceIDsInRangeNoIndexTable(t *testing.T) {
1091 | 	db, _, err := mocks.GetDbMock()
1092 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
1093 | 	defer db.Close()
1094 | 
1095 | 	traceReader := NewTraceReader(db, testOperationsTable, "", testSpansTable, "", testMaxNumSpans)
1096 | 	res, err := traceReader.findTraceIDsInRange(
1097 | 		context.Background(),
1098 | 		nil,
1099 | 		time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC),
1100 | 		time.Date(2000, 1, 2, 0, 0, 0, 0, time.UTC),
1101 | 		make([]model.TraceID, 0),
1102 | 	)
1103 | 	assert.Equal(t, []model.TraceID(nil), res)
1104 | 	assert.EqualError(t, err, errNoIndexTable.Error())
1105 | }
1106 | 
1107 | func TestSpanReader_findTraceIDsInRangeEndBeforeStart(t *testing.T) {
1108 | 	db, _, err := mocks.GetDbMock()
1109 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
1110 | 	defer db.Close()
1111 | 
1112 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
1113 | 	res, err := traceReader.findTraceIDsInRange(
1114 | 		context.Background(),
1115 | 		nil,
1116 | 		time.Date(2000, 1, 2, 0, 0, 0, 0, time.UTC),
1117 | 		time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC),
1118 | 		make([]model.TraceID, 0),
1119 | 	)
1120 | 	assert.Equal(t, make([]model.TraceID, 0), res)
1121 | 	assert.NoError(t, err)
1122 | }
1123 | 
1124 | func TestSpanReader_findTraceIDsInRangeQueryError(t *testing.T) {
1125 | 	db, mock, err := mocks.GetDbMock()
1126 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
1127 | 	defer db.Close()
1128 | 
1129 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
1130 | 	service := "test_service"
1131 | 	start := time.Unix(0, 0)
1132 | 	end := time.Now()
1133 | 
1134 | 	mock.
1135 | 		ExpectQuery(fmt.Sprintf(
1136 | 			"SELECT DISTINCT traceID FROM %s WHERE service = ? AND timestamp >= ? AND timestamp <= ? ORDER BY service, timestamp DESC LIMIT ?",
1137 | 			testIndexTable,
1138 | 		)).
1139 | 		WithArgs(
1140 | 			service,
1141 | 			start,
1142 | 			end,
1143 | 			testNumTraces,
1144 | 		).
1145 | 		WillReturnError(errorMock)
1146 | 
1147 | 	res, err := traceReader.findTraceIDsInRange(
1148 | 		context.Background(),
1149 | 		&spanstore.TraceQueryParameters{ServiceName: service, NumTraces: testNumTraces},
1150 | 		start,
1151 | 		end,
1152 | 		make([]model.TraceID, 0))
1153 | 	assert.EqualError(t, err, errorMock.Error())
1154 | 	assert.Equal(t, []model.TraceID(nil), res)
1155 | 	assert.NoError(t, mock.ExpectationsWereMet())
1156 | }
1157 | 
1158 | func TestSpanReader_findTraceIDsInRangeIncorrectData(t *testing.T) {
1159 | 	db, mock, err := mocks.GetDbMock()
1160 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
1161 | 	defer db.Close()
1162 | 
1163 | 	service := "test_service"
1164 | 	start := time.Unix(0, 0)
1165 | 	end := time.Now()
1166 | 
1167 | 	tests := map[string]struct {
1168 | 		query  string
1169 | 		args   []driver.Value
1170 | 		tenant string
1171 | 	}{
1172 | 		"default": {
1173 | 			query: fmt.Sprintf(
1174 | 				"SELECT DISTINCT traceID FROM %s WHERE service = ? AND timestamp >= ? AND timestamp <= ? ORDER BY service, timestamp DESC LIMIT ?",
1175 | 				testIndexTable,
1176 | 			),
1177 | 			args: []driver.Value{service, start, end, testNumTraces},
1178 | 		},
1179 | 		"tenant": {
1180 | 			query: fmt.Sprintf(
1181 | 				"SELECT DISTINCT traceID FROM %s WHERE service = ? AND tenant = ? AND timestamp >= ? AND timestamp <= ? ORDER BY service, timestamp DESC LIMIT ?",
1182 | 				testIndexTable,
1183 | 			),
1184 | 			args:   []driver.Value{service, testTenant, start, end, testNumTraces},
1185 | 			tenant: testTenant,
1186 | 		},
1187 | 	}
1188 | 
1189 | 	for name, test := range tests {
1190 | 		t.Run(name, func(t *testing.T) {
1191 | 			traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, test.tenant, testMaxNumSpans)
1192 | 
1193 | 			rowValues := []driver.Value{
1194 | 				"1",
1195 | 				"incorrect value",
1196 | 				"3",
1197 | 			}
1198 | 			queryResult := sqlmock.NewRows([]string{"traceID"})
1199 | 			for _, row := range rowValues {
1200 | 				queryResult.AddRow(row)
1201 | 			}
1202 | 			mock.ExpectQuery(test.query).WithArgs(test.args...).WillReturnRows(queryResult)
1203 | 
1204 | 			res, err := traceReader.findTraceIDsInRange(
1205 | 				context.Background(),
1206 | 				&spanstore.TraceQueryParameters{ServiceName: service, NumTraces: testNumTraces},
1207 | 				start,
1208 | 				end,
1209 | 				make([]model.TraceID, 0))
1210 | 			assert.Error(t, err)
1211 | 			assert.Equal(t, []model.TraceID(nil), res)
1212 | 			assert.NoError(t, mock.ExpectationsWereMet())
1213 | 		})
1214 | 	}
1215 | }
1216 | 
1217 | func TestSpanReader_getStrings(t *testing.T) {
1218 | 	db, mock, err := mocks.GetDbMock()
1219 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
1220 | 	defer db.Close()
1221 | 
1222 | 	query := "SELECT b FROM a WHERE b != ?"
1223 | 	argValues := []driver.Value{driver.Value("a")}
1224 | 	args := []interface{}{"a"}
1225 | 	rows := []driver.Value{"some", "query", "rows"}
1226 | 	expectedResult := []string{"some", "query", "rows"}
1227 | 	result := sqlmock.NewRows([]string{"b"})
1228 | 	for _, str := range rows {
1229 | 		result.AddRow(str)
1230 | 	}
1231 | 	mock.ExpectQuery(query).WithArgs(argValues...).WillReturnRows(result)
1232 | 
1233 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
1234 | 
1235 | 	queryResult, err := traceReader.getStrings(context.Background(), query, args...)
1236 | 	assert.NoError(t, err)
1237 | 	assert.EqualValues(t, expectedResult, queryResult)
1238 | 	assert.NoError(t, mock.ExpectationsWereMet())
1239 | }
1240 | 
1241 | func TestSpanReader_getStringsQueryError(t *testing.T) {
1242 | 	db, mock, err := mocks.GetDbMock()
1243 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
1244 | 	defer db.Close()
1245 | 
1246 | 	query := "SELECT b FROM a WHERE b != ?"
1247 | 	argValues := []driver.Value{driver.Value("a")}
1248 | 	args := []interface{}{"a"}
1249 | 	mock.ExpectQuery(query).WithArgs(argValues...).WillReturnError(errorMock)
1250 | 
1251 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
1252 | 
1253 | 	queryResult, err := traceReader.getStrings(context.Background(), query, args...)
1254 | 	assert.EqualError(t, err, errorMock.Error())
1255 | 	assert.EqualValues(t, []string(nil), queryResult)
1256 | 	assert.NoError(t, mock.ExpectationsWereMet())
1257 | }
1258 | 
1259 | func TestSpanReader_getStringsRowError(t *testing.T) {
1260 | 	db, mock, err := mocks.GetDbMock()
1261 | 	require.NoError(t, err, "an error was not expected when opening a stub database connection")
1262 | 	defer db.Close()
1263 | 
1264 | 	query := "SELECT b FROM a WHERE b != ?"
1265 | 	argValues := []driver.Value{driver.Value("a")}
1266 | 	args := []interface{}{"a"}
1267 | 	rows := []driver.Value{"some", "query", "rows"}
1268 | 	result := sqlmock.NewRows([]string{"b"})
1269 | 	for _, str := range rows {
1270 | 		result.AddRow(str)
1271 | 	}
1272 | 	result.RowError(2, errorMock)
1273 | 	mock.ExpectQuery(query).WithArgs(argValues...).WillReturnRows(result)
1274 | 
1275 | 	traceReader := NewTraceReader(db, testOperationsTable, testIndexTable, testSpansTable, "", testMaxNumSpans)
1276 | 
1277 | 	queryResult, err := traceReader.getStrings(context.Background(), query, args...)
1278 | 	assert.EqualError(t, err, errorMock.Error())
1279 | 	assert.EqualValues(t, []string(nil), queryResult)
1280 | 	assert.NoError(t, mock.ExpectationsWereMet())
1281 | }
1282 | 


--------------------------------------------------------------------------------