├── .gitignore ├── .goreleaser.yaml ├── .travis.yml ├── LICENSE ├── NOTICE ├── README.md ├── cmd └── outflux │ ├── .goreleaser.yml │ ├── app_context.go │ ├── main.go │ ├── migrate.go │ ├── migrate_i_test.go │ ├── migrate_test.go │ ├── mocks_for_test.go │ ├── root.go │ ├── schema_transfer.go │ ├── schema_transfer_i_test.go │ └── schema_transfer_test.go ├── docker-compose-integration.yml ├── go.mod ├── go.sum └── internal ├── cli ├── connection_config.go ├── extraction_conf_creator.go ├── flagparsers │ ├── connection_args_parser.go │ ├── connection_flags.go │ ├── flags.go │ ├── migrate_args_parser.go │ └── schema_transfer_args_parser.go ├── ingestion_conf_creator.go ├── migration_config.go ├── pipe_service.go ├── pipe_service_create_elements.go ├── pipe_service_create_transformers.go ├── pipe_service_create_transformers_test.go └── transformer_service.go ├── connections ├── influx_connection.go ├── influx_connection_test.go ├── pgx_mock.go ├── pgx_wrapper.go ├── timescale_connection.go └── timescale_connection_i_test.go ├── extraction ├── config │ ├── extraction_config.go │ └── extraction_config_test.go ├── extraction_service.go ├── extractor.go └── influx │ ├── idrfconversion │ ├── idrf_converter.go │ └── idrf_converter_test.go │ ├── influx_data_producer.go │ ├── influx_extractor.go │ ├── query_building.go │ └── query_building_test.go ├── idrf ├── bundle.go ├── column_info.go ├── column_info_test.go ├── data_set.go ├── data_set_test.go ├── data_type.go ├── data_type_test.go └── row.go ├── ingestion ├── config │ ├── ingestor_config.go │ └── ingestor_config_test.go ├── ingestor.go ├── ingestor_service.go └── ts │ ├── ingestion_routine.go │ ├── ingestion_routine_test.go │ └── ts_ingestor.go ├── pipeline ├── pipe.go ├── pipe_extraction_routine.go ├── pipe_ingestor_routine.go ├── pipe_prepare_elements.go ├── pipe_run.go └── pipe_transform_routine.go ├── schemamanagement ├── influx │ ├── dataset_constructor.go │ ├── dataset_constructor_test.go │ ├── discovery │ │ ├── field_discovery.go │ │ ├── field_discovery_test.go │ │ ├── measure_discovery.go │ │ ├── measure_discovery_test.go │ │ ├── tag_discovery.go │ │ └── tag_discovery_test.go │ ├── influx_schema_manager.go │ ├── influx_schema_manager_test.go │ └── influxqueries │ │ ├── influx_query_service.go │ │ ├── influx_query_service_test.go │ │ └── mock_influx_client.go ├── schema_manager.go ├── schema_manager_service.go ├── schemaconfig │ └── schema_strategy.go └── ts │ ├── hypertable_dimension_explorer_i_test.go │ ├── idrr_to_pg_type_test.go │ ├── pg_type_to_idrf_test.go │ ├── schema_explorer.go │ ├── table_compatibility.go │ ├── table_compatibility_test.go │ ├── table_creator.go │ ├── table_creator_i_test.go │ ├── table_creator_test.go │ ├── table_dropper.go │ ├── ts_schema_manager.go │ ├── ts_schema_manager_test.go │ └── type_conversion.go ├── testutils ├── config.go └── server_preparation.go ├── transformation ├── jsoncombiner │ ├── column_combiner.go │ ├── column_combiner_test.go │ ├── json_creator.go │ ├── transformer.go │ ├── transformer_test.go │ ├── validator.go │ └── validator_test.go └── transformer.go └── utils ├── broadcaster_test.go ├── check_error_test.go ├── closer_test.go ├── error_broadcaster.go ├── error_broadcaster_test.go ├── subscriber_test.go └── unsubscriber_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/go,linux,macos,windows 3 | # Edit at https://www.gitignore.io/?templates=go,linux,macos,windows 4 | 5 | ### Go ### 6 | # 
Binaries for programs and plugins 7 | *.exe 8 | *.exe~ 9 | *.dll 10 | *.so 11 | *.dylib 12 | 13 | # Test binary, built with `go test -c` 14 | *.test 15 | 16 | # Output of the go coverage tool, specifically when used with LiteIDE 17 | *.out 18 | 19 | ### Go Patch ### 20 | /vendor/ 21 | /Godeps/ 22 | 23 | ### Linux ### 24 | *~ 25 | 26 | # temporary files which can be created if a process still has a handle open of a deleted file 27 | .fuse_hidden* 28 | 29 | # KDE directory preferences 30 | .directory 31 | 32 | # Linux trash folder which might appear on any partition or disk 33 | .Trash-* 34 | 35 | # .nfs files are created when an open file is removed but is still being accessed 36 | .nfs* 37 | 38 | ### macOS ### 39 | # General 40 | .DS_Store 41 | .AppleDouble 42 | .LSOverride 43 | 44 | # Icon must end with two \r 45 | Icon 46 | 47 | # Thumbnails 48 | ._* 49 | 50 | # Files that might appear in the root of a volume 51 | .DocumentRevisions-V100 52 | .fseventsd 53 | .Spotlight-V100 54 | .TemporaryItems 55 | .Trashes 56 | .VolumeIcon.icns 57 | .com.apple.timemachine.donotpresent 58 | 59 | # Directories potentially created on remote AFP share 60 | .AppleDB 61 | .AppleDesktop 62 | Network Trash Folder 63 | Temporary Items 64 | .apdisk 65 | 66 | ### Windows ### 67 | # Windows thumbnail cache files 68 | Thumbs.db 69 | ehthumbs.db 70 | ehthumbs_vista.db 71 | 72 | # Dump file 73 | *.stackdump 74 | 75 | # Folder config file 76 | [Dd]esktop.ini 77 | 78 | # Recycle Bin used on file shares 79 | $RECYCLE.BIN/ 80 | 81 | # Windows Installer files 82 | *.cab 83 | *.msi 84 | *.msix 85 | *.msm 86 | *.msp 87 | 88 | # Windows shortcuts 89 | *.lnk 90 | 91 | # End of https://www.gitignore.io/api/go,linux,macos,windows 92 | .vscode 93 | Gopkg.lock 94 | .idea 95 | 96 | 97 | dist/ 98 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | # This is an example .goreleaser.yml file with some sensible defaults. 2 | # Make sure to check the documentation at https://goreleaser.com 3 | before: 4 | hooks: 5 | - go mod tidy 6 | builds: 7 | - env: 8 | - CGO_ENABLED=0 9 | goos: 10 | - linux 11 | - windows 12 | - darwin 13 | main: ./cmd/outflux 14 | goarch: 15 | - amd64 16 | - arm64 17 | 18 | archives: 19 | - format: tar.gz 20 | name_template: >- 21 | {{ .ProjectName }}_ 22 | {{- .Version }}_ 23 | {{- title .Os }}_ 24 | {{- if eq .Arch "amd64" }}x86_64 25 | {{- else if eq .Arch "386" }}i386 26 | {{- else }}{{ .Arch }}{{ end }} 27 | {{- if .Arm }}v{{ .Arm }}{{ end }} 28 | format_overrides: 29 | - goos: windows 30 | format: zip 31 | checksum: 32 | name_template: 'checksums.txt' 33 | snapshot: 34 | name_template: "{{ incpatch .Version }}-next" 35 | changelog: 36 | sort: asc 37 | filters: 38 | exclude: 39 | - '^docs:' 40 | - '^test:' 41 | 42 | # The lines beneath this are called `modelines`. See `:help modeline` 43 | # Feel free to remove those if you don't want/use them. 
44 | # yaml-language-server: $schema=https://goreleaser.com/static/schema.json 45 | # vim: set ts=2 sw=2 tw=0 fo=cnqoj 46 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.13.x 5 | services: 6 | - docker 7 | install: true 8 | script: 9 | - docker run -d --name ts1 -p 5433:5432 -e POSTGRES_PASSWORD=postgres timescale/timescaledb:1.4.2-pg11 10 | - docker run -d --name influx1_0 -p 8086:8086 influxdb:1.0 11 | - GO111MODULE=on go test -race -tags=integration ./... 12 | - go clean -testcache 13 | - docker stop influx1_0 && docker rm influx1_0 14 | - docker stop ts1 && docker rm ts1 15 | - echo '- - - - -' 16 | - docker run -d --name ts1 -p 5433:5432 -e POSTGRES_PASSWORD=postgres timescale/timescaledb:1.4.2-pg11 17 | - docker run -d --name influx1_6 -p 8086:8086 influxdb:1.6 18 | - GO111MODULE=on go test -race -tags=integration ./... 19 | - go clean -testcache 20 | - docker stop influx1_6 && docker rm influx1_6 21 | - docker stop ts1 && docker rm ts1 22 | - echo '- - - - -' 23 | - docker run -d --name ts1 -p 5433:5432 -e POSTGRES_PASSWORD=postgres timescale/timescaledb:1.4.2-pg11 24 | - docker run -d --name influx1_8 -p 8086:8086 influxdb:1.8 25 | - GO111MODULE=on go test -race -tags=integration ./... 26 | - docker stop influx1_8 && docker rm influx1_8 27 | - docker stop ts1 && docker rm ts1 28 | after_success: 29 | - bash <(curl -s https://codecov.io/bash) 30 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Outflux by Timescale (TM) 2 | 3 | Copyright (c) 2019-2020 Timescale, Inc. All Rights Reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | 17 | -------------------------------------------------------------------------------- /cmd/outflux/.goreleaser.yml: --------------------------------------------------------------------------------
1 | builds:
2 | -
3 | env:
4 | - CGO_ENABLED=0
5 | goos:
6 | - linux
7 | - darwin
8 | - windows
9 | goarch:
10 | - amd64
11 | archives:
12 | -
13 | id: outflux
14 | replacements:
15 | darwin: macOS
16 | linux: Linux
17 | windows: Windows
18 | 386: i386
19 | amd64: x86_64
20 | checksum:
21 | name_template: 'checksums.txt'
22 | snapshot:
23 | name_template: "outflux_{{ .Tag }}"
24 | changelog:
25 | sort: asc
26 | filters:
27 | exclude:
28 | - '^docs:'
29 | - '^test:'
-------------------------------------------------------------------------------- /cmd/outflux/app_context.go: --------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "github.com/timescale/outflux/internal/cli"
5 | "github.com/timescale/outflux/internal/connections"
6 | "github.com/timescale/outflux/internal/extraction"
7 | "github.com/timescale/outflux/internal/ingestion"
8 | "github.com/timescale/outflux/internal/schemamanagement"
9 | "github.com/timescale/outflux/internal/schemamanagement/influx/discovery"
10 | "github.com/timescale/outflux/internal/schemamanagement/influx/influxqueries"
11 | )
12 |
13 | type appContext struct {
14 | ics connections.InfluxConnectionService
15 | tscs connections.TSConnectionService
16 | pipeService cli.PipeService
17 | influxQueryService influxqueries.InfluxQueryService
18 | influxTagExplorer discovery.TagExplorer
19 | influxFieldExplorer discovery.FieldExplorer
20 | influxMeasureExplorer discovery.MeasureExplorer
21 | extractorService extraction.ExtractorService
22 | schemaManagerService schemamanagement.SchemaManagerService
23 | transformerService cli.TransformerService
24 | }
25 |
26 | func initAppContext() *appContext {
27 | tscs := connections.NewTSConnectionService()
28 | ics := connections.NewInfluxConnectionService()
29 | ingestorService := ingestion.NewIngestorService()
30 | influxQueryService := influxqueries.NewInfluxQueryService()
31 | influxTagExplorer := discovery.NewTagExplorer(influxQueryService)
32 | influxFieldExplorer := discovery.NewFieldExplorer(influxQueryService)
33 | influxMeasureExplorer := discovery.NewMeasureExplorer(influxQueryService, influxFieldExplorer)
34 | schemaManagerService := schemamanagement.NewSchemaManagerService(influxMeasureExplorer, influxTagExplorer, influxFieldExplorer)
35 | extractorService := extraction.NewExtractorService(schemaManagerService)
36 |
37 | transformerService := cli.NewTransformerService(influxTagExplorer, influxFieldExplorer)
38 | pipeService := cli.NewPipeService(ingestorService, extractorService, transformerService)
39 | return &appContext{
40 | ics: ics,
41 | tscs: tscs,
42 | pipeService: pipeService,
43 | influxQueryService: influxQueryService,
44 | extractorService: extractorService,
45 | schemaManagerService: schemaManagerService,
46 | transformerService: transformerService,
47 | influxTagExplorer: influxTagExplorer,
48 | influxFieldExplorer: influxFieldExplorer,
49 | influxMeasureExplorer: influxMeasureExplorer,
50 | }
51 | }
-------------------------------------------------------------------------------- /cmd/outflux/main.go: --------------------------------------------------------------------------------
1 | package main
2 |
3 | const outfluxVersion = "0.3.0"
4 |
5 | // main delegates to Execute (root.go), which runs RootCmd and exits
6 | // via log.Fatal on error instead of silently discarding it.
7 | func main() {
8 | Execute()
9 | }
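initAppContext wires every collaborator through an interface rather than a concrete type, which is what lets the tests below substitute fakes. A minimal sketch of that substitution (not repository code; mockService, mockTsConnSer and mockInfConn are the test doubles defined later in mocks_for_test.go):

    // Assemble an appContext backed entirely by fakes, so migrate and
    // transferSchema can be exercised without a real InfluxDB or TimescaleDB.
    app := &appContext{
        ics:         &mockService{inflConn: &mockInfConn{}},
        tscs:        &mockTsConnSer{},
        pipeService: &mockService{},
    }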
-------------------------------------------------------------------------------- /cmd/outflux/migrate_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "testing" 7 | 8 | influx "github.com/influxdata/influxdb/client/v2" 9 | "github.com/jackc/pgx" 10 | 11 | "github.com/timescale/outflux/internal/cli" 12 | "github.com/timescale/outflux/internal/connections" 13 | ) 14 | 15 | func TestPreparePipeErrors(t *testing.T) { 16 | testCases := []struct { 17 | expected string 18 | input []error 19 | }{ 20 | { 21 | expected: "Migration finished with errors:\n", 22 | input: []error{}, 23 | }, { 24 | expected: "Migration finished with errors:\nOne\nTwo\n", 25 | input: []error{fmt.Errorf("One"), fmt.Errorf("Two")}, 26 | }, 27 | } 28 | 29 | for _, testCase := range testCases { 30 | res := preparePipeErrors(testCase.input) 31 | if res.Error() != testCase.expected { 32 | t.Errorf("expected:%s\ngot:%s", testCase.expected, res.Error()) 33 | } 34 | } 35 | } 36 | func TestMigrateErrorOnDiscoverMeasures(t *testing.T) { 37 | app := &appContext{ 38 | pipeService: &mockService{}, 39 | ics: &mockService{inflConnErr: fmt.Errorf("error")}, 40 | } 41 | 42 | conn := &cli.ConnectionConfig{} 43 | mig := &cli.MigrationConfig{Quiet: true} 44 | err := migrate(app, conn, mig) 45 | if err == nil { 46 | t.Error("expected error, none received") 47 | } 48 | } 49 | 50 | func TestOpenConnectionsReturnsError(t *testing.T) { 51 | app := &appContext{ 52 | ics: &mockService{inflConnErr: fmt.Errorf("error")}, 53 | } 54 | 55 | conn := &cli.ConnectionConfig{ 56 | InputMeasures: []string{"a"}, 57 | } 58 | mig := &cli.MigrationConfig{MaxParallel: 1} 59 | err := migrate(app, conn, mig) 60 | if err == nil { 61 | t.Error("expected error, none received") 62 | } 63 | } 64 | 65 | func TestMigrateCreatePipeReturnsError(t *testing.T) { 66 | app := &appContext{ 67 | ics: &mockService{inflConn: &mockInfConn{}}, 68 | tscs: &mockTsConnSer{tsConn: &pgx.Conn{}}, 69 | pipeService: &mockService{pipeErr: fmt.Errorf("error")}, 70 | } 71 | 72 | conn := &cli.ConnectionConfig{ 73 | InputMeasures: []string{"a"}, 74 | } 75 | mig := &cli.MigrationConfig{MaxParallel: 1} 76 | err := migrate(app, conn, mig) 77 | if err == nil { 78 | t.Error("expected error, none received") 79 | } 80 | } 81 | func TestMigratePipeReturnsError(t *testing.T) { 82 | errorReturningPipe := &mockPipe{runErr: fmt.Errorf("error")} 83 | app := &appContext{ 84 | ics: &mockService{inflConn: &mockInfConn{}}, 85 | tscs: &mockTsConnSer{tsConn: &pgx.Conn{}}, 86 | pipeService: &mockService{ 87 | pipe: errorReturningPipe, 88 | }, 89 | } 90 | conn := &cli.ConnectionConfig{InputMeasures: []string{"a"}} 91 | mig := &cli.MigrationConfig{MaxParallel: 1} 92 | err := migrate(app, conn, mig) 93 | if err == nil { 94 | t.Errorf("expected error, none received") 95 | } 96 | } 97 | 98 | func TestMigratePipesWaitForSemaphore(t *testing.T) { 99 | counter := &runCounter{lock: &sync.Mutex{}} 100 | goodPipe1 := &mockPipe{counter: counter} 101 | 102 | app := &appContext{ 103 | pipeService: &mockService{ 104 | pipe: goodPipe1, 105 | }, 106 | ics: &multiConnMock{}, 107 | tscs: &mockTsConnSer{tsConn: &pgx.Conn{}}, 108 | } 109 | conn := &cli.ConnectionConfig{InputMeasures: []string{"a", "b", "c"}} 110 | mig := &cli.MigrationConfig{MaxParallel: 2} 111 | err := migrate(app, conn, mig) 112 | if err != nil { 113 | t.Errorf("unexpected error: %v", err) 114 | } 115 | 116 | if counter.maxRunning > int32(mig.MaxParallel) { 117 | 
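// maxRunning is updated under the counter's mutex in mockPipe.Run, so it
// records the peak number of pipes that were running at the same time;
// migrate's semaphore must keep that peak at or below MaxParallel.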
t.Errorf("number of concurrent pipelines (%d) was too damn high (allowed %d)", counter.maxRunning, mig.MaxParallel) 118 | } 119 | } 120 | 121 | func TestOpenConnections(t *testing.T) { 122 | // error on new influx con 123 | app := &appContext{ 124 | ics: &mockService{ 125 | inflConnErr: fmt.Errorf("some error"), 126 | }, 127 | } 128 | 129 | // error on open influx conn 130 | _, _, err := openConnections(app, &cli.ConnectionConfig{}) 131 | if err == nil { 132 | t.Errorf("expected error, none received") 133 | } 134 | 135 | // error on open ts conn 136 | mockIcs := &mockService{inflConn: &mockInfConn{}} 137 | mockTs := &mockTsConnSer{tsConnErr: fmt.Errorf("error")} 138 | app = &appContext{ 139 | ics: mockIcs, 140 | tscs: mockTs, 141 | } 142 | _, _, err = openConnections(app, &cli.ConnectionConfig{}) 143 | if err == nil { 144 | t.Error("expected error, none received") 145 | } else if !mockIcs.inflConn.(*mockInfConn).closeCalled { 146 | t.Error("close not called on influx connection") 147 | } 148 | 149 | // no error 150 | mockIcs = &mockService{inflConn: &mockInfConn{}} 151 | mockTs = &mockTsConnSer{tsConn: &pgx.Conn{}} 152 | app = &appContext{ 153 | ics: mockIcs, 154 | tscs: mockTs, 155 | } 156 | _, _, err = openConnections(app, &cli.ConnectionConfig{}) 157 | if err != nil { 158 | t.Errorf("unexpected error: %v", err) 159 | } else if mockIcs.inflConn.(*mockInfConn).closeCalled { 160 | t.Error("close method was called on influx connection") 161 | } 162 | } 163 | 164 | type multiConnMock struct { 165 | } 166 | 167 | func (m *multiConnMock) NewConnection(p *connections.InfluxConnectionParams) (influx.Client, error) { 168 | return &mockInfConn{}, nil 169 | } 170 | -------------------------------------------------------------------------------- /cmd/outflux/mocks_for_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | 7 | influx "github.com/influxdata/influxdb/client/v2" 8 | "github.com/timescale/outflux/internal/cli" 9 | "github.com/timescale/outflux/internal/connections" 10 | "github.com/timescale/outflux/internal/pipeline" 11 | "github.com/timescale/outflux/internal/schemamanagement" 12 | ) 13 | 14 | type mockService struct { 15 | pipe pipeline.Pipe 16 | pipeErr error 17 | inflConn influx.Client 18 | inflConnErr error 19 | inflSchemMngr schemamanagement.SchemaManager 20 | } 21 | 22 | func (m *mockService) Create(infConn influx.Client, tsConn connections.PgxWrap, measure, inputDb string, conf *cli.MigrationConfig) (pipeline.Pipe, error) { 23 | return m.pipe, m.pipeErr 24 | } 25 | 26 | func (m *mockService) NewConnection(arg *connections.InfluxConnectionParams) (influx.Client, error) { 27 | return m.inflConn, m.inflConnErr 28 | } 29 | 30 | func (m *mockService) Influx(c influx.Client, db, rp string, convertIntToFloat bool) schemamanagement.SchemaManager { 31 | return m.inflSchemMngr 32 | } 33 | 34 | func (m *mockService) TimeScale(dbConn connections.PgxWrap, schema, chunkInterval string) schemamanagement.SchemaManager { 35 | return nil 36 | } 37 | 38 | type mockTsConnSer struct { 39 | tsConn connections.PgxWrap 40 | tsConnErr error 41 | } 42 | 43 | func (m *mockTsConnSer) NewConnection(connStr string) (connections.PgxWrap, error) { 44 | return m.tsConn, m.tsConnErr 45 | } 46 | 47 | type runCounter struct { 48 | lock *sync.Mutex 49 | maxRunning int32 50 | currRunning int32 51 | } 52 | type mockPipe struct { 53 | counter *runCounter 54 | runErr error 55 | } 56 | 57 | func (m *mockPipe) ID() string { 
return "id" }
58 | func (m *mockPipe) Run() error {
59 | if m.counter != nil {
60 | m.counter.lock.Lock()
61 | m.counter.currRunning++
62 | if m.counter.currRunning > m.counter.maxRunning {
63 | m.counter.maxRunning = m.counter.currRunning
64 | }
65 | m.counter.lock.Unlock()
66 | m.counter.lock.Lock()
67 | m.counter.currRunning--
68 | m.counter.lock.Unlock()
69 | }
70 | return m.runErr
71 | }
72 |
73 | type mockInfConn struct {
74 | closeCalled bool
75 | }
76 |
77 | func (m *mockInfConn) Ping(timeout time.Duration) (time.Duration, string, error) { return 0, "", nil }
78 | func (m *mockInfConn) Write(bp influx.BatchPoints) error { return nil }
79 | func (m *mockInfConn) Query(q influx.Query) (*influx.Response, error) { return nil, nil }
80 | func (m *mockInfConn) QueryAsChunk(q influx.Query) (*influx.ChunkedResponse, error) { return nil, nil }
81 | func (m *mockInfConn) Close() error {
82 | m.closeCalled = true
83 | return nil
84 | }
85 |
-------------------------------------------------------------------------------- /cmd/outflux/root.go: --------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "log"
6 |
7 | "github.com/spf13/cobra"
8 | "github.com/timescale/outflux/internal/cli/flagparsers"
9 | )
10 |
11 | // RootCmd defines the root outflux command
12 | var RootCmd = &cobra.Command{
13 | Use: "outflux",
14 | Short: "Outflux migrates an InfluxDB database (or part of a database) to TimescaleDB",
15 | Long: "Outflux offers the capability to migrate an InfluxDB database, or specific measurements of it, to TimescaleDB." +
16 | " It can also transfer only the schema of a database or measurement to TimescaleDB",
17 | Run: func(cmd *cobra.Command, args []string) {
18 | fmt.Println("Outflux version " + outfluxVersion)
19 | fmt.Println("Run 'outflux --help' for usage")
20 | },
21 | }
22 |
23 | // Execute is called to execute the root outflux command
24 | func Execute() {
25 | if err := RootCmd.Execute(); err != nil {
26 | log.Fatal(err)
27 | }
28 | }
29 |
30 | func init() {
31 | RootCmd.PersistentFlags().Bool(flagparsers.QuietFlag, false, "If specified, suppresses all log output to STDOUT")
32 | RootCmd.Flags().Bool(flagparsers.VersionFlag, false, "Print the version of Outflux")
33 | migrateCmd := initMigrateCmd()
34 | RootCmd.AddCommand(migrateCmd)
35 |
36 | schemaTransferCmd := initSchemaTransferCmd()
37 | RootCmd.AddCommand(schemaTransferCmd)
38 | }
-------------------------------------------------------------------------------- /cmd/outflux/schema_transfer.go: --------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "io/ioutil"
6 | "log"
7 | "time"
8 |
9 | influx "github.com/influxdata/influxdb/client/v2"
10 | "github.com/timescale/outflux/internal/cli"
11 | "github.com/timescale/outflux/internal/connections"
12 |
13 | "github.com/spf13/cobra"
14 | "github.com/timescale/outflux/internal/cli/flagparsers"
15 | )
16 |
17 | func initSchemaTransferCmd() *cobra.Command {
18 | schemaTransferCmd := &cobra.Command{
19 | Use: "schema-transfer database [measure1 measure2 ...]",
20 | Short: "Discover the schema of measurements and validate or prepare a TimescaleDB hyper-table with the discovered schema",
21 | Long: "Discover the schema of measurements and validate or prepare a TimescaleDB hyper-table with the discovered schema",
22 | Args: cobra.MinimumNArgs(1),
23 | Run: func(cmd *cobra.Command, args []string) {
24 | app := initAppContext()
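// Flags are parsed into connection and migration configs before any database
// connection is opened; a parse failure aborts the command via log.Fatal.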
connArgs, migArgs, err := flagparsers.FlagsToSchemaTransferConfig(cmd.Flags(), args) 26 | if err != nil { 27 | log.Fatal(err) 28 | return 29 | } 30 | 31 | err = transferSchema(app, connArgs, migArgs) 32 | if err != nil { 33 | log.Fatal(err) 34 | } 35 | }, 36 | } 37 | 38 | flagparsers.AddConnectionFlagsToCmd(schemaTransferCmd) 39 | schemaTransferCmd.PersistentFlags().String(flagparsers.RetentionPolicyFlag, flagparsers.DefaultRetentionPolicy, "The retention policy to select the fields and tags from") 40 | schemaTransferCmd.PersistentFlags().String(flagparsers.SchemaStrategyFlag, flagparsers.DefaultSchemaStrategy.String(), "Strategy to use for preparing the schema of the output database. Valid options: ValidateOnly, CreateIfMissing, DropAndCreate, DropCascadeAndCreate") 41 | schemaTransferCmd.PersistentFlags().Bool(flagparsers.TagsAsJSONFlag, flagparsers.DefaultTagsAsJSON, "If this flag is set to true, then the Tags of the influx measures being exported will be combined into a single JSONb column in Timescale") 42 | schemaTransferCmd.PersistentFlags().String(flagparsers.TagsColumnFlag, flagparsers.DefaultTagsColumn, "When "+flagparsers.TagsAsJSONFlag+" is set, this column specifies the name of the JSON column for the tags") 43 | schemaTransferCmd.PersistentFlags().Bool(flagparsers.FieldsAsJSONFlag, flagparsers.DefaultFieldsAsJSON, "If this flag is set to true, then the Fields of the influx measures being exported will be combined into a single JSONb column in Timescale") 44 | schemaTransferCmd.PersistentFlags().String(flagparsers.FieldsColumnFlag, flagparsers.DefaultFieldsColumn, "When "+flagparsers.FieldsAsJSONFlag+" is set, this column specifies the name of the JSON column for the fields") 45 | schemaTransferCmd.PersistentFlags().String(flagparsers.OutputSchemaFlag, flagparsers.DefaultOutputSchema, "The schema of the output database that the data will be inserted into") 46 | schemaTransferCmd.PersistentFlags().Bool(flagparsers.MultishardIntFloatCast, flagparsers.DefaultMultishardIntFloatCast, "If a field is Int64 in one shard, and Float64 in another, with this flag it will be cast to Float64 despite possible data loss") 47 | schemaTransferCmd.PersistentFlags().String(flagparsers.ChunkTimeIntervalFlag, flagparsers.DefaultChunkTimeInterval, "chunk_time_interval of the hypertables created by Outflux") 48 | return schemaTransferCmd 49 | } 50 | 51 | func transferSchema(app *appContext, connArgs *cli.ConnectionConfig, args *cli.MigrationConfig) error { 52 | if args.Quiet { 53 | log.SetFlags(0) 54 | log.SetOutput(ioutil.Discard) 55 | } 56 | 57 | startTime := time.Now() 58 | influxDb := connArgs.InputDb 59 | log.Printf("Selected input database: %s\n", influxDb) 60 | var err error 61 | 62 | // connect to input and output database 63 | infConn, pgConn, err := openConnections(app, connArgs) 64 | if err != nil { 65 | return fmt.Errorf("could not open connections to input and output database\n%v", err) 66 | } 67 | defer infConn.Close() 68 | defer pgConn.Close() 69 | 70 | // transfer the schema for all measures 71 | if len(connArgs.InputMeasures) == 0 { 72 | log.Printf("No measurements explicitly specified. Discovering automatically") 73 | connArgs.InputMeasures, err = discoverMeasures(app, infConn, connArgs.InputDb, args.RetentionPolicy, args.OnConflictConvertIntToFloat) 74 | if err != nil { 75 | return fmt.Errorf("could not discover the available measures for the input db '%s'", connArgs.InputDb) 76 | } 77 | if len(connArgs.InputMeasures) == 0 { 78 | log.Printf("No candidate measurements discovered. 
Exiting") 79 | return nil 80 | } 81 | } 82 | 83 | for _, measure := range connArgs.InputMeasures { 84 | err := transfer(app, connArgs.InputDb, args, infConn, pgConn, measure) 85 | if err != nil { 86 | return fmt.Errorf("could not transfer schema for measurement '%s'\n%v", measure, err) 87 | } 88 | } 89 | 90 | executionTime := time.Since(startTime).Seconds() 91 | log.Printf("Schema Transfer complete in: %.3f seconds\n", executionTime) 92 | return nil 93 | } 94 | 95 | func discoverMeasures(app *appContext, influxConn influx.Client, db, rp string, onConflictConvertIntToFloat bool) ([]string, error) { 96 | schemaManager := app.schemaManagerService.Influx(influxConn, db, rp, onConflictConvertIntToFloat) 97 | return schemaManager.DiscoverDataSets() 98 | } 99 | 100 | func transfer( 101 | app *appContext, 102 | inputDb string, 103 | args *cli.MigrationConfig, 104 | infConn influx.Client, 105 | pgConn connections.PgxWrap, 106 | measure string) error { 107 | 108 | pipe, err := app.pipeService.Create(infConn, pgConn, measure, inputDb, args) 109 | if err != nil { 110 | return fmt.Errorf("could not create execution pipeline for measure '%s'\n%v", measure, err) 111 | } 112 | 113 | log.Printf("%s starting execution\n", pipe.ID()) 114 | return pipe.Run() 115 | } 116 | -------------------------------------------------------------------------------- /cmd/outflux/schema_transfer_i_test.go: -------------------------------------------------------------------------------- 1 | // +build integration 2 | 3 | package main 4 | 5 | import ( 6 | "fmt" 7 | "os" 8 | "testing" 9 | 10 | "github.com/timescale/outflux/internal/cli" 11 | "github.com/timescale/outflux/internal/schemamanagement/schemaconfig" 12 | "github.com/timescale/outflux/internal/testutils" 13 | ) 14 | 15 | func TestSchemaTransfer(t *testing.T) { 16 | db := "test_schema_transfer" 17 | field := "field1" 18 | value := 1 19 | tags := make(map[string]string) 20 | fieldValues := make(map[string]interface{}) 21 | fieldValues[field] = value 22 | measures := []string{"test", "test 1", "test-2"} 23 | 24 | if err := testutils.PrepareServersForITest(db); err != nil { 25 | t.Fatalf("could not prepare servers: %v", err) 26 | } 27 | 28 | defer testutils.ClearServersAfterITest(db) 29 | appContext := initAppContext() 30 | 31 | dbConn, err := testutils.OpenTSConn(db) 32 | if err != nil { 33 | t.Fatal(err) 34 | } 35 | defer dbConn.Close() 36 | for _, measure := range measures { 37 | err := testutils.CreateInfluxMeasure(db, measure, []*map[string]string{&tags}, []*map[string]interface{}{&fieldValues}) 38 | if err != nil { 39 | t.Fatalf("could not create measure: %v", err) 40 | } 41 | 42 | connConf := &cli.ConnectionConfig{ 43 | InputHost: testutils.InfluxHost, 44 | InputDb: db, 45 | InputMeasures: []string{measure}, 46 | OutputDbConnString: fmt.Sprintf(testutils.TsConnStringTemplate, db), 47 | } 48 | config := &cli.MigrationConfig{ 49 | ChunkSize: 1, 50 | OutputSchemaStrategy: schemaconfig.DropAndCreate, 51 | SchemaOnly: true, 52 | } 53 | 54 | err = transferSchema(appContext, connConf, config) 55 | if err != nil { 56 | t.Fatal(err) 57 | } 58 | 59 | rows, err := dbConn.Query(fmt.Sprintf(`SELECT count(*) FROM "%s"`, measure)) 60 | if err != nil { 61 | t.Fatal(err) 62 | } 63 | var count int 64 | if !rows.Next() { 65 | t.Fatal("couldn't check state of TS DB") 66 | } 67 | 68 | err = rows.Scan(&count) 69 | if err != nil { 70 | t.Fatal("couldn't check state of TS DB") 71 | } 72 | 73 | if count != 0 { 74 | t.Errorf("expected no rows in the output database, %d found", count) 75 | } 
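// schema-transfer should have created the hypertable but left it empty;
// release the result set before the next measure reuses the connection.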
76 | rows.Close() 77 | } 78 | } 79 | 80 | func TestOutputConnOverridesEnvVars(t *testing.T) { 81 | // Set up servers 82 | db := "test_output_con_overrides" 83 | measure := "test" 84 | field := "field1" 85 | value := 1 86 | tags := make(map[string]string) 87 | fieldValues := make(map[string]interface{}) 88 | fieldValues[field] = value 89 | if err := testutils.PrepareServersForITest(db); err != nil { 90 | t.Fatalf("could not prepare servers: %v", err) 91 | } 92 | 93 | err := testutils.CreateInfluxMeasure(db, measure, []*map[string]string{&tags}, []*map[string]interface{}{&fieldValues}) 94 | if err != nil { 95 | t.Fatalf("could not create influx measure: %v", err) 96 | } 97 | 98 | defer testutils.ClearServersAfterITest(db) 99 | 100 | // Three PG environment variables determining database and password 101 | os.Setenv("PGDATABASE", "wrong_db") 102 | os.Setenv("PGPORT", "5433") 103 | os.Setenv("PGPASSWORD", "postgres") 104 | 105 | connConf := &cli.ConnectionConfig{ 106 | InputHost: testutils.InfluxHost, 107 | InputDb: db, 108 | InputMeasures: []string{measure}, 109 | } 110 | config := &cli.MigrationConfig{ 111 | ChunkSize: 1, 112 | OutputSchemaStrategy: schemaconfig.DropAndCreate, 113 | SchemaOnly: true, 114 | } 115 | appContext := initAppContext() 116 | 117 | // connection should fail, wrong db 118 | err = transferSchema(appContext, connConf, config) 119 | if err == nil { 120 | t.Fatal("expected error, none received") 121 | } 122 | 123 | // Conn String that will override database and user 124 | connString := fmt.Sprintf("user=postgres dbname=%s", db) 125 | connConf.OutputDbConnString = connString 126 | err = transferSchema(appContext, connConf, config) 127 | if err != nil { 128 | t.Errorf("unexpected error: %v", err) 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /cmd/outflux/schema_transfer_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "testing" 7 | "time" 8 | 9 | influx "github.com/influxdata/influxdb/client/v2" 10 | "github.com/jackc/pgx" 11 | "github.com/timescale/outflux/internal/cli" 12 | "github.com/timescale/outflux/internal/idrf" 13 | "github.com/timescale/outflux/internal/schemamanagement/schemaconfig" 14 | ) 15 | 16 | func TestDiscoverMeasures(t *testing.T) { 17 | mockClient := &tdmc{} 18 | mockSchemaMngr := &tdmsm{} 19 | mockAll := &mockService{inflConn: mockClient, inflSchemMngr: mockSchemaMngr} 20 | app := &appContext{ 21 | ics: mockAll, 22 | schemaManagerService: mockAll, 23 | } 24 | _, err := discoverMeasures(app, mockClient, "db", "autogen", true) 25 | if err != nil { 26 | t.Errorf("unexpected error:%v", err) 27 | } 28 | 29 | if mockClient.closed || !mockSchemaMngr.discoverCalled { 30 | t.Errorf("expected closed: false, discover: true\ngot closed:%v, discover:%v", mockClient.closed, mockSchemaMngr.discoverCalled) 31 | } 32 | } 33 | 34 | func TestTransferSchemaErrorOnDiscoverMeasures(t *testing.T) { 35 | mockAll := &mockService{inflConnErr: fmt.Errorf("error")} 36 | app := &appContext{ics: mockAll} 37 | connArgs := &cli.ConnectionConfig{} 38 | stArgs := &cli.MigrationConfig{} 39 | err := transferSchema(app, connArgs, stArgs) 40 | if err == nil { 41 | t.Errorf("expected err, none got") 42 | } 43 | } 44 | 45 | func TestSchemaTransferErrorOnOpenConn(t *testing.T) { 46 | mockClient := &tdmc{} 47 | mockSchemaMngr := &tdmsm{m: []string{"a"}} 48 | pipe := &mockPipe{runErr: fmt.Errorf("error"), counter: &runCounter{}} 49 | mockAll 
:= &mockService{ 50 | inflConn: mockClient, 51 | inflSchemMngr: mockSchemaMngr, 52 | pipe: pipe, 53 | } 54 | mockTsConn := &mockTsConnSer{tsConnErr: fmt.Errorf("error")} 55 | app := &appContext{ics: mockAll, tscs: mockTsConn, pipeService: mockAll, schemaManagerService: mockAll} 56 | connArgs := &cli.ConnectionConfig{} 57 | stArgs := &cli.MigrationConfig{Quiet: true} 58 | err := transferSchema(app, connArgs, stArgs) 59 | if err == nil { 60 | t.Errorf("expected err, none got") 61 | } 62 | } 63 | 64 | func TestTransferSchemaErrorOnRun(t *testing.T) { 65 | mockClient := &tdmc{} 66 | mockSchemaMngr := &tdmsm{m: []string{"a"}} 67 | pipe := &mockPipe{runErr: fmt.Errorf("error"), counter: &runCounter{lock: &sync.Mutex{}}} 68 | mockAll := &mockService{ 69 | inflConn: mockClient, 70 | inflSchemMngr: mockSchemaMngr, 71 | pipe: pipe, 72 | } 73 | mockTsConn := &mockTsConnSer{tsConn: &pgx.Conn{}} 74 | app := &appContext{ics: mockAll, tscs: mockTsConn, pipeService: mockAll, schemaManagerService: mockAll} 75 | connArgs := &cli.ConnectionConfig{} 76 | stArgs := &cli.MigrationConfig{Quiet: true} 77 | err := transferSchema(app, connArgs, stArgs) 78 | if err == nil { 79 | t.Errorf("expected err, none got") 80 | } 81 | 82 | if pipe.counter.maxRunning != 1 { 83 | t.Errorf("pipe didn't run") 84 | } 85 | } 86 | 87 | func TestErrorOnPipeCreate(t *testing.T) { 88 | mockClient := &tdmc{} 89 | mockSchemaMngr := &tdmsm{m: []string{"a"}} 90 | mockAll := &mockService{ 91 | inflConn: mockClient, 92 | inflSchemMngr: mockSchemaMngr, 93 | pipeErr: fmt.Errorf("error"), 94 | } 95 | mockTsConn := &mockTsConnSer{tsConn: &pgx.Conn{}} 96 | app := &appContext{ics: mockAll, tscs: mockTsConn, pipeService: mockAll, schemaManagerService: mockAll} 97 | connArgs := &cli.ConnectionConfig{} 98 | stArgs := &cli.MigrationConfig{Quiet: true} 99 | err := transferSchema(app, connArgs, stArgs) 100 | if err == nil { 101 | t.Errorf("expected err, none got") 102 | } 103 | } 104 | func TestTransferSchema(t *testing.T) { 105 | pipe := &mockPipe{counter: &runCounter{lock: &sync.Mutex{}}} 106 | mockAll := &mockService{ 107 | pipe: pipe, 108 | inflConn: &mockInfConn{}, 109 | } 110 | app := &appContext{ics: mockAll, tscs: &mockTsConnSer{tsConn: &pgx.Conn{}}, pipeService: mockAll, schemaManagerService: mockAll} 111 | connArgs := &cli.ConnectionConfig{InputMeasures: []string{"a"}} 112 | stArgs := &cli.MigrationConfig{} 113 | err := transferSchema(app, connArgs, stArgs) 114 | if err != nil { 115 | t.Errorf("unexpected error:%v", err) 116 | } 117 | 118 | if pipe.counter.maxRunning != 1 { 119 | t.Errorf("pipe didn't run") 120 | } 121 | } 122 | 123 | type tdmc struct{ closed bool } 124 | 125 | func (t *tdmc) Ping(timeout time.Duration) (time.Duration, string, error) { return 0, "", nil } 126 | func (t *tdmc) Write(bp influx.BatchPoints) error { return nil } 127 | func (t *tdmc) Query(q influx.Query) (*influx.Response, error) { return nil, nil } 128 | func (t *tdmc) QueryAsChunk(q influx.Query) (*influx.ChunkedResponse, error) { return nil, nil } 129 | func (t *tdmc) Close() error { t.closed = true; return nil } 130 | 131 | type tdmsm struct { 132 | m []string 133 | discoverCalled bool 134 | } 135 | 136 | func (t *tdmsm) DiscoverDataSets() ([]string, error) { t.discoverCalled = true; return t.m, nil } 137 | func (t *tdmsm) FetchDataSet(dataSetIdentifier string) (*idrf.DataSet, error) { return nil, nil } 138 | func (t *tdmsm) PrepareDataSet(dataSet *idrf.DataSet, strategy schemaconfig.SchemaStrategy) error { 139 | return nil 140 | } 141 | 
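The integration tests above (files carrying the // +build integration constraint) only compile when the integration tag is supplied. A plausible local workflow, assuming the docker-compose-integration.yml file below provides the same two servers that the .travis.yml script starts by hand:

    docker-compose -f docker-compose-integration.yml up -d
    GO111MODULE=on go test -race -tags=integration ./...
    docker-compose -f docker-compose-integration.yml down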
-------------------------------------------------------------------------------- /docker-compose-integration.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | influx: 5 | image: influxdb 6 | ports: 7 | - "8086:8086" 8 | timescale: 9 | image: timescale/timescaledb 10 | environment: 11 | - POSTGRES_PASSWORD=postgres 12 | ports: 13 | - "5433:5432" 14 | 15 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/timescale/outflux 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/cockroachdb/apd v1.1.0 // indirect 7 | github.com/gofrs/uuid v4.4.0+incompatible // indirect 8 | github.com/influxdata/influxdb v1.7.11 9 | github.com/jackc/fake v0.0.0-20150926172116-812a484cc733 // indirect 10 | github.com/jackc/pgx v3.6.2+incompatible 11 | github.com/lib/pq v1.0.0 12 | github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24 // indirect 13 | github.com/spf13/cobra v0.0.3 14 | github.com/spf13/pflag v1.0.3 15 | github.com/stretchr/testify v1.4.0 16 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e 17 | ) 18 | -------------------------------------------------------------------------------- /internal/cli/connection_config.go: -------------------------------------------------------------------------------- 1 | package cli 2 | 3 | // ConnectionConfig holds all arguments required to establish a connection to an input and output db 4 | type ConnectionConfig struct { 5 | InputHost string 6 | InputDb string 7 | InputMeasures []string 8 | InputUser string 9 | InputPass string 10 | InputUnsafeHTTPS bool 11 | OutputDbConnString string 12 | } 13 | -------------------------------------------------------------------------------- /internal/cli/extraction_conf_creator.go: -------------------------------------------------------------------------------- 1 | package cli 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/timescale/outflux/internal/extraction/config" 7 | ) 8 | 9 | const ( 10 | extractorIDTemplate = "%s_ext" 11 | ) 12 | 13 | type extractionConfCreator interface { 14 | create(pipeID string, db, measure string, conf *MigrationConfig) *config.ExtractionConfig 15 | } 16 | 17 | type defaultExtractionConfCreator struct{} 18 | 19 | func (d *defaultExtractionConfCreator) create(pipeID, db, measure string, conf *MigrationConfig) *config.ExtractionConfig { 20 | measureExtractionConf := &config.MeasureExtraction{ 21 | Database: db, 22 | Measure: measure, 23 | From: conf.From, 24 | To: conf.To, 25 | ChunkSize: conf.ChunkSize, 26 | Limit: conf.Limit, 27 | SchemaOnly: conf.SchemaOnly, 28 | RetentionPolicy: conf.RetentionPolicy, 29 | OnConflictConvertIntToFloat: conf.OnConflictConvertIntToFloat, 30 | } 31 | 32 | ex := &config.ExtractionConfig{ 33 | ExtractorID: fmt.Sprintf(extractorIDTemplate, pipeID), 34 | MeasureExtraction: measureExtractionConf, 35 | DataBufferSize: conf.DataBuffer, 36 | } 37 | 38 | return ex 39 | } 40 | -------------------------------------------------------------------------------- /internal/cli/flagparsers/connection_args_parser.go: -------------------------------------------------------------------------------- 1 | package flagparsers 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/spf13/pflag" 7 | "github.com/timescale/outflux/internal/cli" 8 | ) 9 | 10 | // FlagsToConnectionConfig extracts flags related to establishing the connection to input and output database 11 | func 
FlagsToConnectionConfig(flags *pflag.FlagSet, args []string) (*cli.ConnectionConfig, error) {
12 | if args[0] == "" {
13 | return nil, fmt.Errorf("input database name not specified")
14 | }
15 |
16 | inputUser, _ := flags.GetString(InputUserFlag)
17 | inputPass, _ := flags.GetString(InputPassFlag)
18 | inputHost, _ := flags.GetString(InputServerFlag)
19 | inputUnsafe, _ := flags.GetBool(InputUnsafeHTTPSFlag)
20 | outputConnString, _ := flags.GetString(OutputConnFlag)
21 | return &cli.ConnectionConfig{
22 | InputDb: args[0],
23 | InputMeasures: args[1:],
24 | InputHost: inputHost,
25 | InputUser: inputUser,
26 | InputPass: inputPass,
27 | InputUnsafeHTTPS: inputUnsafe,
28 | OutputDbConnString: outputConnString,
29 | }, nil
30 | }
-------------------------------------------------------------------------------- /internal/cli/flagparsers/connection_flags.go: --------------------------------------------------------------------------------
1 | package flagparsers
2 |
3 | import (
4 | "github.com/spf13/cobra"
5 | )
6 |
7 | // AddConnectionFlagsToCmd adds the flags required to connect to an Influx and Timescale database
8 | func AddConnectionFlagsToCmd(cmd *cobra.Command) {
9 | cmd.PersistentFlags().String(
10 | InputServerFlag,
11 | DefaultInputServer,
12 | "Host of the input database, http(s)://location:port.")
13 | cmd.PersistentFlags().String(
14 | InputUserFlag,
15 | DefaultInputUser,
16 | "Username to use when connecting to the input database. If set, overrides $INFLUX_USERNAME")
17 | cmd.PersistentFlags().String(
18 | InputPassFlag,
19 | DefaultInputPass,
20 | "Password to use when connecting to the input database. If set, overrides $INFLUX_PASSWORD")
21 | cmd.PersistentFlags().Bool(
22 | InputUnsafeHTTPSFlag,
23 | DefaultInputUnsafeHTTPS,
24 | "Should 'InsecureSkipVerify' be passed to the input connection")
25 | cmd.PersistentFlags().String(
26 | OutputConnFlag,
27 | DefaultOutputConn,
28 | "Connection string to use to connect to the output database, overrides values in the PG environment variables")
29 | }
-------------------------------------------------------------------------------- /internal/cli/flagparsers/flags.go: --------------------------------------------------------------------------------
1 | package flagparsers
2 |
3 | import (
4 | ingestionConfig "github.com/timescale/outflux/internal/ingestion/config"
5 | "github.com/timescale/outflux/internal/schemamanagement/schemaconfig"
6 | )
7 |
8 | // Flags used in outflux and their default values
9 | const (
10 | VersionFlag = "version"
11 | InputServerFlag = "input-server"
12 | InputUserFlag = "input-user"
13 | InputPassFlag = "input-pass"
14 | InputUnsafeHTTPSFlag = "input-unsafe-https"
15 | RetentionPolicyFlag = "retention-policy"
16 | OutputConnFlag = "output-conn"
17 | SchemaStrategyFlag = "schema-strategy"
18 | CommitStrategyFlag = "commit-strategy"
19 | OutputSchemaFlag = "output-schema"
20 | FromFlag = "from"
21 | ToFlag = "to"
22 | LimitFlag = "limit"
23 | ChunkSizeFlag = "chunk-size"
24 | QuietFlag = "quiet"
25 | DataBufferFlag = "data-buffer"
26 | MaxParallelFlag = "max-parallel"
27 | RollbackOnExternalErrorFlag = "rollback-on-external-error"
28 | BatchSizeFlag = "batch-size"
29 | TagsAsJSONFlag = "tags-as-json"
30 | TagsColumnFlag = "tags-column"
31 | FieldsAsJSONFlag = "fields-as-json"
32 | FieldsColumnFlag = "fields-column"
33 | ChunkTimeIntervalFlag = "chunk-time-interval"
34 | // InfluxDB can have different data types for the same field across
35 | // different shards.
If a field is discovered with an Int64 and a Float64 type
36 | // and this flag is TRUE, the field will be converted to float;
37 | // otherwise an error is returned.
38 | MultishardIntFloatCast = "multishard-int-float-cast"
39 |
40 | DefaultInputServer = "http://localhost:8086"
41 | DefaultInputUser = ""
42 | DefaultInputPass = ""
43 | DefaultInputUnsafeHTTPS = false
44 | DefaultRetentionPolicy = "autogen"
45 | DefaultOutputConn = "sslmode=disable"
46 | DefaultOutputSchema = ""
47 | DefaultSchemaStrategy = schemaconfig.CreateIfMissing
48 | DefaultCommitStrategy = ingestionConfig.CommitOnEachBatch
49 | DefaultDataBufferSize = 15000
50 | DefaultChunkSize = 15000
51 | DefaultLimit = 0
52 | DefaultMaxParallel = 2
53 | DefaultRollbackOnExternalError = true
54 | DefaultBatchSize = 8000
55 | DefaultTagsAsJSON = false
56 | DefaultTagsColumn = "tags"
57 | DefaultFieldsAsJSON = false
58 | DefaultFieldsColumn = "fields"
59 | DefaultMultishardIntFloatCast = false
60 | DefaultChunkTimeInterval = ""
61 | )
-------------------------------------------------------------------------------- /internal/cli/flagparsers/migrate_args_parser.go: --------------------------------------------------------------------------------
1 | package flagparsers
2 |
3 | import (
4 | "fmt"
5 | "math"
6 |
7 | "github.com/spf13/pflag"
8 | "github.com/timescale/outflux/internal/cli"
9 | ingestionConfig "github.com/timescale/outflux/internal/ingestion/config"
10 | "github.com/timescale/outflux/internal/schemamanagement/schemaconfig"
11 | )
12 |
13 | // FlagsToMigrateConfig extracts the config for running a migration from the flags of the command
14 | func FlagsToMigrateConfig(flags *pflag.FlagSet, args []string) (*cli.ConnectionConfig, *cli.MigrationConfig, error) {
15 | connectionArgs, err := FlagsToConnectionConfig(flags, args)
16 | if err != nil {
17 | return nil, nil, err
18 | }
19 |
20 | strategyAsStr, _ := flags.GetString(SchemaStrategyFlag)
21 | var strategy schemaconfig.SchemaStrategy
22 | if strategy, err = schemaconfig.ParseStrategyString(strategyAsStr); err != nil {
23 | return nil, nil, err
24 | }
25 |
26 | commitStrategyAsStr, _ := flags.GetString(CommitStrategyFlag)
27 | var commitStrategy ingestionConfig.CommitStrategy
28 | if commitStrategy, err = ingestionConfig.ParseStrategyString(commitStrategyAsStr); err != nil {
29 | return nil, nil, err
30 | }
31 |
32 | limit, err := flags.GetUint64(LimitFlag)
33 | if err != nil {
34 | return nil, nil, err
35 | }
36 |
37 | chunkSize, err := flags.GetUint16(ChunkSizeFlag)
38 | if err != nil || chunkSize == 0 {
39 | return nil, nil, fmt.Errorf("value for the '%s' flag must be an integer > 0 and < %d", ChunkSizeFlag, math.MaxUint16)
40 | }
41 |
42 | batchSize, err := flags.GetUint16(BatchSizeFlag)
43 | if err != nil || batchSize == 0 {
44 | return nil, nil, fmt.Errorf("value for the '%s' flag must be an integer > 0 and < %d", BatchSizeFlag, math.MaxUint16)
45 | }
46 |
47 | dataBuffer, err := flags.GetUint16(DataBufferFlag)
48 | if err != nil {
49 | return nil, nil, fmt.Errorf("value for the '%s' flag must be an integer >= 0 and < %d", DataBufferFlag, math.MaxUint16)
50 | }
51 |
52 | maxParallel, err := flags.GetUint8(MaxParallelFlag)
53 | if err != nil || maxParallel == 0 {
54 | return nil, nil, fmt.Errorf("value for the '%s' flag must be an integer > 0 and < %d", MaxParallelFlag, math.MaxUint8)
55 | }
56 |
57 | quiet, err := flags.GetBool(QuietFlag)
58 | if err != nil {
59 | return nil, nil, fmt.Errorf("value for the '%s' flag must be true or false",
QuietFlag)
60 | }
61 |
62 | rollBack, err := flags.GetBool(RollbackOnExternalErrorFlag)
63 | if err != nil {
64 | return nil, nil, fmt.Errorf("value for the '%s' flag must be true or false", RollbackOnExternalErrorFlag)
65 | }
66 |
67 | from, _ := flags.GetString(FromFlag)
68 | to, _ := flags.GetString(ToFlag)
69 | tagsAsJSON, _ := flags.GetBool(TagsAsJSONFlag)
70 | tagsColumn, _ := flags.GetString(TagsColumnFlag)
71 | if tagsAsJSON && tagsColumn == "" {
72 | return nil, nil, fmt.Errorf("when the '%s' flag is set, the '%s' flag must also have a value", TagsAsJSONFlag, TagsColumnFlag)
73 | }
74 |
75 | fieldsAsJSON, _ := flags.GetBool(FieldsAsJSONFlag)
76 | fieldsColumn, _ := flags.GetString(FieldsColumnFlag)
77 | if fieldsAsJSON && fieldsColumn == "" {
78 | return nil, nil, fmt.Errorf("when the '%s' flag is set, the '%s' flag must also have a value", FieldsAsJSONFlag, FieldsColumnFlag)
79 | }
80 | outputSchema, _ := flags.GetString(OutputSchemaFlag)
81 | rp, _ := flags.GetString(RetentionPolicyFlag)
82 | intToFloat, _ := flags.GetBool(MultishardIntFloatCast)
83 | chunkTimeInterval, _ := flags.GetString(ChunkTimeIntervalFlag)
84 | migrateArgs := &cli.MigrationConfig{
85 | RetentionPolicy: rp,
86 | OutputSchemaStrategy: strategy,
87 | OutputSchema: outputSchema,
88 | From: from,
89 | To: to,
90 | Limit: limit,
91 | ChunkSize: chunkSize,
92 | BatchSize: batchSize,
93 | DataBuffer: dataBuffer,
94 | MaxParallel: maxParallel,
95 | Quiet: quiet,
96 | RollbackAllMeasureExtractionsOnError: rollBack,
97 | CommitStrategy: commitStrategy,
98 | TagsAsJSON: tagsAsJSON,
99 | TagsCol: tagsColumn,
100 | FieldsAsJSON: fieldsAsJSON,
101 | FieldsCol: fieldsColumn,
102 | OnConflictConvertIntToFloat: intToFloat,
103 | ChunkTimeInterval: chunkTimeInterval,
104 | }
105 |
106 | return connectionArgs, migrateArgs, nil
107 | }
-------------------------------------------------------------------------------- /internal/cli/flagparsers/schema_transfer_args_parser.go: --------------------------------------------------------------------------------
1 | package flagparsers
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/spf13/pflag"
7 | "github.com/timescale/outflux/internal/cli"
8 | "github.com/timescale/outflux/internal/schemamanagement/schemaconfig"
9 | )
10 |
11 | // FlagsToSchemaTransferConfig extracts the config for running schema transfer from the flags of the command
12 | func FlagsToSchemaTransferConfig(flags *pflag.FlagSet, args []string) (*cli.ConnectionConfig, *cli.MigrationConfig, error) {
13 | connectionArgs, err := FlagsToConnectionConfig(flags, args)
14 | if err != nil {
15 | return nil, nil, err
16 | }
17 |
18 | retentionPolicy, _ := flags.GetString(RetentionPolicyFlag)
19 | strategyAsStr, _ := flags.GetString(SchemaStrategyFlag)
20 | var strategy schemaconfig.SchemaStrategy
21 | if strategy, err = schemaconfig.ParseStrategyString(strategyAsStr); err != nil {
22 | return nil, nil, err
23 | }
24 |
25 | tagsAsJSON, _ := flags.GetBool(TagsAsJSONFlag)
26 | tagsColumn, _ := flags.GetString(TagsColumnFlag)
27 | if tagsAsJSON && tagsColumn == "" {
28 | return nil, nil, fmt.Errorf("when the '%s' flag is set, the '%s' flag must also have a value", TagsAsJSONFlag, TagsColumnFlag)
29 | }
30 |
31 | fieldsAsJSON, _ := flags.GetBool(FieldsAsJSONFlag)
32 | fieldsColumn, _ := flags.GetString(FieldsColumnFlag)
33 | if fieldsAsJSON && fieldsColumn == "" {
34 | return nil, nil, fmt.Errorf("when the '%s' flag is set, the '%s' flag must also have a value", FieldsAsJSONFlag, FieldsColumnFlag)
35 | }
36 |
37 | quiet, err :=
flags.GetBool(QuietFlag)
38 | if err != nil {
39 | return nil, nil, fmt.Errorf("value for the '%s' flag must be true or false", QuietFlag)
40 | }
41 | outputSchema, _ := flags.GetString(OutputSchemaFlag)
42 | intToFloat, _ := flags.GetBool(MultishardIntFloatCast)
43 | chunkTimeInterval, _ := flags.GetString(ChunkTimeIntervalFlag)
44 | return connectionArgs, &cli.MigrationConfig{
45 | RetentionPolicy: retentionPolicy,
46 | OutputSchema: outputSchema,
47 | OutputSchemaStrategy: strategy,
48 | Quiet: quiet,
49 | SchemaOnly: true,
50 | ChunkSize: 1,
51 | TagsAsJSON: tagsAsJSON,
52 | TagsCol: tagsColumn,
53 | FieldsAsJSON: fieldsAsJSON,
54 | FieldsCol: fieldsColumn,
55 | OnConflictConvertIntToFloat: intToFloat,
56 | ChunkTimeInterval: chunkTimeInterval,
57 | }, nil
58 | }
-------------------------------------------------------------------------------- /internal/cli/ingestion_conf_creator.go: --------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/timescale/outflux/internal/ingestion/config"
7 | )
8 |
9 | const (
10 | ingestorIDTemplate = "%s_ing"
11 | )
12 |
13 | type ingestionConfCreator interface {
14 | create(pipeID string, conf *MigrationConfig) *config.IngestorConfig
15 | }
16 |
17 | type defaultIngestionConfCreator struct {
18 | }
19 |
20 | func (s *defaultIngestionConfCreator) create(pipeID string, conf *MigrationConfig) *config.IngestorConfig {
21 | return &config.IngestorConfig{
22 | IngestorID: fmt.Sprintf(ingestorIDTemplate, pipeID),
23 | BatchSize: conf.BatchSize,
24 | RollbackOnExternalError: conf.RollbackAllMeasureExtractionsOnError,
25 | CommitStrategy: conf.CommitStrategy,
26 | SchemaStrategy: conf.OutputSchemaStrategy,
27 | Schema: conf.OutputSchema,
28 | ChunkTimeInterval: conf.ChunkTimeInterval,
29 | }
30 | }
-------------------------------------------------------------------------------- /internal/cli/migration_config.go: --------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | ingestionConf "github.com/timescale/outflux/internal/ingestion/config"
5 | "github.com/timescale/outflux/internal/schemamanagement/schemaconfig"
6 | )
7 |
8 | // MigrationConfig contains the configurable parameters for migrating an InfluxDB database to TimescaleDB
9 | type MigrationConfig struct {
10 | RetentionPolicy string
11 | OutputSchema string
12 | OutputSchemaStrategy schemaconfig.SchemaStrategy
13 | From string
14 | To string
15 | Limit uint64
16 | ChunkSize uint16
17 | BatchSize uint16
18 | Quiet bool
19 | DataBuffer uint16
20 | MaxParallel uint8
21 | RollbackAllMeasureExtractionsOnError bool
22 | CommitStrategy ingestionConf.CommitStrategy
23 | SchemaOnly bool
24 | TagsAsJSON bool
25 | TagsCol string
26 | FieldsAsJSON bool
27 | FieldsCol string
28 | OnConflictConvertIntToFloat bool
29 | ChunkTimeInterval string
30 | }
-------------------------------------------------------------------------------- /internal/cli/pipe_service.go: --------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "fmt"
5 |
6 | influx "github.com/influxdata/influxdb/client/v2"
7 |
8 | "github.com/timescale/outflux/internal/connections"
9 | "github.com/timescale/outflux/internal/extraction"
10 | "github.com/timescale/outflux/internal/ingestion"
11 | "github.com/timescale/outflux/internal/pipeline"
12 | )
13 |
14 | const (
15 | pipeIDTemplate = "pipe_%s"
16 | )
17 |
18 | // PipeService defines methods for
creating pipelines
19 | type PipeService interface {
20 | Create(infConn influx.Client, pgConn connections.PgxWrap, measure, inputDb string, conf *MigrationConfig) (pipeline.Pipe, error)
21 | }
22 |
23 | type pipeService struct {
24 | ingestorService ingestion.IngestorService
25 | extractorService extraction.ExtractorService
26 | transformerService TransformerService
27 | extractionConfCreator
28 | ingestionConfCreator
29 | }
30 |
31 | // NewPipeService creates a new instance of the PipeService
32 | func NewPipeService(
33 | ingestorService ingestion.IngestorService,
34 | extractorService extraction.ExtractorService,
35 | transformerService TransformerService) PipeService {
36 | return &pipeService{
37 | ingestorService: ingestorService,
38 | extractorService: extractorService,
39 | transformerService: transformerService,
40 | extractionConfCreator: &defaultExtractionConfCreator{},
41 | ingestionConfCreator: &defaultIngestionConfCreator{},
42 | }
43 | }
44 |
45 | func (s *pipeService) Create(infConn influx.Client, tsConn connections.PgxWrap, measure, inputDb string, conf *MigrationConfig) (pipeline.Pipe, error) {
46 | pipeID := fmt.Sprintf(pipeIDTemplate, measure)
47 | extractionConf := s.extractionConfCreator.create(pipeID, inputDb, measure, conf)
48 | ingestionConf := s.ingestionConfCreator.create(pipeID, conf)
49 | extractor, ingestor, err := s.createElements(infConn, tsConn, extractionConf, ingestionConf)
50 | if err != nil {
51 | return nil, fmt.Errorf("%s: could not create extractor and ingestor:\n%v", pipeID, err)
52 | }
53 |
54 | transformers, err := s.createTransformers(pipeID, infConn, measure, inputDb, conf)
55 | if err != nil {
56 | return nil, fmt.Errorf("%s: could not create transformers:\n%v", pipeID, err)
57 | }
58 |
59 | return pipeline.NewPipe(pipeID, ingestor, extractor, transformers, conf.SchemaOnly), nil
60 | }
-------------------------------------------------------------------------------- /internal/cli/pipe_service_create_elements.go: --------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "fmt"
5 |
6 | influx "github.com/influxdata/influxdb/client/v2"
7 | "github.com/timescale/outflux/internal/connections"
8 | "github.com/timescale/outflux/internal/extraction"
9 | extrConfig "github.com/timescale/outflux/internal/extraction/config"
10 | "github.com/timescale/outflux/internal/ingestion"
11 | ingConfig "github.com/timescale/outflux/internal/ingestion/config"
12 | )
13 |
14 | func (p *pipeService) createElements(
15 | infConn influx.Client,
16 | tsConn connections.PgxWrap,
17 | extrConf *extrConfig.ExtractionConfig,
18 | ingConf *ingConfig.IngestorConfig) (extraction.Extractor, ingestion.Ingestor, error) {
19 | extractor, err := p.extractorService.InfluxExtractor(infConn, extrConf)
20 | if err != nil {
21 | return nil, nil, fmt.Errorf("could not create extractor\n%v", err)
22 | }
23 |
24 | ingestor := p.ingestorService.NewTimescaleIngestor(tsConn, ingConf)
25 | return extractor, ingestor, nil
26 | }
-------------------------------------------------------------------------------- /internal/cli/pipe_service_create_transformers.go: --------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "fmt"
5 |
6 | influx "github.com/influxdata/influxdb/client/v2"
7 | "github.com/timescale/outflux/internal/transformation"
8 | )
9 |
10 | const (
11 | transformerIDTemplate = "%s_transformer_%s"
12 | )
13 |
14 | func (p *pipeService) createTransformers(pipeID
string, infConn influx.Client, measure string, inputDb string, conf *MigrationConfig) ([]transformation.Transformer, error) { 15 | transformers := []transformation.Transformer{} 16 | 17 | if conf.TagsAsJSON { 18 | id := fmt.Sprintf(transformerIDTemplate, pipeID, "tagsAsJSON") 19 | tagsTransformer, err := p.transformerService.TagsAsJSON(infConn, id, inputDb, conf.RetentionPolicy, measure, conf.TagsCol) 20 | if err != nil { 21 | return nil, err 22 | } 23 | // if measurement has no tags, a nil transformer is returned 24 | if tagsTransformer != nil { 25 | transformers = append(transformers, tagsTransformer) 26 | } 27 | } 28 | 29 | if conf.FieldsAsJSON { 30 | id := fmt.Sprintf(transformerIDTemplate, pipeID, "fieldsAsJSON") 31 | fieldsTransformer, err := p.transformerService.FieldsAsJSON(infConn, id, inputDb, conf.RetentionPolicy, measure, conf.FieldsCol) 32 | if err != nil { 33 | return nil, err 34 | } 35 | transformers = append(transformers, fieldsTransformer) 36 | } 37 | 38 | return transformers, nil 39 | } 40 | -------------------------------------------------------------------------------- /internal/cli/pipe_service_create_transformers_test.go: -------------------------------------------------------------------------------- 1 | package cli 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | influx "github.com/influxdata/influxdb/client/v2" 8 | "github.com/timescale/outflux/internal/idrf" 9 | "github.com/timescale/outflux/internal/transformation" 10 | ) 11 | 12 | func TestCreateTransformers(t *testing.T) { 13 | err := fmt.Errorf("error") 14 | testCases := []struct { 15 | desc string 16 | mock *psctMockService 17 | expectedTransIds []string 18 | expectErr bool 19 | conf *MigrationConfig 20 | connConf *ConnectionConfig 21 | }{ 22 | { 23 | desc: "error on tags as json transformer", 24 | mock: &psctMockService{tagsErr: err}, 25 | conf: &MigrationConfig{TagsAsJSON: true}, 26 | connConf: &ConnectionConfig{}, 27 | expectErr: true, 28 | }, { 29 | desc: "error on fields as json transformer", 30 | mock: &psctMockService{ 31 | fieldsErr: err, 32 | }, 33 | conf: &MigrationConfig{FieldsAsJSON: true}, 34 | connConf: &ConnectionConfig{}, 35 | expectErr: true, 36 | }, { 37 | desc: "tags transformer is nil, no tags for measure", 38 | mock: &psctMockService{}, 39 | expectedTransIds: []string{}, 40 | conf: &MigrationConfig{TagsAsJSON: true}, 41 | connConf: &ConnectionConfig{}, 42 | }, { 43 | desc: "all transformers created", 44 | mock: &psctMockService{ 45 | tagsT: &psctMockTrans{id: "t"}, 46 | fieldsT: &psctMockTrans{id: "f"}, 47 | }, 48 | expectedTransIds: []string{"t", "f"}, 49 | conf: &MigrationConfig{FieldsAsJSON: true, TagsAsJSON: true}, 50 | connConf: &ConnectionConfig{}, 51 | }, 52 | } 53 | for _, tc := range testCases { 54 | ps := &pipeService{ 55 | transformerService: tc.mock, 56 | } 57 | 58 | trans, err := ps.createTransformers("id", nil, "measure", "inputDb", tc.conf) 59 | if err == nil && tc.expectErr { 60 | t.Fatalf("%s: expected error, got none", tc.desc) 61 | } else if err != nil && !tc.expectErr { 62 | t.Fatalf("%s: unexpected err: %v", tc.desc, err) 63 | } 64 | 65 | if tc.expectErr { 66 | continue 67 | } 68 | 69 | if len(trans) != len(tc.expectedTransIds) { 70 | t.Fatalf("%s: expected %d transformers, got %d", tc.desc, len(tc.expectedTransIds), len(trans)) 71 | } 72 | 73 | for i, returnedTrans := range trans { 74 | if returnedTrans.ID() != tc.expectedTransIds[i] { 75 | t.Fatalf("%s: expected trans id '%s', got '%s'", tc.desc, tc.expectedTransIds[i], returnedTrans.ID()) 76 | } 77 | } 78 | } 79 | }
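// psctMockService and psctMockTrans below are hand-rolled test doubles: the service satisfies
// the TransformerService interface with canned transformers and errors, and the transformer
// satisfies transformation.Transformer with a fixed ID, so the table-driven test above can
// exercise every branch of createTransformers without a live InfluxDB connection.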
80 | 81 | type psctMockService struct { 82 | tagsT transformation.Transformer 83 | tagsErr error 84 | fieldsT transformation.Transformer 85 | fieldsErr error 86 | } 87 | 88 | func (p *psctMockService) TagsAsJSON(infConn influx.Client, id, db, rp, measure string, resultCol string) (transformation.Transformer, error) { 89 | return p.tagsT, p.tagsErr 90 | } 91 | 92 | func (p *psctMockService) FieldsAsJSON(infConn influx.Client, id, db, rp, measure string, resultCol string) (transformation.Transformer, error) { 93 | return p.fieldsT, p.fieldsErr 94 | } 95 | 96 | type psctMockTrans struct { 97 | id string 98 | } 99 | 100 | func (p *psctMockTrans) ID() string { 101 | return p.id 102 | } 103 | func (p *psctMockTrans) Prepare(input *idrf.Bundle) (*idrf.Bundle, error) { return nil, nil } 104 | func (p *psctMockTrans) Start(chan error) error { return nil } 105 | -------------------------------------------------------------------------------- /internal/cli/transformer_service.go: -------------------------------------------------------------------------------- 1 | package cli 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | influx "github.com/influxdata/influxdb/client/v2" 8 | "github.com/timescale/outflux/internal/idrf" 9 | "github.com/timescale/outflux/internal/schemamanagement/influx/discovery" 10 | "github.com/timescale/outflux/internal/transformation" 11 | jsonCombiner "github.com/timescale/outflux/internal/transformation/jsoncombiner" 12 | ) 13 | 14 | // TransformerService creates different transformers 15 | type TransformerService interface { 16 | TagsAsJSON(infConn influx.Client, id, db, rp, measure string, resultCol string) (transformation.Transformer, error) 17 | FieldsAsJSON(infConn influx.Client, id, db, rp, measure string, resultCol string) (transformation.Transformer, error) 18 | } 19 | 20 | // NewTransformerService creates a new implementation of the TransformerService interface 21 | func NewTransformerService(influxTagExplorer discovery.TagExplorer, influxFieldExplorer discovery.FieldExplorer) TransformerService { 22 | return &transformerService{ 23 | influxTagExplorer: influxTagExplorer, 24 | influxFieldExplorer: influxFieldExplorer, 25 | } 26 | } 27 | 28 | type transformerService struct { 29 | influxTagExplorer discovery.TagExplorer 30 | influxFieldExplorer discovery.FieldExplorer 31 | } 32 | 33 | // TagsAsJSON returns a transformer that combines the tags into a single JSONb column. 34 | // Returns a Transformer instance or nil if there are no tags. 35 | // Returns an error if the tags couldn't be discovered or the instance of the transformer 36 | // could not be created. 37 | func (t *transformerService) TagsAsJSON(infConn influx.Client, id, db, rp, measure string, resultCol string) (transformation.Transformer, error) { 38 | log.Printf("Tags for measure '%s' will be combined into a single JSONB column", measure) 39 | tags, err := t.fetchTags(infConn, db, rp, measure) 40 | if err != nil { 41 | return nil, fmt.Errorf("could not create the transformer for measure '%s'\n%v", measure, err) 42 | } 43 | 44 | if len(tags) == 0 { 45 | log.Printf("%s: measure '%s' doesn't have any tags, will not be transformed", id, measure) 46 | return nil, nil 47 | } 48 | return jsonCombiner.NewTransformer(id, tags, resultCol) 49 | } 50 | 51 | // FieldsAsJSON returns a transformer that combines the fields into a single JSONb column. 
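// As a sketch of the intended effect (with hypothetical field names and values): a row with
// fields temperature=23.5 and humidity=0.4 would be emitted with those two columns replaced by
// one JSONB value {"temperature":23.5,"humidity":0.4} stored in the column named by resultCol.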
52 | func (t *transformerService) FieldsAsJSON(infConn influx.Client, id, db, rp, measure string, resultCol string) (transformation.Transformer, error) { 53 | log.Printf("Fields for measure '%s' will be combined into a single JSONB column", measure) 54 | fields, err := t.fetchFields(infConn, db, rp, measure) 55 | if err != nil { 56 | return nil, fmt.Errorf("could not create the transformer for measure '%s'\n%v", measure, err) 57 | } 58 | 59 | return jsonCombiner.NewTransformer(id, fields, resultCol) 60 | } 61 | 62 | type fetchColumnsFn func() ([]*idrf.Column, error) 63 | 64 | func (t *transformerService) fetchTags(infConn influx.Client, db, rp, measure string) ([]string, error) { 65 | fetchFn := func() ([]*idrf.Column, error) { 66 | return t.influxTagExplorer.DiscoverMeasurementTags(infConn, db, rp, measure) 67 | } 68 | 69 | return fetch(fetchFn) 70 | } 71 | 72 | func (t *transformerService) fetchFields(infConn influx.Client, db, rp, measure string) ([]string, error) { 73 | // Because the columns are combined into a single JSON value it doesn't matter if they are 74 | // int or float in different shards 75 | onConflictConvertIntToFloat := true 76 | fetchFn := func() ([]*idrf.Column, error) { 77 | return t.influxFieldExplorer.DiscoverMeasurementFields(infConn, db, rp, measure, onConflictConvertIntToFloat) 78 | } 79 | 80 | return fetch(fetchFn) 81 | } 82 | 83 | func fetch(fetchFn fetchColumnsFn) ([]string, error) { 84 | columns, err := fetchFn() 85 | if err != nil { 86 | return nil, err 87 | } 88 | 89 | columnNames := make([]string, len(columns)) 90 | for i, column := range columns { 91 | columnNames[i] = column.Name 92 | } 93 | 94 | return columnNames, nil 95 | } 96 | -------------------------------------------------------------------------------- /internal/connections/influx_connection.go: -------------------------------------------------------------------------------- 1 | package connections 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | influx "github.com/influxdata/influxdb/client/v2" 8 | ) 9 | 10 | // Environment variable names to be used for the InfluxDB connection 11 | const ( 12 | UserEnvVar = "INFLUX_USERNAME" 13 | PassEnvVar = "INFLUX_PASSWORD" 14 | ) 15 | 16 | // InfluxConnectionParams represents the parameters required to open an InfluxDB connection 17 | type InfluxConnectionParams struct { 18 | Server string 19 | Username string 20 | Password string 21 | Database string 22 | UnsafeHTTPS bool 23 | } 24 | 25 | // InfluxConnectionService creates new clients connected to some Influx server 26 | type InfluxConnectionService interface { 27 | NewConnection(*InfluxConnectionParams) (influx.Client, error) 28 | } 29 | 30 | type defaultInfluxConnectionService struct{} 31 | 32 | // NewInfluxConnectionService creates a new instance of the service 33 | func NewInfluxConnectionService() InfluxConnectionService { 34 | return &defaultInfluxConnectionService{} 35 | } 36 | 37 | func (s *defaultInfluxConnectionService) NewConnection(params *InfluxConnectionParams) (influx.Client, error) { 38 | if params == nil { 39 | return nil, fmt.Errorf("connection params shouldn't be nil") 40 | } 41 | 42 | var user, pass string 43 | 44 | if params.Username != "" { 45 | user = params.Username 46 | } else { 47 | user = os.Getenv(UserEnvVar) 48 | } 49 | 50 | if params.Password != "" { 51 | pass = params.Password 52 | } else { 53 | pass = os.Getenv(PassEnvVar) 54 | } 55 | clientConfig := influx.HTTPConfig{ 56 | Addr: params.Server, 57 | Username: user, 58 | Password: pass, 59 | InsecureSkipVerify: params.UnsafeHTTPS, 60 | } 61 | 62 | 
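// A note on behavior (an assumption about the influx client v2 library, not stated in this
// file): NewHTTPClient only validates the configuration, e.g. that Addr parses as an http(s)
// URL; it does not contact the server, so a nil error below does not guarantee that the
// InfluxDB instance is actually reachable.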
newClient, err := influx.NewHTTPClient(clientConfig) 63 | return newClient, err 64 | } 65 | -------------------------------------------------------------------------------- /internal/connections/influx_connection_test.go: -------------------------------------------------------------------------------- 1 | package connections 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestInfluxConnectionServiceNewConnection(t *testing.T) { 8 | service := &defaultInfluxConnectionService{} 9 | if _, err := service.NewConnection(nil); err == nil { 10 | t.Error("should not be able to create a client without connection params") 11 | } 12 | 13 | params := &InfluxConnectionParams{} 14 | if _, err := service.NewConnection(params); err == nil { 15 | t.Error("empty server address should not be accepted") 16 | } 17 | 18 | params.Server = "http://someaddress" 19 | if res, err := service.NewConnection(params); err != nil || res == nil { 20 | t.Error("client should have been created without errors") 21 | } 22 | 23 | // exercise the explicit username/password branch 24 | params.Username = "hyuck" 25 | params.Password = "hyuck" 26 | if res, err := service.NewConnection(params); err != nil || res == nil { 27 | t.Error("client should have been created") 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /internal/connections/pgx_mock.go: -------------------------------------------------------------------------------- 1 | package connections 2 | 3 | import "github.com/jackc/pgx" 4 | 5 | // MockPgxW is a mock implementation of the PgxWrap interface. 6 | type MockPgxW struct { 7 | ExecRes []pgx.CommandTag 8 | ExecErrs []error 9 | CurrentExec int 10 | QueryRes []*pgx.Rows 11 | QueryErrs []error 12 | CurrentQ int 13 | ExpQ []string 14 | ExpQArgs [][]interface{} 15 | ExpExec []string 16 | ExpExecArgs [][]interface{} 17 | BeginRes []*pgx.Tx 18 | BeginErr []error 19 | CurrentBegin int 20 | CopyFromErr []error 21 | CurrentCopyFrom int 22 | ExpCopyFromTab []pgx.Identifier 23 | ExpCopyFromCol [][]string 24 | } 25 | 26 | // Begin opens a transaction. 27 | func (t *MockPgxW) Begin() (*pgx.Tx, error) { 28 | tmp := t.CurrentBegin 29 | t.CurrentBegin++ 30 | return t.BeginRes[tmp], t.BeginErr[tmp] 31 | } 32 | 33 | // CopyFrom uses COPY to insert data. 34 | func (t *MockPgxW) CopyFrom(tableName pgx.Identifier, columnNames []string, rowSrc pgx.CopyFromSource) (int, error) { 35 | tmp := t.CurrentCopyFrom 36 | if t.ExpCopyFromTab == nil { 37 | t.ExpCopyFromTab = make([]pgx.Identifier, len(t.CopyFromErr)) 38 | t.ExpCopyFromCol = make([][]string, len(t.CopyFromErr)) 39 | } 40 | t.ExpCopyFromTab[tmp] = tableName 41 | t.ExpCopyFromCol[tmp] = columnNames 42 | t.CurrentCopyFrom++ 43 | return 0, t.CopyFromErr[tmp] 44 | } 45 | 46 | // Exec executes an SQL statement, no results returned. 47 | func (t *MockPgxW) Exec(sql string, arguments ...interface{}) (commandTag pgx.CommandTag, err error) { 48 | if t.ExpExec == nil { 49 | t.ExpExec = make([]string, len(t.ExecRes)) 50 | t.ExpExecArgs = make([][]interface{}, len(t.ExecRes)) 51 | } 52 | tmp := t.CurrentExec 53 | t.ExpExec[tmp] = sql 54 | t.ExpExecArgs[tmp] = arguments 55 | t.CurrentExec++ 56 | return t.ExecRes[tmp], t.ExecErrs[tmp] 57 | } 58 | 59 | // Query data from the db.
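// Like Exec, it records the SQL text and arguments (in ExpQ and ExpQArgs) so tests can assert
// on what was executed, and replays the preconfigured QueryRes/QueryErrs values in order.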
60 | func (t *MockPgxW) Query(sql string, args ...interface{}) (*pgx.Rows, error) { 61 | if t.ExpQ == nil { 62 | t.ExpQ = make([]string, len(t.QueryRes)) 63 | t.ExpQArgs = make([][]interface{}, len(t.QueryRes)) 64 | } 65 | tmp := t.CurrentQ 66 | t.ExpQ[tmp] = sql 67 | t.ExpQArgs[tmp] = args 68 | t.CurrentQ++ 69 | return t.QueryRes[tmp], t.QueryErrs[tmp] 70 | } 71 | 72 | // Close the connection. 73 | func (t *MockPgxW) Close() error { 74 | return nil 75 | } 76 | -------------------------------------------------------------------------------- /internal/connections/pgx_wrapper.go: -------------------------------------------------------------------------------- 1 | package connections 2 | 3 | import "github.com/jackc/pgx" 4 | 5 | // PgxWrap represents a wrapper interface around pgx.Conn, for easier testing. 6 | type PgxWrap interface { 7 | Begin() (*pgx.Tx, error) 8 | CopyFrom(tableName pgx.Identifier, columnNames []string, rowSrc pgx.CopyFromSource) (int, error) 9 | Exec(sql string, arguments ...interface{}) (commandTag pgx.CommandTag, err error) 10 | Query(sql string, args ...interface{}) (*pgx.Rows, error) 11 | Close() error 12 | } 13 | 14 | type defaultPgxWrapper struct { 15 | db *pgx.Conn 16 | } 17 | 18 | // NewPgxWrapper creates a new pgx.Conn wrapper. 19 | func NewPgxWrapper(db *pgx.Conn) PgxWrap { 20 | return &defaultPgxWrapper{db} 21 | } 22 | 23 | func (d *defaultPgxWrapper) CopyFrom(tableName pgx.Identifier, columnNames []string, rowSrc pgx.CopyFromSource) (int, error) { 24 | return d.db.CopyFrom(tableName, columnNames, rowSrc) 25 | } 26 | func (d *defaultPgxWrapper) Exec(sql string, arguments ...interface{}) (commandTag pgx.CommandTag, err error) { 27 | return d.db.Exec(sql, arguments...) 28 | } 29 | func (d *defaultPgxWrapper) Query(sql string, args ...interface{}) (*pgx.Rows, error) { 30 | return d.db.Query(sql, args...) 
31 | } 32 | func (d *defaultPgxWrapper) Close() error { 33 | return d.db.Close() 34 | } 35 | func (d *defaultPgxWrapper) Begin() (*pgx.Tx, error) { 36 | return d.db.Begin() 37 | } 38 | -------------------------------------------------------------------------------- /internal/connections/timescale_connection.go: -------------------------------------------------------------------------------- 1 | package connections 2 | 3 | import ( 4 | "log" 5 | "strings" 6 | 7 | "github.com/jackc/pgx" 8 | ) 9 | 10 | // TSConnectionService creates new TimescaleDB connections 11 | type TSConnectionService interface { 12 | NewConnection(connectionString string) (PgxWrap, error) 13 | } 14 | 15 | type defaultTSConnectionService struct{} 16 | 17 | // NewTSConnectionService creates a new TSConnectionService instance 18 | func NewTSConnectionService() TSConnectionService {  19 | return &defaultTSConnectionService{} 20 | } 21 | 22 | func (s *defaultTSConnectionService) NewConnection(connectionString string) (PgxWrap, error) { 23 | log.Printf("Overriding PG environment variables for connection with: %s", connectionString) 24 | envConnConfig, err := pgx.ParseEnvLibpq() 25 | if err != nil { 26 | return nil, err 27 | } 28 | 29 | if strings.HasPrefix(connectionString, `'`) && strings.HasSuffix(connectionString, `'`) { 30 | connectionString = connectionString[1 : len(connectionString)-1] 31 | } else if strings.HasPrefix(connectionString, `"`) && strings.HasSuffix(connectionString, `"`) { 32 | connectionString = connectionString[1 : len(connectionString)-1] 33 | } 34 | 35 | connConfig, err := pgx.ParseConnectionString(connectionString) 36 | if err != nil { 37 | return nil, err 38 | } 39 | 40 | connConfig = envConnConfig.Merge(connConfig) 41 | pgxConn, err := pgx.Connect(connConfig) 42 | if err != nil { 43 | return nil, err 44 | } 45 | 46 | return NewPgxWrapper(pgxConn), nil 47 | } 48 | -------------------------------------------------------------------------------- /internal/connections/timescale_connection_i_test.go: -------------------------------------------------------------------------------- 1 | // +build integration 2 | 3 | package connections 4 | 5 | import ( 6 | "os" 7 | "testing" 8 | 9 | "github.com/timescale/outflux/internal/testutils" 10 | ) 11 | 12 | func TestNewConnection(t *testing.T) { 13 | db := "test_new_conn" 14 | if err := testutils.CreateTimescaleDb(db); err != nil { 15 | t.Fatalf("could not prepare db: %v", err) 16 | } 17 | 18 | defer testutils.DeleteTimescaleDb(db) 19 | 20 | goodEnv := map[string]string{ 21 | "PGPORT": "5433", 22 | "PGUSER": "postgres", 23 | "PGPASSWORD": "postgres", 24 | "PGDATABASE": db, 25 | } 26 | 27 | badEnv := map[string]string{ 28 | "PGPORT": "5433", 29 | "PGUSER": "postgres", 30 | "PGPASSWORD": "postgres", 31 | "PGDATABASE": "wrong_db", 32 | } 33 | connService := &defaultTSConnectionService{} 34 | testCases := []struct { 35 | desc string 36 | conn string 37 | env map[string]string 38 | expectErr bool 39 | }{ 40 | {desc: "nothing is set, env is empty", expectErr: true}, 41 | {desc: "environment is set, no overrides", env: goodEnv}, 42 | {desc: "environment is set, overrides make it bad", env: goodEnv, conn: "dbname=wrong_db", expectErr: true}, 43 | {desc: "environment is set badly, overrides make it good", env: badEnv, conn: "dbname=" + db}, 44 | } 45 | 46 | for _, tc := range testCases { 47 | // make sure the environment is only that in tc.env 48 | os.Clearenv() 49 | for k, v := range tc.env { 50 | os.Setenv(k, v) 51 | } 52 | res, err := connService.NewConnection(tc.conn) 53 
| if err != nil && !tc.expectErr { 54 | t.Fatalf("%s\nunexpected error: %v", tc.desc, err) 55 | } else if err == nil && tc.expectErr { 56 | res.Close() 57 | t.Fatalf("%s\nexpected error, none received", tc.desc) 58 | } 59 | 60 | if tc.expectErr { 61 | continue 62 | } 63 | 64 | rows, err := res.Query("SELECT 1") 65 | if err != nil { 66 | t.Fatalf("could not execute query with established connection: %v", err) 67 | } 68 | 69 | if !rows.Next() { 70 | t.Fatalf("no result returned for SELECT 1") 71 | } else { 72 | var dest int 73 | rows.Scan(&dest) 74 | if dest != 1 { 75 | t.Fatalf("expected 1, got %d", dest) 76 | } 77 | } 78 | 79 | rows.Close() 80 | res.Close() 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /internal/extraction/config/extraction_config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | ) 7 | 8 | const ( 9 | acceptedTimeFormat = time.RFC3339 10 | ) 11 | 12 | // MeasureExtraction holds config properties for a single measure 13 | type MeasureExtraction struct { 14 | Database string 15 | Measure string 16 | From string 17 | To string 18 | ChunkSize uint16 19 | Limit uint64 20 | SchemaOnly bool 21 | RetentionPolicy string 22 | OnConflictConvertIntToFloat bool 23 | } 24 | 25 | // ValidateMeasureExtractionConfig validates the fields 26 | // 'chunkSize' must be positive; it specifies the number of rows the database server sends to the client at once 27 | // 'limit' if > 0 limits the number of points extracted from the measure, if == 0 all data is requested 28 | // 'from' and 'to' are optional timestamps. If specified, data is requested only between these timestamps 29 | func ValidateMeasureExtractionConfig(config *MeasureExtraction) error { 30 | if config.Database == "" || config.Measure == "" { 31 | return fmt.Errorf("database and measure can't be empty") 32 | } 33 | 34 | if config.ChunkSize == 0 { 35 | return fmt.Errorf("chunk size must be > 0") 36 | } 37 | 38 | _, formatError := time.Parse(acceptedTimeFormat, config.From) 39 | if config.From != "" && formatError != nil { 40 | return fmt.Errorf("'from' time must be formatted as %s", acceptedTimeFormat) 41 | } 42 | 43 | _, formatError = time.Parse(acceptedTimeFormat, config.To) 44 | if config.To != "" && formatError != nil { 45 | return fmt.Errorf("'to' time must be formatted as %s", acceptedTimeFormat) 46 | } 47 | 48 | return nil 49 | } 50 | 51 | // ExtractionConfig combines everything needed to create and start an Extractor 52 | type ExtractionConfig struct { 53 | ExtractorID string 54 | MeasureExtraction *MeasureExtraction 55 | DataBufferSize uint16 56 | } 57 | -------------------------------------------------------------------------------- /internal/extraction/config/extraction_config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestNewMeasureExtractionConfig(t *testing.T) { 8 | badCases := []MeasureExtraction{ 9 | {Database: "", Measure: "measure", ChunkSize: 1}, 10 | {Database: "Db", Measure: "", ChunkSize: 1}, 11 | {Database: "Db", Measure: "measure", ChunkSize: 0}, 12 | {Database: "Db", Measure: "measure", From: "2019-01-01T00:00:00", ChunkSize: 1}, 13 | {Database: "Db", Measure: "measure", From: "2019-01-01", ChunkSize: 1}, 14 | {Database: "Db", Measure: "measure", To: "2019-01-01T00:00:00", ChunkSize: 1}, 15 | {Database: "Db", Measure: "measure", To: "2019-01-01", ChunkSize: 1}, 16 | 
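// the four timestamped cases above are rejected because From/To are not valid RFC3339
// values: they are missing the timezone offset or the time component entirely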
} 17 | 18 | for _, badCase := range badCases { 19 | err := ValidateMeasureExtractionConfig(&badCase) 20 | 21 | if err == nil { 22 | t.Error("expected an error, none received") 23 | } 24 | } 25 | 26 | goodCases := []MeasureExtraction{ 27 | {Database: "Database", Measure: "Measure", ChunkSize: 1}, 28 | {Database: "Database", Measure: "Measure", ChunkSize: 1, Limit: 1}, 29 | {Database: "Database", Measure: "Measure", ChunkSize: 1, From: "2019-01-01T00:00:00Z"}, 30 | {Database: "Database", Measure: "Measure", ChunkSize: 1, From: "2019-01-01T00:00:00+00:00"}, 31 | {Database: "Database", Measure: "Measure", ChunkSize: 1, From: "2019-01-01T00:00:00-01:00"}, 32 | {Database: "Database", Measure: "Measure", ChunkSize: 1, To: "2019-01-01T00:00:00-01:00"}, 33 | {Database: "Database", Measure: "Measure", ChunkSize: 1, From: "2019-01-01T00:00:00-01:00", To: "2019-01-01T00:00:00+01:00"}, 34 | } 35 | 36 | for _, goodCase := range goodCases { 37 | err := ValidateMeasureExtractionConfig(&goodCase) 38 | 39 | if err != nil { 40 | t.Errorf("expected no error, got: %v", err) 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /internal/extraction/extraction_service.go: -------------------------------------------------------------------------------- 1 | package extraction 2 | 3 | import ( 4 | "fmt" 5 | 6 | influx "github.com/influxdata/influxdb/client/v2" 7 | "github.com/timescale/outflux/internal/extraction/config" 8 | influxExtraction "github.com/timescale/outflux/internal/extraction/influx" 9 | "github.com/timescale/outflux/internal/schemamanagement" 10 | ) 11 | 12 | // ExtractorService defines methods for creating extractor instances 13 | type ExtractorService interface { 14 | InfluxExtractor(influx.Client, *config.ExtractionConfig) (Extractor, error) 15 | } 16 | 17 | // NewExtractorService creates a new instance of the service that can create extractors 18 | func NewExtractorService(schemaManagerService schemamanagement.SchemaManagerService) ExtractorService { 19 | return &extractorService{schemaManagerService} 20 | } 21 | 22 | type extractorService struct { 23 | schemaManagerService schemamanagement.SchemaManagerService 24 | } 25 | 26 | func (e *extractorService) InfluxExtractor(conn influx.Client, conf *config.ExtractionConfig) (Extractor, error) { 27 | exConf := conf.MeasureExtraction 28 | err := config.ValidateMeasureExtractionConfig(exConf) 29 | if err != nil { 30 | return nil, fmt.Errorf("measure extraction config is not valid: %s", err.Error()) 31 | } 32 | 33 | sm := e.schemaManagerService.Influx(conn, exConf.Database, exConf.RetentionPolicy, exConf.OnConflictConvertIntToFloat) 34 | dataProducer := influxExtraction.NewDataProducer(conf.ExtractorID, conn) 35 | return &influxExtraction.Extractor{ 36 | Config: conf, 37 | SM: sm, 38 | DataProducer: dataProducer, 39 | }, nil 40 | } 41 | -------------------------------------------------------------------------------- /internal/extraction/extractor.go: -------------------------------------------------------------------------------- 1 | package extraction 2 | 3 | import ( 4 | "github.com/timescale/outflux/internal/idrf" 5 | ) 6 | 7 | // Extractor defines an interface for pulling data out of a database. 8 | // When Prepare is called a data channel with a description of the 9 | // data is returned. On Start the data channel is populated.
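// A minimal usage sketch (hypothetical driver code, assuming an Extractor instance 'ex' and
// an error channel 'errChan' shared with the other routines of a pipe):
//
//	bundle, err := ex.Prepare()
//	if err != nil {
//		return err
//	}
//	go func() {
//		if err := ex.Start(errChan); err != nil {
//			errChan <- err
//		}
//	}()
//	for row := range bundle.DataChan {
//		_ = row // consume idrf.Row values here
//	}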
10 | type Extractor interface { 11 | ID() string 12 | Prepare() (*idrf.Bundle, error) 13 | Start(chan error) error 14 | } 15 | -------------------------------------------------------------------------------- /internal/extraction/influx/idrfconversion/idrf_converter.go: -------------------------------------------------------------------------------- 1 | package idrfconversion 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/timescale/outflux/internal/idrf" 9 | ) 10 | 11 | // IdrfConverter defines methods to convert a row of an InfluxDB query result to IDRF 12 | type IdrfConverter interface { 13 | Convert(row []interface{}) (idrf.Row, error) 14 | } 15 | 16 | // NewIdrfConverter creates an instance of the IdrfConverter that converts a row 17 | // of an InfluxDB query result to IDRF 18 | func NewIdrfConverter(dataSet *idrf.DataSet) IdrfConverter { 19 | return &defaultIdrfConverter{dataSet} 20 | } 21 | 22 | type defaultIdrfConverter struct { 23 | dataSet *idrf.DataSet 24 | } 25 | 26 | func (conv *defaultIdrfConverter) Convert(row []interface{}) (idrf.Row, error) { 27 | if len(row) != len(conv.dataSet.Columns) { 28 | return nil, fmt.Errorf( 29 | "could not convert extracted row, number of extracted values is %d, expected %d values", 30 | len(row), len(conv.dataSet.Columns)) 31 | } 32 | 33 | converted := make([]interface{}, len(row)) 34 | for i, item := range row { 35 | converted[i] = convertByType(item, conv.dataSet.Columns[i].DataType) 36 | } 37 | 38 | return converted, nil 39 | } 40 | 41 | func convertByType(rawValue interface{}, expected idrf.DataType) interface{} { 42 | if rawValue == nil { 43 | return nil 44 | } 45 | 46 | switch expected { 47 | case idrf.IDRFInteger32: 48 | valAsInt64, _ := rawValue.(json.Number).Int64() 49 | return int32(valAsInt64) 50 | case idrf.IDRFInteger64: 51 | valAsInt64, _ := rawValue.(json.Number).Int64() 52 | return valAsInt64 53 | case idrf.IDRFDouble: 54 | valAsFloat64, _ := rawValue.(json.Number).Float64() 55 | return valAsFloat64 56 | case idrf.IDRFSingle: 57 | valAsFloat64, _ := rawValue.(json.Number).Float64() 58 | return float32(valAsFloat64) 59 | case idrf.IDRFTimestamptz, idrf.IDRFTimestamp: 60 | ts, _ := time.Parse(time.RFC3339, rawValue.(string)) 61 | return ts 62 | default: 63 | return rawValue 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /internal/extraction/influx/idrfconversion/idrf_converter_test.go: -------------------------------------------------------------------------------- 1 | package idrfconversion 2 | 3 | import ( 4 | "encoding/json" 5 | "reflect" 6 | "testing" 7 | 8 | "github.com/timescale/outflux/internal/idrf" 9 | ) 10 | 11 | func TestConvertByType(t *testing.T) { 12 | tcs := []struct { 13 | inVal interface{} 14 | inType idrf.DataType 15 | expected interface{} 16 | isConverted bool 17 | }{ 18 | {json.Number("1"), idrf.IDRFInteger32, int32(1), true}, 19 | {json.Number("1"), idrf.IDRFInteger64, int64(1), true}, 20 | {json.Number("1.0"), idrf.IDRFSingle, float32(1), true}, 21 | {json.Number("1"), idrf.IDRFDouble, float64(1), true}, 22 | {"1", idrf.IDRFString, "1", false}, 23 | {nil, idrf.IDRFBoolean, nil, false}, 24 | {"{\"a\":1}", idrf.IDRFJson, "{\"a\":1}", false}, 25 | } 26 | 27 | for _, tc := range tcs { 28 | res := convertByType(tc.inVal, tc.inType) 29 | if tc.inVal == nil { 30 | if res != nil { 31 | t.Errorf("nil expected, got: %v", res) 32 | } else { 33 | continue 
34 | } 35 | } 36 | 37 | expectedType := reflect.TypeOf(tc.expected) 38 | gotType := reflect.TypeOf(res) 39 | if expectedType != gotType { 40 | t.Errorf("expected type: %v\ngot: %v", expectedType, gotType) 41 | } 42 | } 43 | } 44 | 45 | func TestConvertValues(t *testing.T) { 46 | testIn := make([]interface{}, 1) 47 | testIn[0] = "1" 48 | cols := []*idrf.Column{{DataType: idrf.IDRFString}} 49 | tcs := []struct { 50 | in idrf.Row 51 | ds *idrf.DataSet 52 | expectErr bool 53 | }{ 54 | {in: make([]interface{}, 1), ds: &idrf.DataSet{}, expectErr: true}, 55 | {in: make([]interface{}, 0), ds: &idrf.DataSet{}, expectErr: false}, 56 | {in: []interface{}{"1"}, ds: &idrf.DataSet{Columns: cols}, expectErr: false}, 57 | } 58 | 59 | for _, tc := range tcs { 60 | conv := &defaultIdrfConverter{dataSet: tc.ds} 61 | res, err := conv.Convert(tc.in) 62 | if tc.expectErr && err == nil { 63 | t.Error("expected an error, none received") 64 | } 65 | 66 | if !tc.expectErr && err != nil { 67 | t.Errorf("didn't expect an error, got: %v\n", err) 68 | } 69 | 70 | if tc.expectErr { 71 | continue 72 | } 73 | 74 | if len(res) != len(tc.in) { 75 | t.Errorf("result has a different length than the input.\nexpected: %d, got: %d", len(tc.in), len(res)) 76 | } 77 | 78 | if len(res) == 0 { 79 | continue 80 | } 81 | 82 | converted := res[0].(string) 83 | if converted != tc.in[0] { 84 | t.Errorf("expected: %v\ngot: %v", tc.in[0], converted) 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /internal/extraction/influx/influx_data_producer.go: -------------------------------------------------------------------------------- 1 | package influx 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "log" 7 | 8 | "github.com/timescale/outflux/internal/extraction/influx/idrfconversion" 9 | 10 | influx "github.com/influxdata/influxdb/client/v2" 11 | "github.com/timescale/outflux/internal/idrf" 12 | ) 13 | 14 | // DataProducer populates a data channel with the results from an influx query 15 | type DataProducer interface { 16 | Fetch(*producerArgs) error 17 | } 18 | 19 | // NewDataProducer creates a new DataProducer 20 | func NewDataProducer(id string, influxClient influx.Client) DataProducer { 21 | return &defaultDataProducer{ 22 | id, influxClient, 23 | } 24 | } 25 | 26 | type defaultDataProducer struct { 27 | extractorID string 28 | influxClient influx.Client 29 | } 30 | 31 | type producerArgs struct { 32 | dataChannel chan idrf.Row 33 | errChannel chan error 34 | query *influx.Query 35 | converter idrfconversion.IdrfConverter 36 | } 37 | 38 | // Executes the select query and receives the chunked response, piping it to a data channel. 39 | // If an error occurs it is returned to the caller. The data channel is closed at the end of the routine.
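// Between chunks the error channel is polled (via checkError) so the producer can stop early
// when another goroutine of the pipe has already failed; chunked responses are consumed until
// the server signals io.EOF.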
40 | func (dp *defaultDataProducer) Fetch(args *producerArgs) error { 41 | defer close(args.dataChannel) 42 | 43 | chunkResponse, err := dp.influxClient.QueryAsChunk(*args.query) 44 | if err != nil { 45 | err = fmt.Errorf("extractor '%s' could not execute a chunked query.\n%v", dp.extractorID, err) 46 | log.Printf("'%s': %v", dp.extractorID, err) 47 | return err 48 | } 49 | 50 | defer chunkResponse.Close() 51 | 52 | totalRows := 0 53 | for { 54 | // Before requesting the next chunk, check if an error occurred in some other goroutine 55 | if err = checkError(args.errChannel); err != nil { 56 | return nil 57 | } 58 | 59 | response, err := chunkResponse.NextResponse() 60 | if err != nil { 61 | if err == io.EOF { 62 | return nil 63 | } 64 | 65 | // If we got an error while decoding the response, send that back. 66 | err = fmt.Errorf("extractor '%s': error decoding response.\n%v", dp.extractorID, err) 67 | return err 68 | } 69 | 70 | if response == nil || response.Err != "" || len(response.Results) != 1 { 71 | return fmt.Errorf("extractor '%s': server did not return a proper response", dp.extractorID) 72 | } 73 | 74 | series := response.Results[0].Series 75 | if len(series) > 1 { 76 | return fmt.Errorf("extractor '%s': returned response had an unexpected format", dp.extractorID) 77 | } else if len(series) == 0 { 78 | return nil 79 | } 80 | 81 | rows := series[0] 82 | totalRows += len(rows.Values) 83 | log.Printf("%s: Extracted %d rows from Influx", dp.extractorID, totalRows) 84 | for _, valRow := range rows.Values { 85 | convertedRow, err := args.converter.Convert(valRow) 86 | if err != nil { 87 | return fmt.Errorf("extractor '%s': could not convert influx result to IDRF row\n%v", dp.extractorID, err) 88 | } 89 | 90 | args.dataChannel <- convertedRow 91 | } 92 | } 93 | 94 | } 95 | -------------------------------------------------------------------------------- /internal/extraction/influx/influx_extractor.go: -------------------------------------------------------------------------------- 1 | package influx 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | influx "github.com/influxdata/influxdb/client/v2" 8 | "github.com/timescale/outflux/internal/extraction/config" 9 | "github.com/timescale/outflux/internal/extraction/influx/idrfconversion" 10 | "github.com/timescale/outflux/internal/idrf" 11 | "github.com/timescale/outflux/internal/schemamanagement" 12 | ) 13 | 14 | // Extractor is an implementation of the extraction.Extractor interface for 15 | // pulling data out of InfluxDB 16 | type Extractor struct { 17 | Config *config.ExtractionConfig 18 | SM schemamanagement.SchemaManager 19 | cachedElementData *idrf.Bundle 20 | DataProducer DataProducer 21 | } 22 | 23 | // ID of the extractor, useful for logging and error reporting 24 | func (e *Extractor) ID() string { 25 | return e.Config.ExtractorID 26 | } 27 | 28 | // Prepare discovers the data set schema for the measure in the config 29 | func (e *Extractor) Prepare() (*idrf.Bundle, error) { 30 | measureName := e.Config.MeasureExtraction.Measure 31 | log.Printf("Discovering influx schema for measurement: %s", measureName) 32 | 33 | discoveredDataSet, err := e.SM.FetchDataSet(measureName) 34 | if err != nil { 35 | return nil, fmt.Errorf("%s: could not fetch data set definition for measure: %s\n%v", e.ID(), measureName, err) 36 | } 37 | 38 | log.Printf("Discovered: %s", discoveredDataSet.String()) 39 | e.cachedElementData = &idrf.Bundle{ 40 | DataDef: discoveredDataSet, 41 | DataChan: make(chan idrf.Row, e.Config.DataBufferSize), 42 | } 43 | 44 | return 
e.cachedElementData, nil 45 | } 46 | 47 | // Start pulls the data from an InfluxDB measure and feeds it to a data channel 48 | // Periodically (between chunks) checks for external errors and quits if it detects them 49 | func (e *Extractor) Start(errChan chan error) error { 50 | if e.cachedElementData == nil { 51 | return fmt.Errorf("%s: Prepare not called before start", e.ID()) 52 | } 53 | 54 | id := e.Config.ExtractorID 55 | dataDef := e.cachedElementData.DataDef 56 | measureConf := e.Config.MeasureExtraction 57 | 58 | log.Printf("Starting extractor '%s' for measure: %s\n", id, dataDef.DataSetName) 59 | intChunkSize := int(measureConf.ChunkSize) 60 | 61 | query := &influx.Query{ 62 | Command: buildSelectCommand(measureConf, dataDef.Columns), 63 | Database: measureConf.Database, 64 | RetentionPolicy: measureConf.RetentionPolicy, 65 | Chunked: true, 66 | ChunkSize: intChunkSize, 67 | } 68 | 69 | log.Printf("%s: Extracting data from database '%s'\n", id, query.Database) 70 | log.Printf("%s: %s\n", id, query.Command) 71 | log.Printf("%s: Pulling chunks with size %d\n", id, intChunkSize) 72 | 73 | idrfConverter := idrfconversion.NewIdrfConverter(dataDef) 74 | producerArgs := &producerArgs{ 75 | dataChannel: e.cachedElementData.DataChan, 76 | errChannel: errChan, 77 | query: query, 78 | converter: idrfConverter, 79 | } 80 | 81 | return e.DataProducer.Fetch(producerArgs) 82 | } 83 | -------------------------------------------------------------------------------- /internal/extraction/influx/query_building.go: -------------------------------------------------------------------------------- 1 | package influx 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/timescale/outflux/internal/extraction/config" 8 | "github.com/timescale/outflux/internal/idrf" 9 | ) 10 | 11 | const ( 12 | selectQueryDoubleBoundTemplate = "SELECT %s FROM %s WHERE time >= '%s' AND time <= '%s'" 13 | selectQueryLowerBoundTemplate = "SELECT %s FROM %s WHERE time >= '%s'" 14 | selectQueryUpperBoundTemplate = "SELECT %s FROM %s WHERE time <= '%s'" 15 | selectQueryNoBoundTemplate = "SELECT %s FROM %s" 16 | limitSuffixTemplate = "LIMIT %d" 17 | measurementNameTemplate = `"%s"` 18 | measurementNameWithRPTemplate = `"%s"."%s"` 19 | ) 20 | 21 | func buildSelectCommand(config *config.MeasureExtraction, columns []*idrf.Column) string { 22 | projection := buildProjection(columns) 23 | measurementName := buildMeasurementName(config.RetentionPolicy, config.Measure) 24 | var command string 25 | if config.From != "" && config.To != "" { 26 | command = fmt.Sprintf(selectQueryDoubleBoundTemplate, projection, measurementName, config.From, config.To) 27 | } else if config.From != "" { 28 | command = fmt.Sprintf(selectQueryLowerBoundTemplate, projection, measurementName, config.From) 29 | } else if config.To != "" { 30 | command = fmt.Sprintf(selectQueryUpperBoundTemplate, projection, measurementName, config.To) 31 | } else { 32 | command = fmt.Sprintf(selectQueryNoBoundTemplate, projection, measurementName) 33 | } 34 | 35 | if config.Limit == 0 { 36 | return command 37 | } 38 | 39 | limit := fmt.Sprintf(limitSuffixTemplate, config.Limit) 40 | return fmt.Sprintf("%s %s", command, limit) 41 | } 42 | 43 | func buildMeasurementName(rp, measurement string) string { 44 | if rp != "" { 45 | return fmt.Sprintf(measurementNameWithRPTemplate, rp, measurement) 46 | } 47 | return fmt.Sprintf(measurementNameTemplate, measurement) 48 | } 49 | 50 | func buildProjection(columns []*idrf.Column) string { 51 | columnNames := make([]string, len(columns)) 52 
| for i, column := range columns { 53 | columnNames[i] = fmt.Sprintf(`"%s"`, column.Name) 54 | } 55 | 56 | return strings.Join(columnNames, ", ") 57 | } 58 | 59 | func checkError(errorChannel chan error) error { 60 | select { 61 | case err := <-errorChannel: 62 | return err 63 | default: 64 | return nil 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /internal/extraction/influx/query_building_test.go: -------------------------------------------------------------------------------- 1 | package influx 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/timescale/outflux/internal/extraction/config" 7 | "github.com/timescale/outflux/internal/idrf" 8 | ) 9 | 10 | func TestBuildMeasurementName(t *testing.T) { 11 | testCases := []struct { 12 | in string 13 | inRp string 14 | exp string 15 | }{ 16 | {in: "measure", inRp: "", exp: `"measure"`}, 17 | {in: "m.easure", inRp: "rp", exp: `"rp"."m.easure"`}, 18 | {in: "m.measure name", inRp: "r p", exp: `"r p"."m.measure name"`}, 19 | {in: "measure name.measure.name", exp: `"measure name.measure.name"`}, 20 | } 21 | 22 | for _, tc := range testCases { 23 | out := buildMeasurementName(tc.inRp, tc.in) 24 | if out != tc.exp { 25 | t.Errorf("expected: %s, got: %s", tc.exp, out) 26 | } 27 | } 28 | } 29 | 30 | func TestBuildProjection(t *testing.T) { 31 | testCases := []struct { 32 | in []*idrf.Column 33 | exp string 34 | }{ 35 | {in: []*idrf.Column{{Name: "col1"}}, exp: `"col1"`}, 36 | {in: []*idrf.Column{{Name: "col 1"}}, exp: `"col 1"`}, 37 | {in: []*idrf.Column{{Name: "col 1"}, {Name: "col 2"}}, exp: `"col 1", "col 2"`}, 38 | } 39 | 40 | for _, tc := range testCases { 41 | out := buildProjection(tc.in) 42 | if out != tc.exp { 43 | t.Errorf("expected: %s, got: %s", tc.exp, out) 44 | } 45 | } 46 | } 47 | 48 | func TestBuildSelectCommand(t *testing.T) { 49 | testCases := []struct { 50 | measure string 51 | rp string 52 | columns []*idrf.Column 53 | from string 54 | to string 55 | limit uint64 56 | exp string 57 | }{ 58 | { 59 | measure: "m", 60 | columns: []*idrf.Column{{Name: "col1"}}, 61 | exp: `SELECT "col1" FROM "m"`, 62 | }, { 63 | measure: "m.m", 64 | columns: []*idrf.Column{{Name: "col1"}, {Name: "col 2"}}, 65 | from: "a", 66 | exp: `SELECT "col1", "col 2" FROM "m.m" WHERE time >= 'a'`, 67 | }, { 68 | measure: "m", 69 | columns: []*idrf.Column{{Name: "col1"}}, 70 | to: "b", 71 | exp: `SELECT "col1" FROM "m" WHERE time <= 'b'`, 72 | }, { 73 | measure: "m", 74 | columns: []*idrf.Column{{Name: "col1"}}, 75 | from: "a", 76 | to: "b", 77 | exp: `SELECT "col1" FROM "m" WHERE time >= 'a' AND time <= 'b'`, 78 | }, { 79 | measure: "m", 80 | columns: []*idrf.Column{{Name: "col1"}}, 81 | limit: 11, 82 | exp: `SELECT "col1" FROM "m" LIMIT 11`, 83 | }, { 84 | measure: "m", 85 | rp: "rep pol", 86 | columns: []*idrf.Column{{Name: "col1"}}, 87 | from: "a", 88 | to: "b", 89 | exp: `SELECT "col1" FROM "rep pol"."m" WHERE time >= 'a' AND time <= 'b'`, 90 | }, 91 | } 92 | 93 | for _, tc := range testCases { 94 | config := &config.MeasureExtraction{ 95 | Measure: tc.measure, 96 | RetentionPolicy: tc.rp, 97 | From: tc.from, 98 | To: tc.to, 99 | Limit: tc.limit, 100 | } 101 | 102 | out := buildSelectCommand(config, tc.columns) 103 | if out != tc.exp { 104 | t.Errorf("expected: %s, got: %s", tc.exp, out) 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /internal/idrf/bundle.go: -------------------------------------------------------------------------------- 1 | 
package idrf 2 | 3 | // Bundle defines a bundle of a data definition (schema) and a channel that carries data in IDRF format 4 | type Bundle struct { 5 | DataDef *DataSet 6 | DataChan chan Row 7 | } 8 | -------------------------------------------------------------------------------- /internal/idrf/column_info.go: -------------------------------------------------------------------------------- 1 | package idrf 2 | 3 | import "fmt" 4 | 5 | // Column represents DDL description of a single column in IDRF 6 | type Column struct { 7 | Name string 8 | DataType DataType 9 | } 10 | 11 | func (c Column) String() string { 12 | return fmt.Sprintf("Column { Name: %s, DataType: %s}", c.Name, c.DataType.String()) 13 | } 14 | 15 | // NewColumn creates a new Column while checking the arguments 16 | func NewColumn(columnName string, dataType DataType) (*Column, error) { 17 | if len(columnName) == 0 { 18 | return nil, fmt.Errorf("Column must have a name") 19 | } 20 | 21 | return &Column{columnName, dataType}, nil 22 | } 23 | -------------------------------------------------------------------------------- /internal/idrf/column_info_test.go: -------------------------------------------------------------------------------- 1 | package idrf 2 | 3 | import "testing" 4 | 5 | func TestNewColumn(t *testing.T) { 6 | if _, err := NewColumn("", IDRFBoolean); err == nil { 7 | t.Error("expected error, none received") 8 | } 9 | 10 | res, err := NewColumn("some name", IDRFBoolean) 11 | if res == nil || err != nil { 12 | t.Errorf("unexpected error: %v", err) 13 | } 14 | 15 | str := res.String() 16 | if res.Name != "some name" || res.DataType != IDRFBoolean { 17 | t.Errorf("unexpected values: %s", str) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /internal/idrf/data_set.go: -------------------------------------------------------------------------------- 1 | // Package idrf provides the structures for the Outflux Intermediate Data Representation 2 | // Format. These structures describe data and its schema. 
The package also contains 3 | // functions for safe initialization of the structures 4 | package idrf 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | // DataSet represents DDL description of a single data set (table, measurement) in IDRF 11 | type DataSet struct { 12 | DataSetName string 13 | Columns []*Column 14 | TimeColumn string 15 | } 16 | 17 | func (set *DataSet) String() string { 18 | return fmt.Sprintf("DataSet { Name: %s, Columns: %s, Time Column: %s }", set.DataSetName, set.Columns, set.TimeColumn) 19 | } 20 | 21 | // ColumnNamed returns the ColumnInfo for a column given its name, or nil if no column 22 | // with that name exists in the data set 23 | func (set *DataSet) ColumnNamed(columnName string) *Column { 24 | for _, column := range set.Columns { 25 | if columnName == column.Name { 26 | return column 27 | } 28 | } 29 | 30 | return nil 31 | } 32 | 33 | // NewDataSet creates a new instance of DataSet with checked arguments 34 | func NewDataSet(dataSetName string, columns []*Column, timeColumn string) (*DataSet, error) { 35 | if len(dataSetName) == 0 { 36 | return nil, fmt.Errorf("data set name can't be empty") 37 | } 38 | 39 | if len(columns) == 0 { 40 | return nil, fmt.Errorf("data set must have at least one column") 41 | } 42 | 43 | if timeColumn == "" { 44 | return nil, fmt.Errorf("data set must have a time column specified") 45 | } 46 | 47 | columnSet := make(map[string]bool) 48 | timeColumnDefined := false 49 | for _, columnInfo := range columns { 50 | if _, exists := columnSet[columnInfo.Name]; exists { 51 | return nil, fmt.Errorf("duplicate column names found: %s", columnInfo.Name) 52 | } 53 | 54 | columnSet[columnInfo.Name] = true 55 | if columnInfo.Name == timeColumn { 56 | if columnInfo.DataType != IDRFTimestamp && columnInfo.DataType != IDRFTimestamptz { 57 | return nil, fmt.Errorf("time column '%s' is not of a Timestamp(tz) type", timeColumn) 58 | } 59 | 60 | timeColumnDefined = true 61 | } 62 | } 63 | 64 | if !timeColumnDefined { 65 | return nil, fmt.Errorf("time column %s not found in columns array", timeColumn) 66 | } 67 | 68 | return &DataSet{dataSetName, columns, timeColumn}, nil 69 | } 70 | -------------------------------------------------------------------------------- /internal/idrf/data_set_test.go: -------------------------------------------------------------------------------- 1 | package idrf 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestNewDataSet(t *testing.T) { 9 | column, _ := NewColumn("Col 1", IDRFTimestamp) 10 | intColumn, _ := NewColumn("Col 1", IDRFInteger32) 11 | columns := []*Column{column} 12 | noTimestampTimeColumns := []*Column{intColumn} 13 | if _, err := NewDataSet("", columns, "Col 1"); err == nil { 14 | t.Error("Should not be able to create a data set with an empty name") 15 | } 16 | 17 | noColumns := []*Column{} 18 | if _, err := NewDataSet("Data Set", noColumns, ""); err == nil { 19 | t.Error("Should not be able to create a data set without columns") 20 | } 21 | 22 | duplicateColumns := []*Column{column, column} 23 | if _, err := NewDataSet("data set", duplicateColumns, "Col 1"); err == nil { 24 | t.Error("Should not be able to create a data set with duplicate columns") 25 | } 26 | 27 | if _, err := NewDataSet("Data Set", columns, "Col 2"); err == nil { 28 | t.Error("Data Set should not have been created, time column not in column set") 29 | } 30 | 31 | if _, err := NewDataSet("Data Set", noTimestampTimeColumns, "Col 1"); err == nil { 32 | t.Error("Data Set should not have been created, time column 
not a timestamp") 33 | } 34 | 35 | if _, error := NewDataSet("Data Set", columns, ""); error == nil { 36 | t.Error("data set should not have been created with time column empty") 37 | } 38 | 39 | dataSet, err := NewDataSet("Data Set", columns, "Col 1") 40 | if err != nil || dataSet == nil { 41 | t.Errorf("Data Set should have been created. Unexepcted err: %v", err) 42 | } 43 | 44 | if dataSet.DataSetName != "Data Set" { 45 | t.Errorf("Data set named %s, instead of %s", dataSet.DataSetName, "Data Set") 46 | } 47 | 48 | if len(dataSet.Columns) != 1 && dataSet.TimeColumn != "Col 1" { 49 | t.Errorf("data set columns not properly initialized") 50 | } 51 | 52 | } 53 | func TestColumnNamed(t *testing.T) { 54 | goodColumnName := "Col 1" 55 | badColumnName := "Col 2" 56 | 57 | expectedColumnType := IDRFTimestamp 58 | 59 | column, _ := NewColumn(goodColumnName, expectedColumnType) 60 | columns := []*Column{column} 61 | dataSet, _ := NewDataSet("Data Set", columns, "Col 1") 62 | 63 | goodColumn := dataSet.ColumnNamed(goodColumnName) 64 | if goodColumn == nil { 65 | t.Error("Column should have been found") 66 | } 67 | 68 | if dataSet.ColumnNamed(badColumnName) != nil { 69 | t.Error("Column name should not have been found") 70 | } 71 | 72 | if goodColumn.Name != goodColumnName || goodColumn.DataType != expectedColumnType { 73 | t.Error( 74 | fmt.Sprintf( 75 | "Found column was not good. Expected: name <%s> and type <%s>. Got: name <%s> and type <%s>", 76 | goodColumnName, expectedColumnType, goodColumn.Name, goodColumn.DataType, 77 | )) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /internal/idrf/data_type.go: -------------------------------------------------------------------------------- 1 | package idrf 2 | 3 | // DataType Supported data types in the Intermediate Data Representation Format 4 | type DataType int 5 | 6 | // Available values for IDRF DataType enum 7 | const ( 8 | IDRFInteger32 DataType = iota + 1 9 | IDRFInteger64 10 | IDRFDouble 11 | IDRFSingle 12 | IDRFString 13 | IDRFBoolean 14 | IDRFTimestamptz 15 | IDRFTimestamp 16 | IDRFJson 17 | IDRFUnknown 18 | ) 19 | 20 | func (d DataType) String() string { 21 | switch d { 22 | case IDRFBoolean: 23 | return "Boolean" 24 | case IDRFDouble: 25 | return "Double" 26 | case IDRFInteger32: 27 | return "Integer32" 28 | case IDRFString: 29 | return "String" 30 | case IDRFTimestamp: 31 | return "IDRFTimestamp" 32 | case IDRFTimestamptz: 33 | return "Timestamptz" 34 | case IDRFInteger64: 35 | return "Integer64" 36 | case IDRFSingle: 37 | return "Single" 38 | case IDRFJson: 39 | return "Json" 40 | case IDRFUnknown: 41 | return "Unknown" 42 | default: 43 | panic("Unexpected value") 44 | } 45 | } 46 | 47 | // CanFitInto returns true if this data type can be safely cast to the other data type 48 | func (d DataType) CanFitInto(other DataType) bool { 49 | if d == other { 50 | return true 51 | } 52 | 53 | switch d { 54 | case IDRFInteger32: 55 | return other == IDRFSingle || other == IDRFDouble || other == IDRFInteger64 56 | case IDRFSingle: 57 | return other == IDRFDouble 58 | case IDRFTimestamp: 59 | return other == IDRFTimestamptz 60 | } 61 | 62 | return false 63 | } 64 | -------------------------------------------------------------------------------- /internal/idrf/data_type_test.go: -------------------------------------------------------------------------------- 1 | package idrf 2 | 3 | import "testing" 4 | 5 | func TestCanFitInto(t *testing.T) { 6 | allTypes := []DataType{ 7 | IDRFInteger32, 8 | 
IDRFInteger64, 9 | IDRFDouble, 10 | IDRFSingle, 11 | IDRFString, 12 | IDRFBoolean, 13 | IDRFTimestamptz, 14 | IDRFTimestamp, 15 | IDRFJson, 16 | IDRFUnknown, 17 | } 18 | 19 | for _, dt := range allTypes { 20 | if !dt.CanFitInto(dt) { 21 | t.Errorf("%v can't fit into itself", dt) 22 | } 23 | } 24 | 25 | for _, dt := range allTypes { 26 | if dt == IDRFInteger64 || dt == IDRFSingle || dt == IDRFDouble { 27 | if !IDRFInteger32.CanFitInto(dt) { 28 | t.Errorf("%s should've fit in %s", IDRFInteger32, dt) 29 | } 30 | } else if dt != IDRFInteger32 && IDRFInteger32.CanFitInto(dt) { 31 | t.Errorf("%s shouldn't have fit in %s", IDRFInteger32, dt) 32 | continue 33 | } 34 | 35 | if dt == IDRFDouble { 36 | if !IDRFSingle.CanFitInto(dt) { 37 | t.Errorf("%s should've fit in %s", IDRFSingle, dt) 38 | } 39 | } else if dt != IDRFSingle && IDRFSingle.CanFitInto(dt) { 40 | t.Errorf("%s shouldn't have fit in %s", IDRFSingle, dt) 41 | continue 42 | } 43 | 44 | if dt == IDRFTimestamptz && !IDRFTimestamp.CanFitInto(dt) { 45 | t.Errorf("%s should've fit in %s", IDRFTimestamp, dt) 46 | continue 47 | } 48 | 49 | if dt != IDRFString && dt.CanFitInto(IDRFString) { 50 | t.Errorf("%s shouldn't have fit in %s", dt, IDRFString) 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /internal/idrf/row.go: -------------------------------------------------------------------------------- 1 | package idrf 2 | 3 | // Row represents a single row of values in Intermediate Data Representation Format 4 | type Row []interface{} 5 | -------------------------------------------------------------------------------- /internal/ingestion/config/ingestor_config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/timescale/outflux/internal/schemamanagement/schemaconfig" 7 | ) 8 | 9 | // IngestorConfig holds all the properties required to create and run an ingestor 10 | type IngestorConfig struct { 11 | IngestorID string 12 | BatchSize uint16 13 | RollbackOnExternalError bool 14 | CommitStrategy CommitStrategy 15 | SchemaStrategy schemaconfig.SchemaStrategy 16 | Schema string 17 | ChunkTimeInterval string 18 | } 19 | 20 | // CommitStrategy describes how the ingestor should handle the ingested data 21 | // Single Transaction or Commit on Each Batch 22 | type CommitStrategy int 23 | 24 | // Available values for the CommitStrategy enum 25 | const ( 26 | CommitOnEnd CommitStrategy = iota + 1 27 | CommitOnEachBatch 28 | ) 29 | 30 | // ParseStrategyString returns the enum value matching the string, or an error 31 | func ParseStrategyString(strategy string) (CommitStrategy, error) { 32 | switch strategy { 33 | case "CommitOnEnd": 34 | return CommitOnEnd, nil 35 | case "CommitOnEachBatch": 36 | return CommitOnEachBatch, nil 37 | default: 38 | return CommitOnEnd, fmt.Errorf("unknown commit strategy '%s'", strategy) 39 | } 40 | } 41 | 42 | func (s CommitStrategy) String() string { 43 | switch s { 44 | case CommitOnEnd: 45 | return "CommitOnEnd" 46 | case CommitOnEachBatch: 47 | return "CommitOnEachBatch" 48 | default: 49 | panic("unknown type") 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /internal/ingestion/config/ingestor_config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func 
TestIngestorConfigParse(t *testing.T) { 10 | x, err := ParseStrategyString(CommitOnEnd.String()) 11 | assert.Equal(t, CommitOnEnd, x) 12 | assert.NoError(t, err) 13 | x, err = ParseStrategyString(CommitOnEachBatch.String()) 14 | assert.Equal(t, CommitOnEachBatch, x) 15 | assert.NoError(t, err) 16 | _, err = ParseStrategyString("anything else") 17 | assert.Error(t, err) 18 | } 19 | -------------------------------------------------------------------------------- /internal/ingestion/ingestor.go: -------------------------------------------------------------------------------- 1 | package ingestion 2 | 3 | import ( 4 | "github.com/timescale/outflux/internal/idrf" 5 | ) 6 | 7 | // Ingestor takes a data channel of idrf rows and inserts them in a target database 8 | type Ingestor interface { 9 | ID() string 10 | Prepare(conn *idrf.Bundle) error 11 | Start(chan error) error 12 | } 13 | -------------------------------------------------------------------------------- /internal/ingestion/ingestor_service.go: -------------------------------------------------------------------------------- 1 | package ingestion 2 | 3 | import ( 4 | "github.com/timescale/outflux/internal/connections" 5 | "github.com/timescale/outflux/internal/ingestion/config" 6 | "github.com/timescale/outflux/internal/ingestion/ts" 7 | tsSchema "github.com/timescale/outflux/internal/schemamanagement/ts" 8 | ) 9 | 10 | // IngestorService exposes methods to create new ingestors 11 | type IngestorService interface { 12 | NewTimescaleIngestor(dbConn connections.PgxWrap, config *config.IngestorConfig) Ingestor 13 | } 14 | 15 | // NewIngestorService creates an instance of the IngestorService 16 | func NewIngestorService() IngestorService { 17 | return &ingestorService{} 18 | } 19 | 20 | type ingestorService struct { 21 | } 22 | 23 | // NewTimescaleIngestor creates a new instance of an Ingestor with a specified config, for a specified 24 | // data set and data channel 25 | func (i *ingestorService) NewTimescaleIngestor(dbConn connections.PgxWrap, config *config.IngestorConfig) Ingestor { 26 | schemaManager := tsSchema.NewTSSchemaManager(dbConn, config.Schema, config.ChunkTimeInterval) 27 | return &ts.TSIngestor{ 28 | DbConn: dbConn, 29 | Config: config, 30 | IngestionRoutine: ts.NewRoutine(), 31 | SchemaManager: schemaManager, 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /internal/ingestion/ts/ingestion_routine.go: -------------------------------------------------------------------------------- 1 | package ts 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | "github.com/jackc/pgx" 8 | "github.com/timescale/outflux/internal/connections" 9 | "github.com/timescale/outflux/internal/idrf" 10 | "github.com/timescale/outflux/internal/ingestion/config" 11 | "github.com/timescale/outflux/internal/utils" 12 | ) 13 | 14 | type ingestDataArgs struct { 15 | // id of the ingestor used to subscribe and unsubscribe to errors from other goroutines 16 | ingestorID string 17 | // channel delivering errors that happened in other routines 18 | errChan chan error 19 | // the channel notified when the ingestor has completed 20 | ackChannel chan bool 21 | // the input channel that delivers the data to be inserted 22 | dataChannel chan idrf.Row 23 | // on each ${batchSize} rows inserted the ingestor checks if there is an error in one of the other goroutines 24 | batchSize uint16 25 | // whether a rollback should be done if an error occurred in another goroutine 26 | rollbackOnExternalError bool 27 | // the database connection 28 | 
28 | dbConn connections.PgxWrap 29 | // column names 30 | colNames []string 31 | // name of table where inserts happen 32 | tableName string 33 | // name of schema where the table is 34 | schemaName string 35 | // commit strategy 36 | commitStrategy config.CommitStrategy 37 | } 38 | 39 | // Routine defines an interface that consumes a channel of idrf.Rows 40 | // and writes them to a TimescaleDB database 41 | type Routine interface { 42 | ingest(args *ingestDataArgs) error 43 | } 44 | 45 | // NewRoutine creates a new instance of the default ingestion Routine 46 | func NewRoutine() Routine { 47 | return &defaultRoutine{} 48 | } 49 | 50 | type defaultRoutine struct{} 51 | 52 | func (routine *defaultRoutine) ingest(args *ingestDataArgs) error { 53 | log.Printf("Starting data ingestor '%s'", args.ingestorID) 54 | 55 | err := utils.CheckError(args.errChan) 56 | if err != nil { 57 | log.Printf("%s: received external error before starting data insertion. Quitting\n", args.ingestorID) 58 | return nil 59 | } 60 | 61 | tx, err := openTx(args) 62 | if err != nil { 63 | return err 64 | } 65 | 66 | numInserts := uint(0) 67 | batchInserts := uint16(0) 68 | log.Printf("Will batch insert %d rows at once, with commit strategy: %v", args.batchSize, args.commitStrategy) 69 | batch := make([][]interface{}, args.batchSize) 70 | var tableIdentifier *pgx.Identifier 71 | if args.schemaName != "" { 72 | tableIdentifier = &pgx.Identifier{args.schemaName, args.tableName} 73 | } else { 74 | tableIdentifier = &pgx.Identifier{args.tableName} 75 | } 76 | 77 | for row := range args.dataChannel { 78 | batch[batchInserts] = row 79 | batchInserts++ 80 | if batchInserts < args.batchSize { 81 | continue 82 | } 83 | 84 | if args.rollbackOnExternalError && utils.CheckError(args.errChan) != nil { 85 | log.Printf("%s: Error received from outside of ingestor. Rolling back\n", args.ingestorID) 86 | _ = tx.Rollback() 87 | return nil 88 | } 89 | 90 | numInserts += uint(batchInserts) 91 | batchInserts = 0 92 | if err = copyToDb(args, tableIdentifier, tx, batch); err != nil { 93 | return err 94 | } 95 | if args.commitStrategy != config.CommitOnEachBatch { 96 | continue 97 | } 98 | if err = commitTx(args, tx); err != nil { 99 | return err 100 | } 101 | if tx, err = openTx(args); err != nil { 102 | return err 103 | } 104 | } 105 | 106 | if batchInserts > 0 { 107 | batch = batch[:batchInserts] 108 | if err = copyToDb(args, tableIdentifier, tx, batch); err != nil { 109 | return err 110 | } 111 | numInserts += uint(batchInserts) 112 | } 113 | 114 | if err = commitTx(args, tx); err != nil { 115 | return err 116 | } 117 | 118 | log.Printf("%s: Complete. 
Inserted %d rows.\n", args.ingestorID, numInserts) 119 | return nil 120 | } 121 | 122 | func commitTx(args *ingestDataArgs, tx *pgx.Tx) error { 123 | err := tx.Commit() 124 | if err != nil { 125 | log.Printf("%s could not commit transaction in output db\n%v", args.ingestorID, err) 126 | } 127 | 128 | return err 129 | } 130 | 131 | func copyToDb(args *ingestDataArgs, identifier *pgx.Identifier, tx *pgx.Tx, batch [][]interface{}) error { 132 | source := pgx.CopyFromRows(batch) 133 | _, err := args.dbConn.CopyFrom(*identifier, args.colNames, source) 134 | if err != nil { 135 | log.Printf("%s could not insert batch of rows in output db\n%v", args.ingestorID, err) 136 | _ = tx.Rollback() 137 | } 138 | 139 | return err 140 | } 141 | 142 | func openTx(args *ingestDataArgs) (*pgx.Tx, error) { 143 | tx, err := args.dbConn.Begin() 144 | if err != nil { 145 | return nil, fmt.Errorf("%s: could not create transaction\n%v", args.ingestorID, err) 146 | } 147 | 148 | return tx, err 149 | } 150 | -------------------------------------------------------------------------------- /internal/ingestion/ts/ingestion_routine_test.go: -------------------------------------------------------------------------------- 1 | package ts 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | 7 | "github.com/jackc/pgx" 8 | "github.com/stretchr/testify/assert" 9 | "github.com/timescale/outflux/internal/connections" 10 | ) 11 | 12 | func TestOpenTx(t *testing.T) { 13 | res, err := openTx(&ingestDataArgs{ 14 | dbConn: &connections.MockPgxW{ 15 | BeginRes: []*pgx.Tx{nil}, 16 | BeginErr: []error{errors.New("generic error")}, 17 | }, 18 | }) 19 | assert.Error(t, err) 20 | assert.Nil(t, res) 21 | res, err = openTx(&ingestDataArgs{ 22 | dbConn: &connections.MockPgxW{ 23 | BeginRes: []*pgx.Tx{&pgx.Tx{}}, 24 | BeginErr: []error{nil}, 25 | }, 26 | }) 27 | assert.NoError(t, err) 28 | assert.NotNil(t, res) 29 | } 30 | 31 | func TestCopyToDb(t *testing.T) { 32 | assert.Panics(t, func() { 33 | copyToDb(&ingestDataArgs{ 34 | dbConn: &connections.MockPgxW{ 35 | CopyFromErr: []error{errors.New("err")}, 36 | }, 37 | }, &pgx.Identifier{"x"}, &pgx.Tx{}, [][]interface{}{}) 38 | }, "should panic because of tx.Rollback") 39 | mock := &connections.MockPgxW{CopyFromErr: []error{nil}} 40 | copyToDb(&ingestDataArgs{ 41 | dbConn: mock, 42 | colNames: []string{"a"}, 43 | }, &pgx.Identifier{"x"}, &pgx.Tx{}, [][]interface{}{}) 44 | assert.Equal(t, mock.ExpCopyFromTab[0], pgx.Identifier{"x"}) 45 | assert.Equal(t, mock.ExpCopyFromCol, [][]string{[]string{"a"}}) 46 | } 47 | -------------------------------------------------------------------------------- /internal/ingestion/ts/ts_ingestor.go: -------------------------------------------------------------------------------- 1 | package ts 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/timescale/outflux/internal/connections" 7 | "github.com/timescale/outflux/internal/idrf" 8 | "github.com/timescale/outflux/internal/ingestion/config" 9 | "github.com/timescale/outflux/internal/schemamanagement" 10 | ) 11 | 12 | // TSIngestor implements a TimescaleDB ingestor 13 | type TSIngestor struct { 14 | Config *config.IngestorConfig 15 | DbConn connections.PgxWrap 16 | IngestionRoutine Routine 17 | SchemaManager schemamanagement.SchemaManager 18 | cachedBundle *idrf.Bundle 19 | } 20 | 21 | // ID returns a string identifying the ingestor instance in logs 22 | func (i *TSIngestor) ID() string { 23 | return i.Config.IngestorID 24 | } 25 | 26 | // Prepare creates or validates the output tables in Timescale 27 | func (i *TSIngestor) 
Prepare(bundle *idrf.Bundle) error { 28 | i.cachedBundle = bundle 29 | return i.SchemaManager.PrepareDataSet(bundle.DataDef, i.Config.SchemaStrategy) 30 | } 31 | 32 | // Start consumes a data channel of idrf.Row(s) and inserts them into a TimescaleDB hypertable 33 | func (i *TSIngestor) Start(errChan chan error) error { 34 | if i.cachedBundle == nil { 35 | return fmt.Errorf("%s: Start called without calling Prepare first", i.Config.IngestorID) 36 | } 37 | 38 | dataSet := i.cachedBundle.DataDef 39 | colNames := extractColumnNames(dataSet.Columns) 40 | 41 | ingestArgs := &ingestDataArgs{ 42 | ingestorID: i.Config.IngestorID, 43 | errChan: errChan, 44 | dataChannel: i.cachedBundle.DataChan, 45 | rollbackOnExternalError: i.Config.RollbackOnExternalError, 46 | batchSize: i.Config.BatchSize, 47 | dbConn: i.DbConn, 48 | colNames: colNames, 49 | tableName: dataSet.DataSetName, 50 | schemaName: i.Config.Schema, 51 | commitStrategy: i.Config.CommitStrategy, 52 | } 53 | 54 | return i.IngestionRoutine.ingest(ingestArgs) 55 | } 56 | 57 | func extractColumnNames(columns []*idrf.Column) []string { 58 | columnNames := make([]string, len(columns)) 59 | for i, column := range columns { 60 | columnNames[i] = column.Name 61 | } 62 | 63 | return columnNames 64 | } 65 | -------------------------------------------------------------------------------- /internal/pipeline/pipe.go: -------------------------------------------------------------------------------- 1 | package pipeline 2 | 3 | import ( 4 | "log" 5 | 6 | "github.com/timescale/outflux/internal/transformation" 7 | 8 | "github.com/timescale/outflux/internal/extraction" 9 | "github.com/timescale/outflux/internal/ingestion" 10 | ) 11 | 12 | // Pipe connects an extractor and an ingestor 13 | type Pipe interface { 14 | Run() error 15 | ID() string 16 | } 17 | 18 | // NewPipe creates an implementation of the Pipe interface 19 | func NewPipe(id string, ing ingestion.Ingestor, ext extraction.Extractor, trans []transformation.Transformer, prepareOnly bool) Pipe { 20 | return &defPipe{ 21 | id, ing, ext, trans, prepareOnly, 22 | } 23 | } 24 | 25 | type defPipe struct { 26 | id string 27 | ingestor ingestion.Ingestor 28 | extractor extraction.Extractor 29 | transformers []transformation.Transformer 30 | prepareOnly bool 31 | } 32 | 33 | func (p *defPipe) ID() string { 34 | return p.id 35 | } 36 | 37 | func (p *defPipe) Run() error { 38 | // prepare elements 39 | err := p.prepareElements(p.extractor, p.ingestor, p.transformers) 40 | if err != nil { 41 | return err 42 | } 43 | 44 | if p.prepareOnly { 45 | log.Printf("No data transfer will occur") 46 | return nil 47 | } 48 | 49 | // run them 50 | return p.run(p.extractor, p.ingestor, p.transformers) 51 | } 52 | -------------------------------------------------------------------------------- /internal/pipeline/pipe_extraction_routine.go: -------------------------------------------------------------------------------- 1 | package pipeline 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/timescale/outflux/internal/extraction" 7 | ) 8 | 9 | type extractorRoutineArgs struct { 10 | wg *sync.WaitGroup 11 | e extraction.Extractor 12 | eb func(error) 13 | ec chan error 14 | } 15 | 16 | func extractorRoutine(args *extractorRoutineArgs) { 17 | err := args.e.Start(args.ec) 18 | if err != nil { 19 | args.eb(err) 20 | } 21 | args.wg.Done() 22 | } 23 | -------------------------------------------------------------------------------- /internal/pipeline/pipe_ingestor_routine.go: 
-------------------------------------------------------------------------------- 1 | package pipeline 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/timescale/outflux/internal/ingestion" 7 | ) 8 | 9 | type ingestorRoutineArgs struct { 10 | wg *sync.WaitGroup 11 | i ingestion.Ingestor 12 | eb func(error) 13 | ec chan error 14 | } 15 | 16 | func ingestorRoutine(args *ingestorRoutineArgs) { 17 | err := args.i.Start(args.ec) 18 | if err != nil { 19 | args.eb(err) 20 | } 21 | 22 | args.wg.Done() 23 | } 24 | -------------------------------------------------------------------------------- /internal/pipeline/pipe_prepare_elements.go: -------------------------------------------------------------------------------- 1 | package pipeline 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/timescale/outflux/internal/transformation" 7 | 8 | "github.com/timescale/outflux/internal/extraction" 9 | "github.com/timescale/outflux/internal/ingestion" 10 | ) 11 | 12 | func (p *defPipe) prepareElements( 13 | extractor extraction.Extractor, 14 | ingestor ingestion.Ingestor, 15 | transformers []transformation.Transformer) error { 16 | bundle, err := extractor.Prepare() 17 | if err != nil { 18 | return fmt.Errorf("%s: could not prepare extractor\n%v", p.id, err) 19 | } 20 | 21 | for _, transformer := range transformers { 22 | bundle, err = transformer.Prepare(bundle) 23 | if err != nil { 24 | return fmt.Errorf("%s: could not prepare transformer\n%v", p.id, err) 25 | } 26 | } 27 | 28 | err = ingestor.Prepare(bundle) 29 | if err != nil { 30 | return fmt.Errorf("%s: could not prepare ingestor\n%v", p.id, err) 31 | } 32 | return nil 33 | } 34 | -------------------------------------------------------------------------------- /internal/pipeline/pipe_run.go: -------------------------------------------------------------------------------- 1 | package pipeline 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | 7 | "github.com/timescale/outflux/internal/transformation" 8 | 9 | "github.com/timescale/outflux/internal/extraction" 10 | "github.com/timescale/outflux/internal/ingestion" 11 | "github.com/timescale/outflux/internal/utils" 12 | ) 13 | 14 | func (p *defPipe) run( 15 | extractor extraction.Extractor, 16 | ingestor ingestion.Ingestor, 17 | transformers []transformation.Transformer) error { 18 | errorBroadcaster := utils.NewErrorBroadcaster() 19 | ingErrors, err := errorBroadcaster.Subscribe(ingestor.ID()) 20 | if err != nil { 21 | return fmt.Errorf("%s: could not subscribe ingestor for errors\n%v", p.id, err) 22 | } 23 | extErrors, err := errorBroadcaster.Subscribe(extractor.ID()) 24 | if err != nil { 25 | return fmt.Errorf("%s: could not subscribe extractor for errors\n%v", p.id, err) 26 | } 27 | 28 | transformerErrChannels := make([]chan error, len(transformers)) 29 | for i, transformer := range transformers { 30 | transformerErrChannels[i], err = errorBroadcaster.Subscribe(transformer.ID()) 31 | if err != nil { 32 | return fmt.Errorf("%s: could not subscribe transformer '%s' for errors\n%v", p.id, transformer.ID(), err) 33 | } 34 | } 35 | 36 | defer errorBroadcaster.Close() 37 | var waitgroup sync.WaitGroup 38 | waitgroup.Add(2 + len(transformers)) 39 | go extractorRoutine(&extractorRoutineArgs{ 40 | wg: &waitgroup, 41 | e: extractor, 42 | eb: wrappedBroadcast(extractor.ID(), errorBroadcaster), 43 | ec: extErrors, 44 | }) 45 | for i, transformer := range transformers { 46 | go transformerRoutine(&transformerRoutineArgs{ 47 | wg: &waitgroup, 48 | t: transformer, 49 | eb: wrappedBroadcast(transformer.ID(), errorBroadcaster), 50 | 
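// (added note: ec is the channel on which this transformer receives errors broadcast by the other routines; eb publishes the transformer's own failure to them)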
ec: transformerErrChannels[i], 51 | }) 52 | } 53 | go ingestorRoutine(&ingestorRoutineArgs{ 54 | wg: &waitgroup, 55 | i: ingestor, 56 | eb: wrappedBroadcast(ingestor.ID(), errorBroadcaster), 57 | ec: ingErrors, 58 | }) 59 | 60 | waitgroup.Wait() 61 | return nil 62 | } 63 | 64 | func wrappedBroadcast(id string, eb utils.ErrorBroadcaster) func(error) { 65 | return func(e error) { eb.Broadcast(id, e) } 66 | } 67 | -------------------------------------------------------------------------------- /internal/pipeline/pipe_transform_routine.go: -------------------------------------------------------------------------------- 1 | package pipeline 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/timescale/outflux/internal/transformation" 7 | ) 8 | 9 | type transformerRoutineArgs struct { 10 | wg *sync.WaitGroup 11 | t transformation.Transformer 12 | eb func(error) 13 | ec chan error 14 | } 15 | 16 | func transformerRoutine(args *transformerRoutineArgs) { 17 | err := args.t.Start(args.ec) 18 | if err != nil { 19 | args.eb(err) 20 | } 21 | args.wg.Done() 22 | } 23 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/dataset_constructor.go: -------------------------------------------------------------------------------- 1 | package influx 2 | 3 | import ( 4 | "fmt" 5 | 6 | influx "github.com/influxdata/influxdb/client/v2" 7 | "github.com/timescale/outflux/internal/idrf" 8 | "github.com/timescale/outflux/internal/schemamanagement/influx/discovery" 9 | ) 10 | 11 | // dataSetConstructor builds an idrf.DataSet for a given measure 12 | type dataSetConstructor interface { 13 | construct(measure string) (*idrf.DataSet, error) 14 | } 15 | 16 | // newDataSetConstructor creates a new instance of a dataSetConstructor 17 | func newDataSetConstructor( 18 | db, rp string, onConflictConvertIntToFloat bool, 19 | client influx.Client, 20 | tagExplorer discovery.TagExplorer, 21 | fieldExplorer discovery.FieldExplorer) dataSetConstructor { 22 | return &defaultDSConstructor{ 23 | database: db, 24 | rp: rp, 25 | influxClient: client, 26 | tagExplorer: tagExplorer, 27 | fieldExplorer: fieldExplorer, 28 | onConflictConvertIntToFloat: onConflictConvertIntToFloat, 29 | } 30 | } 31 | 32 | type defaultDSConstructor struct { 33 | database string 34 | rp string 35 | onConflictConvertIntToFloat bool 36 | tagExplorer discovery.TagExplorer 37 | fieldExplorer discovery.FieldExplorer 38 | influxClient influx.Client 39 | } 40 | 41 | func (d *defaultDSConstructor) construct(measure string) (*idrf.DataSet, error) { 42 | idrfTags, err := d.tagExplorer.DiscoverMeasurementTags(d.influxClient, d.database, d.rp, measure) 43 | if err != nil { 44 | return nil, fmt.Errorf("could not discover the tags of measurement '%s'\n%v", measure, err) 45 | } 46 | 47 | idrfFields, err := d.fieldExplorer.DiscoverMeasurementFields(d.influxClient, d.database, d.rp, measure, d.onConflictConvertIntToFloat) 48 | if err != nil { 49 | return nil, fmt.Errorf("could not discover the fields of measure '%s'\n%v", measure, err) 50 | } 51 | 52 | idrfTimeColumn, _ := idrf.NewColumn("time", idrf.IDRFTimestamptz) 53 | allColumns := []*idrf.Column{idrfTimeColumn} 54 | allColumns = append(allColumns, idrfTags...) 55 | allColumns = append(allColumns, idrfFields...)
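// (added note: the column order is fixed — time first, then tags, then fields; for a hypothetical
// measurement "cpu" with tag "host" and field "usage" the constructed set would be
// (time IDRFTimestamptz, host IDRFString, usage IDRFDouble))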
56 | dataSet, err := idrf.NewDataSet(measure, allColumns, "time") 57 | return dataSet, err 58 | } 59 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/dataset_constructor_test.go: -------------------------------------------------------------------------------- 1 | package influx 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/timescale/outflux/internal/idrf" 8 | 9 | influx "github.com/influxdata/influxdb/client/v2" 10 | ) 11 | 12 | func TestNewDataSetConstructor(t *testing.T) { 13 | newDataSetConstructor("", "rp", true, nil, nil, nil) 14 | } 15 | 16 | func TestConstruct(t *testing.T) { 17 | genError := fmt.Errorf("generic error") 18 | tags := []*idrf.Column{{Name: "tag", DataType: idrf.IDRFString}} 19 | fields := []*idrf.Column{{Name: "field", DataType: idrf.IDRFBoolean}} 20 | testCases := []struct { 21 | desc string 22 | tags []*idrf.Column 23 | tagsErr error 24 | fields []*idrf.Column 25 | fieldsErr error 26 | expectedErr bool 27 | }{ 28 | { 29 | desc: "Error on discover tags", 30 | tagsErr: genError, 31 | expectedErr: true, 32 | }, { 33 | desc: "Error on discover fields", 34 | tags: tags, 35 | fieldsErr: genError, 36 | expectedErr: true, 37 | }, { 38 | desc: "All good", 39 | tags: tags, 40 | fields: fields, 41 | }, 42 | } 43 | 44 | for _, tc := range testCases { 45 | mock := &mocker{tags: tc.tags, tagsErr: tc.tagsErr, fields: tc.fields, fieldsErr: tc.fieldsErr} 46 | constructor := defaultDSConstructor{ 47 | tagExplorer: mock, 48 | fieldExplorer: mock, 49 | } 50 | 51 | res, err := constructor.construct("a") 52 | if err != nil && !tc.expectedErr { 53 | t.Errorf("unexpected error %v", err) 54 | } else if err == nil && tc.expectedErr { 55 | t.Errorf("expected error, none received") 56 | } 57 | 58 | if tc.expectedErr { 59 | continue 60 | } 61 | 62 | if res.DataSetName != "a" { 63 | t.Errorf("expected data set to be named: a, got: %s", res.DataSetName) 64 | } 65 | 66 | if len(res.Columns) != 1+len(tags)+len(fields) { //time, tags, fields 67 | t.Errorf("expected %d columns, got %d", 1+len(tags)+len(fields), len(res.Columns)) 68 | } 69 | 70 | if res.TimeColumn != res.Columns[0].Name { 71 | t.Errorf("expected time column to be first in columns array") 72 | } 73 | } 74 | } 75 | 76 | type mocker struct { 77 | tags []*idrf.Column 78 | tagsErr error 79 | fields []*idrf.Column 80 | fieldsErr error 81 | } 82 | 83 | func (m *mocker) DiscoverMeasurementTags(influxClient influx.Client, db, rp, measure string) ([]*idrf.Column, error) { 84 | return m.tags, m.tagsErr 85 | } 86 | 87 | func (m *mocker) DiscoverMeasurementFields(influxClient influx.Client, db, rp, measurement string, convertIntToFloat bool) ([]*idrf.Column, error) { 88 | return m.fields, m.fieldsErr 89 | } 90 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/discovery/field_discovery.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "sort" 7 | 8 | influx "github.com/influxdata/influxdb/client/v2" 9 | "github.com/timescale/outflux/internal/idrf" 10 | "github.com/timescale/outflux/internal/schemamanagement/influx/influxqueries" 11 | ) 12 | 13 | const ( 14 | showFieldsQueryTemplate = `SHOW FIELD KEYS FROM "%s"."%s"` 15 | ) 16 | 17 | // FieldExplorer defines an API for discovering InfluxDB fields of a specified measurement 18 | type FieldExplorer interface { 19 | // DiscoverMeasurementFields creates the 
ColumnInfo for the Fields of a given measurement 20 | DiscoverMeasurementFields(influxClient influx.Client, db, rp, measurement string, onConflictConvertIntToFloat bool) ([]*idrf.Column, error) 21 | } 22 | 23 | type defaultFieldExplorer struct { 24 | queryService influxqueries.InfluxQueryService 25 | } 26 | 27 | // NewFieldExplorer creates a new instance of the field discovery API 28 | func NewFieldExplorer(queryService influxqueries.InfluxQueryService) FieldExplorer { 29 | return &defaultFieldExplorer{queryService} 30 | } 31 | 32 | // InfluxDB can have different data types for the same field across 33 | // different shards. If a field is discovered with Int64 and Float64 type 34 | // and the 'onConflictConvertIntToFloat' flag is TRUE it will allow the field to be converted to float, 35 | // otherwise it will return an error 36 | func (fe *defaultFieldExplorer) DiscoverMeasurementFields(influxClient influx.Client, db, rp, measurement string, onConflictConvertIntToFloat bool) ([]*idrf.Column, error) { 37 | fields, err := fe.fetchMeasurementFields(influxClient, db, rp, measurement) 38 | if err != nil { 39 | return nil, fmt.Errorf("error fetching fields for measurement '%s'\n%v", measurement, err) 40 | } 41 | 42 | return convertFields(fields, onConflictConvertIntToFloat) 43 | } 44 | 45 | func (fe *defaultFieldExplorer) fetchMeasurementFields(influxClient influx.Client, db, rp, measurement string) ([][2]string, error) { 46 | showFieldsQuery := fmt.Sprintf(showFieldsQueryTemplate, rp, measurement) 47 | result, err := fe.queryService.ExecuteShowQuery(influxClient, db, showFieldsQuery) 48 | 49 | if err != nil { 50 | return nil, fmt.Errorf("error executing query: %s\n%v", showFieldsQuery, err) 51 | } 52 | 53 | if len(result.Values) == 0 { 54 | errorString := fmt.Sprintf("field keys query returned unexpected result. "+ 55 | "no values returned for measure '%s'", measurement) 56 | return nil, fmt.Errorf(errorString) 57 | } 58 | 59 | fieldKeys := make([][2]string, len(result.Values)) 60 | for index, valuesRow := range result.Values { 61 | if len(valuesRow) != 2 { 62 | errorString := "field key query returned unexpected result. " + 63 | "field name and type not represented in two columns" 64 | return nil, fmt.Errorf(errorString) 65 | } 66 | 67 | fieldName := valuesRow[0] 68 | fieldType := valuesRow[1] 69 | fieldKeys[index] = [2]string{fieldName, fieldType} 70 | } 71 | 72 | return fieldKeys, nil 73 | } 74 | 75 | func convertFields(fieldsWithType [][2]string, convertInt64ToFloat64 bool) ([]*idrf.Column, error) { 76 | columnMap, err := chooseDataTypeForFields(fieldsWithType, convertInt64ToFloat64) 77 | if err != nil { 78 | return nil, err 79 | } 80 | 81 | columns := make([]*idrf.Column, len(columnMap)) 82 | currentColumn := 0 83 | columnNames := make([]string, len(columnMap)) 84 | for columnName := range columnMap { 85 | columnNames[currentColumn] = columnName 86 | currentColumn++ 87 | } 88 | sort.Strings(columnNames) 89 | for i, columnName := range columnNames { 90 | columnType := columnMap[columnName] 91 | idrfColumn, err := idrf.NewColumn(columnName, columnType) 92 | 93 | if err != nil { 94 | return nil, fmt.Errorf("could not convert field to Intermediate Data Representation Format\n%v", err.Error()) 95 | } 96 | 97 | columns[i] = idrfColumn 98 | } 99 | 100 | return columns, nil 101 | } 102 | 
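// Illustrative sketch (added commentary, not part of the original source) of how shard-level type
// conflicts are resolved by chooseDataTypeForFields below:
//
//	fields := [][2]string{{"usage", "integer"}, {"usage", "float"}}
//	_, err := chooseDataTypeForFields(fields, false) // err != nil: int64 cannot fit into double
//	m, _ := chooseDataTypeForFields(fields, true)    // m["usage"] == idrf.IDRFDouble
//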
\n%v", err.Error()) 95 | } 96 | 97 | columns[i] = idrfColumn 98 | } 99 | 100 | return columns, nil 101 | } 102 | 103 | func convertDataType(influxType string) idrf.DataType { 104 | switch influxType { 105 | case "float": 106 | return idrf.IDRFDouble 107 | case "string": 108 | return idrf.IDRFString 109 | case "integer": 110 | return idrf.IDRFInteger64 111 | case "boolean": 112 | return idrf.IDRFBoolean 113 | default: 114 | panic("Unexpected value") 115 | } 116 | } 117 | 118 | func chooseDataTypeForFields(fieldsWithType [][2]string, convertInt64ToFloat64 bool) (map[string]idrf.DataType, error) { 119 | columnMap := make(map[string]idrf.DataType) 120 | for _, field := range fieldsWithType { 121 | fieldName := field[0] 122 | fieldType := field[1] 123 | columnType := convertDataType(fieldType) 124 | existingType, ok := columnMap[fieldName] 125 | if !ok { 126 | columnMap[fieldName] = columnType 127 | continue 128 | } else if columnType.CanFitInto(existingType) { 129 | log.Printf("Field %s exists as %s and %s in the same measurement.", fieldName, existingType, columnType) 130 | log.Printf("Will be cast to %s during migration", existingType) 131 | continue 132 | } else if existingType.CanFitInto(columnType) { 133 | columnMap[fieldName] = columnType 134 | log.Printf("Field %s exists as %s and %s in the same measurement.", fieldName, existingType, columnType) 135 | log.Printf("Will be cast to %s during migration", columnType) 136 | continue 137 | } else if convertInt64ToFloat64 && intFloatCombo(existingType, columnType) { 138 | log.Printf("Field %s exists as %s and %s in the same measurement.", fieldName, existingType, columnType) 139 | log.Printf("Flag set to cast int64 to float64 for this field during migration") 140 | columnMap[fieldName] = idrf.IDRFDouble 141 | continue 142 | } 143 | 144 | return nil, fmt.Errorf("field '%s' has incomparable types accross multiple shards. 
"+ 145 | "Exists with type %s and %s", fieldName, existingType, columnType) 146 | } 147 | 148 | return columnMap, nil 149 | } 150 | 151 | func intFloatCombo(oneType, secondType idrf.DataType) bool { 152 | return (oneType == idrf.IDRFInteger64 && secondType == idrf.IDRFDouble) || 153 | (oneType == idrf.IDRFDouble && secondType == idrf.IDRFInteger64) 154 | } 155 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/discovery/field_discovery_test.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "testing" 7 | 8 | influx "github.com/influxdata/influxdb/client/v2" 9 | "github.com/timescale/outflux/internal/idrf" 10 | "github.com/timescale/outflux/internal/schemamanagement/influx/influxqueries" 11 | ) 12 | 13 | func TestDiscoverMeasurementFields(t *testing.T) { 14 | var mockClient influx.Client 15 | mockClient = &influxqueries.MockClient{} 16 | database := "database" 17 | measure := "measure" 18 | rp := "autogen" 19 | cases := []testCase{ 20 | { 21 | desc: "not good, error executing query", 22 | expectedError: true, 23 | showQueryError: fmt.Errorf("error executing query"), 24 | }, { 25 | desc: "empty result returned, error should be result, must have fields", 26 | expectedError: true, 27 | showQueryResult: &influxqueries.InfluxShowResult{ 28 | Values: [][]string{}, 29 | }, 30 | }, { 31 | desc: "result has more than two columns", 32 | expectedError: true, 33 | showQueryResult: &influxqueries.InfluxShowResult{ 34 | Values: [][]string{ 35 | {"1", "2", "3"}, 36 | }, 37 | }, 38 | }, { 39 | desc: "proper result", 40 | showQueryResult: &influxqueries.InfluxShowResult{ 41 | Values: [][]string{ 42 | {"1", "boolean"}, 43 | {"2", "float"}, 44 | {"3", "integer"}, 45 | {"4", "string"}, 46 | }, 47 | }, 48 | expectedTags: []*idrf.Column{ 49 | {Name: "1", DataType: idrf.IDRFBoolean}, 50 | {Name: "2", DataType: idrf.IDRFDouble}, 51 | {Name: "3", DataType: idrf.IDRFInteger64}, 52 | {Name: "4", DataType: idrf.IDRFString}, 53 | }, 54 | }, { 55 | desc: "same field, diff types, uncastable", 56 | showQueryResult: &influxqueries.InfluxShowResult{ 57 | Values: [][]string{ 58 | {"1", "boolean"}, 59 | {"1", "float"}, 60 | }, 61 | }, 62 | expectedError: true, 63 | }, { 64 | desc: "same field, diff types, int and float (flag says error)", 65 | expectedError: true, 66 | showQueryResult: &influxqueries.InfluxShowResult{ 67 | Values: [][]string{ 68 | {"1", "integer"}, 69 | {"1", "float"}, 70 | }, 71 | }, 72 | }, { 73 | desc: "same field, diff types, int and float (flag says no error)", 74 | showQueryResult: &influxqueries.InfluxShowResult{ 75 | Values: [][]string{ 76 | {"1", "integer"}, 77 | {"1", "float"}, 78 | }, 79 | }, 80 | onConflictConvertIntToFloat: true, 81 | expectedTags: []*idrf.Column{ 82 | {Name: "1", DataType: idrf.IDRFDouble}, 83 | }, 84 | }, 85 | } 86 | 87 | for _, testCase := range cases { 88 | t.Run(testCase.desc, func(t *testing.T) { 89 | fieldExplorer := defaultFieldExplorer{ 90 | queryService: mock(testCase), 91 | } 92 | result, err := fieldExplorer.DiscoverMeasurementFields(mockClient, database, rp, measure, testCase.onConflictConvertIntToFloat) 93 | if err != nil && !testCase.expectedError { 94 | t.Errorf("unexpected error %v", err) 95 | } else if err == nil && testCase.expectedError { 96 | t.Errorf("expected error, none received") 97 | } 98 | 99 | if testCase.expectedError { 100 | return 101 | } 102 | 103 | expected := testCase.expectedTags 
104 | if len(expected) != len(result) { 105 | t.Errorf("expected result: '%v', got '%v'", expected, result) 106 | } 107 | 108 | for index, resColumn := range result { 109 | if resColumn.Name != expected[index].Name || resColumn.DataType != expected[index].DataType { 110 | t.Errorf("expected column: %v, got %v", expected[index], resColumn) 111 | } 112 | } 113 | }) 114 | } 115 | } 116 | 117 | func TestChooseDataTypeForFields(t *testing.T) { 118 | testCases := []struct { 119 | desc string 120 | in [][2]string 121 | out map[string]idrf.DataType 122 | onConflictConvertIntToFloat bool 123 | expectErr bool 124 | }{ 125 | { 126 | desc: "All good, single string field", 127 | in: [][2]string{{"a", "string"}}, 128 | out: map[string]idrf.DataType{"a": idrf.IDRFString}, 129 | }, { 130 | desc: "All good, multiple distinct fields", 131 | in: [][2]string{{"a", "string"}, {"b", "integer"}, {"c", "float"}}, 132 | out: map[string]idrf.DataType{"a": idrf.IDRFString, "b": idrf.IDRFInteger64, "c": idrf.IDRFDouble}, 133 | }, { 134 | desc: "Not good, incomparable fields", 135 | in: [][2]string{{"a", "string"}, {"b", "integer"}, {"b", "float"}}, 136 | expectErr: true, 137 | }, { 138 | desc: "Good, incomparable fields, but forced int to float conversion", 139 | in: [][2]string{{"a", "string"}, {"b", "integer"}, {"b", "float"}}, 140 | onConflictConvertIntToFloat: true, 141 | out: map[string]idrf.DataType{"a": idrf.IDRFString, "b": idrf.IDRFDouble}, 142 | }, 143 | } 144 | for _, tc := range testCases { 145 | t.Run(tc.desc, func(t *testing.T) { 146 | got, err := chooseDataTypeForFields(tc.in, tc.onConflictConvertIntToFloat) 147 | if err != nil && !tc.expectErr { 148 | t.Errorf("unexpected error: %v", err) 149 | return 150 | } 151 | if err == nil && tc.expectErr { 152 | t.Error("unexpected lack of error") 153 | return 154 | } 155 | if !reflect.DeepEqual(got, tc.out) { 156 | t.Errorf("expected: %v\ngot: %v", tc.out, got) 157 | } 158 | }) 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/discovery/measure_discovery.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | influx "github.com/influxdata/influxdb/client/v2" 8 | "github.com/timescale/outflux/internal/schemamanagement/influx/influxqueries" 9 | ) 10 | 11 | const ( 12 | showMeasurementsQuery = "SHOW MEASUREMENTS" 13 | ) 14 | 15 | // MeasureExplorer defines an API for discovering the available measures in an InfluxDB database 16 | type MeasureExplorer interface { 17 | FetchAvailableMeasurements(influxClient influx.Client, db, rp string, onConflictConvertIntToFloat bool) ([]string, error) 18 | } 19 | 20 | // defaultMeasureExplorer contains the functions that can be swapped out during testing 21 | type defaultMeasureExplorer struct { 22 | queryService influxqueries.InfluxQueryService 23 | fieldExplorer FieldExplorer 24 | } 25 | 26 | // NewMeasureExplorer creates a new implementation of the MeasureExplorer API 27 | func NewMeasureExplorer(queryService influxqueries.InfluxQueryService, fieldExplorer FieldExplorer) MeasureExplorer { 28 | return &defaultMeasureExplorer{ 29 | queryService: queryService, 30 | fieldExplorer: fieldExplorer, 31 | } 32 | } 33 | 34 | // FetchAvailableMeasurements returns the names of all available measurements for a given database, 35 | // or an error if the query could not be executed, or the result was in an unexpected format 36 | func (me 
*defaultMeasureExplorer) FetchAvailableMeasurements(influxClient influx.Client, db, rp string, onConflictConvertIntToFloat bool) ([]string, error) { 37 | result, err := me.queryService.ExecuteShowQuery(influxClient, db, showMeasurementsQuery) 38 | if err != nil { 39 | return nil, fmt.Errorf("error executing query: %s\nerror: %v", showMeasurementsQuery, err) 40 | } 41 | 42 | measuresInDb := make([]string, len(result.Values)) 43 | for index, valuesRow := range result.Values { 44 | if len(valuesRow) != 1 { 45 | errorString := "measurement discovery query returned unexpected result. " + 46 | "measurement names not represented in single column" 47 | return nil, fmt.Errorf(errorString) 48 | } 49 | 50 | measuresInDb[index] = valuesRow[0] 51 | } 52 | 53 | measuresInRP := []string{} 54 | for _, measure := range measuresInDb { 55 | _, err := me.fieldExplorer.DiscoverMeasurementFields(influxClient, db, rp, measure, onConflictConvertIntToFloat) 56 | if err != nil { 57 | log.Printf("Will ignore measurement '%s' because:\n%s", measure, err.Error()) 58 | continue 59 | } 60 | 61 | measuresInRP = append(measuresInRP, measure) 62 | } 63 | return measuresInRP, nil 64 | } 65 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/discovery/measure_discovery_test.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "testing" 7 | 8 | "github.com/timescale/outflux/internal/idrf" 9 | "github.com/timescale/outflux/internal/schemamanagement/influx/influxqueries" 10 | 11 | influx "github.com/influxdata/influxdb/client/v2" 12 | ) 13 | 14 | type executeShowQueryFn = func(influxClient influx.Client, database, query string) (*influxqueries.InfluxShowResult, error) 15 | 16 | type testCase struct { 17 | desc string 18 | expectedError bool 19 | showQueryResult *influxqueries.InfluxShowResult 20 | showQueryError error 21 | expectedMeasures []string 22 | expectedTags []*idrf.Column 23 | fieldsErr error 24 | onConflictConvertIntToFloat bool 25 | } 26 | 27 | func TestNewMeasureExplorer(t *testing.T) { 28 | NewMeasureExplorer(nil, nil) 29 | } 30 | 31 | func TestFetchAvailableMeasurements(t *testing.T) { 32 | var mockClient influx.Client 33 | mockClient = &influxqueries.MockClient{} 34 | database := "database" 35 | rp := "autogen" 36 | cases := []testCase{ 37 | { 38 | expectedError: true, 39 | showQueryError: fmt.Errorf("error executing query"), 40 | }, { // empty result returned 41 | expectedError: false, 42 | showQueryResult: &influxqueries.InfluxShowResult{ 43 | Values: [][]string{}, 44 | }, 45 | expectedMeasures: []string{}, 46 | }, { // result has more than one column 47 | expectedError: true, 48 | showQueryResult: &influxqueries.InfluxShowResult{ 49 | Values: [][]string{{"1", "2"}}, 50 | }, 51 | showQueryError: fmt.Errorf("too many columns"), 52 | }, { 53 | expectedError: false, 54 | showQueryResult: &influxqueries.InfluxShowResult{ // result is proper 55 | Values: [][]string{ 56 | {"1"}, 57 | }, 58 | }, 59 | expectedMeasures: []string{"1"}, 60 | }, { 61 | expectedError: false, // no fields discovered for measure in given rp, measure is not returned 62 | showQueryResult: &influxqueries.InfluxShowResult{ 63 | Values: [][]string{ 64 | {"1"}, 65 | }, 66 | }, 67 | fieldsErr: errors.New("generic error"), 68 | expectedMeasures: []string{}, 69 | }, 70 | } 71 | 72 | for _, testC := range cases { 73 | mock := mock(testC) 74 | measureExplorer := defaultMeasureExplorer{ 75 | 
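// (added note: a single mockAll instance, defined below, stands in for both the InfluxQueryService and the FieldExplorer)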
queryService: mock, 76 | fieldExplorer: mock, 77 | } 78 | 79 | result, err := measureExplorer.FetchAvailableMeasurements(mockClient, database, rp, false) 80 | if err != nil && !testC.expectedError { 81 | t.Errorf("no error expected, got: %v", err) 82 | } else if err == nil && testC.expectedError { 83 | t.Errorf("expected error, none received") 84 | } 85 | 86 | if testC.expectedError { 87 | continue 88 | } 89 | 90 | expected := testC.expectedMeasures 91 | if len(expected) != len(result) { 92 | t.Errorf("expected result: '%v', got '%v'", expected, result) 93 | } 94 | 95 | for index, measureName := range result { 96 | if measureName != expected[index] { 97 | t.Errorf("Expected measure: %s, got %s", expected[index], measureName) 98 | } 99 | } 100 | } 101 | } 102 | 103 | type mockAll struct { 104 | sqRes *influxqueries.InfluxShowResult 105 | sqErr error 106 | fieldsErr error 107 | } 108 | 109 | func (m *mockAll) ExecuteQuery(client influx.Client, database, command string) ([]influx.Result, error) { 110 | panic("should not come here") 111 | } 112 | 113 | func (m *mockAll) ExecuteShowQuery(influxClient influx.Client, database, query string) (*influxqueries.InfluxShowResult, error) { 114 | return m.sqRes, m.sqErr 115 | } 116 | 117 | func (m *mockAll) DiscoverMeasurementFields(c influx.Client, db, rp, ms string, onConflictConvertIntToFloat bool) ([]*idrf.Column, error) { 118 | return nil, m.fieldsErr 119 | } 120 | 121 | func mock(tc testCase) *mockAll { 122 | return &mockAll{ 123 | sqRes: tc.showQueryResult, sqErr: tc.showQueryError, fieldsErr: tc.fieldsErr, 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/discovery/tag_discovery.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "fmt" 5 | 6 | influx "github.com/influxdata/influxdb/client/v2" 7 | "github.com/timescale/outflux/internal/idrf" 8 | "github.com/timescale/outflux/internal/schemamanagement/influx/influxqueries" 9 | ) 10 | 11 | const ( 12 | showTagsQueryTemplate = `SHOW TAG KEYS FROM "%s"."%s"` 13 | ) 14 | 15 | // TagExplorer defines an API for discovering the tags of an InfluxDB measurement 16 | type TagExplorer interface { 17 | DiscoverMeasurementTags(influxClient influx.Client, database, rp, measure string) ([]*idrf.Column, error) 18 | } 19 | 20 | type defaultTagExplorer struct { 21 | queryService influxqueries.InfluxQueryService 22 | } 23 | 24 | // NewTagExplorer creates a new implementation that can discover the tags of an influx measurement 25 | func NewTagExplorer(queryService influxqueries.InfluxQueryService) TagExplorer { 26 | return &defaultTagExplorer{ 27 | queryService: queryService, 28 | } 29 | } 30 | 31 | // DiscoverMeasurementTags retrieves the tags for a given measurement and returns an IDRF representation for them.
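// For example (illustrative, added commentary): a measurement with tags "region" and "host" yields
// two columns, both of type idrf.IDRFString, since InfluxDB tags are always strings.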
32 | func (te *defaultTagExplorer) DiscoverMeasurementTags(influxClient influx.Client, database, rp, measure string) ([]*idrf.Column, error) { 33 | tags, err := te.fetchMeasurementTags(influxClient, database, rp, measure) 34 | 35 | if err != nil { 36 | return nil, fmt.Errorf("error fetching tags for measurement '%s'\n%v", measure, err) 37 | } 38 | 39 | return convertTags(tags) 40 | } 41 | 42 | func (te *defaultTagExplorer) fetchMeasurementTags(influxClient influx.Client, database, rp, measure string) ([]string, error) { 43 | showTagsQuery := fmt.Sprintf(showTagsQueryTemplate, rp, measure) 44 | result, err := te.queryService.ExecuteShowQuery(influxClient, database, showTagsQuery) 45 | 46 | if err != nil { 47 | return nil, fmt.Errorf("error executing query: %s\n%v", showTagsQuery, err) 48 | } 49 | 50 | if len(result.Values) == 0 { 51 | return []string{}, nil 52 | } 53 | 54 | tagNames := make([]string, len(result.Values)) 55 | for index, valuesRow := range result.Values { 56 | if len(valuesRow) != 1 { 57 | errorString := "tag discovery query returned unexpected result. " + 58 | "Tag names not represented in single column" 59 | return nil, fmt.Errorf(errorString) 60 | } 61 | 62 | tagNames[index] = valuesRow[0] 63 | } 64 | 65 | return tagNames, nil 66 | } 67 | 68 | func convertTags(tags []string) ([]*idrf.Column, error) { 69 | columns := make([]*idrf.Column, len(tags)) 70 | for i, tag := range tags { 71 | idrfColumn, err := idrf.NewColumn(tag, idrf.IDRFString) 72 | 73 | if err != nil { 74 | return nil, fmt.Errorf("could not convert tags to IDRF\n%v", err) 75 | } 76 | 77 | columns[i] = idrfColumn 78 | } 79 | 80 | return columns, nil 81 | } 82 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/discovery/tag_discovery_test.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | influx "github.com/influxdata/influxdb/client/v2" 8 | "github.com/timescale/outflux/internal/idrf" 9 | "github.com/timescale/outflux/internal/schemamanagement/influx/influxqueries" 10 | ) 11 | 12 | func TestNewTagExplorer(t *testing.T) { 13 | NewTagExplorer(nil) 14 | } 15 | 16 | func TestDiscoverMeasurementTags(t *testing.T) { 17 | var mockClient influx.Client 18 | mockClient = &influxqueries.MockClient{} 19 | database := "database" 20 | measure := "measure" 21 | rp := "autogen" 22 | cases := []testCase{ 23 | { 24 | expectedError: true, 25 | showQueryError: fmt.Errorf("error executing query"), 26 | }, { // empty result returned 27 | expectedError: false, 28 | showQueryResult: &influxqueries.InfluxShowResult{ 29 | Values: [][]string{}, 30 | }, 31 | }, { // result has more than one column 32 | expectedError: true, 33 | showQueryResult: &influxqueries.InfluxShowResult{ 34 | Values: [][]string{ 35 | {"1", "2"}, 36 | }, 37 | }, 38 | }, { 39 | expectedError: false, 40 | showQueryResult: &influxqueries.InfluxShowResult{ // result is proper 41 | Values: [][]string{ 42 | {"1"}, 43 | }, 44 | }, 45 | expectedTags: []*idrf.Column{ 46 | { 47 | Name: "1", 48 | DataType: idrf.IDRFString, 49 | }, 50 | }, 51 | }, 52 | } 53 | 54 | for _, testCase := range cases { 55 | tagExplorer := defaultTagExplorer{ 56 | queryService: mock(testCase), 57 | } 58 | 59 | result, err := tagExplorer.DiscoverMeasurementTags(mockClient, database, rp, measure) 60 | if err != nil && !testCase.expectedError { 61 | t.Errorf("did not expect an error, 
got '%v'", err) 62 | } else if err == nil && testCase.expectedError { 63 | t.Error("еxpected error, none received") 64 | } 65 | 66 | if testCase.expectedError { 67 | continue 68 | } 69 | 70 | expected := testCase.expectedTags 71 | if len(expected) != len(result) { 72 | t.Errorf("еxpected result: '%v', got '%v'", expected, result) 73 | } 74 | 75 | for index, resultColumn := range result { 76 | if resultColumn.Name != expected[index].Name || resultColumn.DataType != expected[index].DataType { 77 | t.Errorf("Expected column: %v, got %v", expected[index], resultColumn) 78 | } 79 | } 80 | } 81 | } 82 | 83 | func TestFetchMeasurementsShowTagsQuery(t *testing.T) { 84 | db := "db" 85 | rp := "rp" 86 | testCases := []struct { 87 | expectedQuery string 88 | measure string 89 | db string 90 | }{ 91 | { 92 | expectedQuery: `SHOW TAG KEYS FROM "rp"."measure"`, 93 | measure: "measure", 94 | }, { 95 | expectedQuery: `SHOW TAG KEYS FROM "rp"."measure 1"`, 96 | measure: "measure 1", 97 | }, { 98 | expectedQuery: `SHOW TAG KEYS FROM "rp"."measure-2"`, 99 | measure: "measure-2", 100 | }, 101 | } 102 | for _, tc := range testCases { 103 | mockClient := &influxqueries.MockClient{} 104 | queryService := &mockQueryServiceTD{ 105 | expectedDb: db, 106 | expectedQ: tc.expectedQuery, 107 | } 108 | 109 | tagExplorer := defaultTagExplorer{ 110 | queryService: queryService, 111 | } 112 | 113 | _, err := tagExplorer.fetchMeasurementTags(mockClient, db, rp, tc.measure) 114 | if err != nil { 115 | t.Errorf("unexpected err: %v", err) 116 | } 117 | } 118 | } 119 | 120 | type mockQueryServiceTD struct { 121 | expectedQ string 122 | expectedDb string 123 | } 124 | 125 | func (m *mockQueryServiceTD) ExecuteQuery(client influx.Client, database, command string) ([]influx.Result, error) { 126 | return nil, nil 127 | } 128 | 129 | func (m *mockQueryServiceTD) ExecuteShowQuery(influxClient influx.Client, database, query string) (*influxqueries.InfluxShowResult, error) { 130 | if m.expectedDb != database || m.expectedQ != query { 131 | return nil, fmt.Errorf("expected db '%s' and measure '%s', got '%s' and '%s'", m.expectedDb, m.expectedQ, database, query) 132 | } 133 | return &influxqueries.InfluxShowResult{ 134 | Values: [][]string{}, 135 | }, nil 136 | } 137 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/influx_schema_manager.go: -------------------------------------------------------------------------------- 1 | package influx 2 | 3 | import ( 4 | influx "github.com/influxdata/influxdb/client/v2" 5 | "github.com/timescale/outflux/internal/idrf" 6 | "github.com/timescale/outflux/internal/schemamanagement/influx/discovery" 7 | "github.com/timescale/outflux/internal/schemamanagement/schemaconfig" 8 | ) 9 | 10 | // SchemaManager implements the schemamanagement.SchemaManager interface 11 | type SchemaManager struct { 12 | measureExplorer discovery.MeasureExplorer 13 | influxClient influx.Client 14 | dataSetConstructor dataSetConstructor 15 | database string 16 | rp string 17 | onConflictConvertIntToFloat bool 18 | } 19 | 20 | // NewSchemaManager creates new schema manager that can discover influx data sets 21 | func NewSchemaManager( 22 | client influx.Client, 23 | db, rp string, 24 | onConflictConvertIntToFloat bool, 25 | me discovery.MeasureExplorer, 26 | tagExplorer discovery.TagExplorer, 27 | fieldExplorer discovery.FieldExplorer) *SchemaManager { 28 | dsConstructor := newDataSetConstructor(db, rp, onConflictConvertIntToFloat, client, tagExplorer, 
fieldExplorer) 29 | return &SchemaManager{ 30 | measureExplorer: me, 31 | influxClient: client, 32 | dataSetConstructor: dsConstructor, 33 | database: db, 34 | rp: rp, 35 | onConflictConvertIntToFloat: onConflictConvertIntToFloat, 36 | } 37 | } 38 | 39 | // DiscoverDataSets returns a list of the available measurements in the connected InfluxDB database 40 | func (sm *SchemaManager) DiscoverDataSets() ([]string, error) { 41 | return sm.measureExplorer.FetchAvailableMeasurements(sm.influxClient, sm.database, sm.rp, sm.onConflictConvertIntToFloat) 42 | } 43 | 44 | // FetchDataSet for a given data set identifier (retention.measureName, or just measureName) 45 | // returns the idrf.DataSet describing it 46 | func (sm *SchemaManager) FetchDataSet(dataSetIdentifier string) (*idrf.DataSet, error) { 47 | return sm.dataSetConstructor.construct(dataSetIdentifier) 48 | } 49 | 50 | // PrepareDataSet NOT IMPLEMENTED 51 | func (sm *SchemaManager) PrepareDataSet(dataSet *idrf.DataSet, strategy schemaconfig.SchemaStrategy) error { 52 | panic("not implemented") 53 | } 54 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/influx_schema_manager_test.go: -------------------------------------------------------------------------------- 1 | package influx 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/timescale/outflux/internal/idrf" 8 | 9 | influx "github.com/influxdata/influxdb/client/v2" 10 | ) 11 | 12 | func TestNewInfluxSchemaManager(t *testing.T) { 13 | NewSchemaManager(nil, "", "", true, nil, nil, nil) 14 | } 15 | 16 | func TestDiscoverDataSets(t *testing.T) { 17 | mock := &ismMeasureExp{measureErr: fmt.Errorf("error")} 18 | sm := &SchemaManager{ 19 | measureExplorer: mock, 20 | } 21 | _, err := sm.DiscoverDataSets() 22 | if err == nil { 23 | t.Errorf("expected error, none received") 24 | } 25 | 26 | mock.measureErr = nil 27 | mock.measures = []string{"a"} 28 | res, err := sm.DiscoverDataSets() 29 | if err != nil { 30 | t.Errorf("unexpected err: %v", err) 31 | } 32 | if res[0] != "a" { 33 | t.Errorf("expected: 'a' got '%v'", res) 34 | } 35 | } 36 | 37 | func TestFetchDataSet(t *testing.T) { 38 | // Given mock values used in the test cases 39 | genericError := fmt.Errorf("generic error") 40 | goodMeasure := "a" 41 | measures := []string{goodMeasure} 42 | dataSet := &idrf.DataSet{DataSetName: goodMeasure} 43 | // Test cases 44 | cases := []struct { 45 | desc string 46 | expectErr bool 47 | measures []string 48 | reqMeasure string 49 | msErr error 50 | dsErr error 51 | ds *idrf.DataSet 52 | }{ 53 | {desc: "error constructing data set", expectErr: true, measures: measures, reqMeasure: goodMeasure, dsErr: genericError}, 54 | {desc: "good data set", measures: measures, reqMeasure: goodMeasure, ds: dataSet}, 55 | } 56 | 57 | for _, testCase := range cases { 58 | mockMExp := &ismMeasureExp{measures: testCase.measures, measureErr: testCase.msErr} 59 | mockDSCons := &ismDSCons{dsErr: testCase.dsErr, ds: testCase.ds} 60 | manager := &SchemaManager{measureExplorer: mockMExp, dataSetConstructor: mockDSCons} 61 | res, err := manager.FetchDataSet(testCase.reqMeasure) 62 | if testCase.expectErr && err == nil { 63 | t.Error("expected test case to have an error, no error returned") 64 | } else if !testCase.expectErr && err != nil { 65 | t.Errorf("unexpected err: %v", err) 66 | } 67 | 68 | if testCase.expectErr { 69 | continue 70 | } 71 | if res.DataSetName != testCase.ds.DataSetName { 72 | t.Errorf("expected ds name: %s, got %s", testCase.ds.DataSetName, 
res.DataSetName) 73 | } 74 | } 75 | 76 | } 77 | 78 | type ismMeasureExp struct { 79 | measures []string 80 | measureErr error 81 | } 82 | 83 | func (i *ismMeasureExp) FetchAvailableMeasurements(influxClient influx.Client, db, rp string, convertIntToFloat bool) ([]string, error) { 84 | return i.measures, i.measureErr 85 | } 86 | 87 | type ismDSCons struct { 88 | ds *idrf.DataSet 89 | dsErr error 90 | } 91 | 92 | func (i *ismDSCons) construct(measure string) (*idrf.DataSet, error) { 93 | return i.ds, i.dsErr 94 | } 95 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/influxqueries/influx_query_service.go: -------------------------------------------------------------------------------- 1 | package influxqueries 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | influx "github.com/influxdata/influxdb/client/v2" 8 | ) 9 | 10 | // InfluxShowResult contains the results/values from a 'SHOW' query 11 | type InfluxShowResult struct { 12 | Values [][]string 13 | } 14 | 15 | // InfluxQueryService contains helper functions to work with the InfluxDB client 16 | type InfluxQueryService interface { 17 | ExecuteQuery(client influx.Client, database, command string) ([]influx.Result, error) 18 | ExecuteShowQuery(influxClient influx.Client, database, query string) (*InfluxShowResult, error) 19 | } 20 | 21 | type defaultInfluxQueryService struct { 22 | } 23 | 24 | // NewInfluxQueryService creates a new implementation of the client utils struct 25 | func NewInfluxQueryService() InfluxQueryService { 26 | return &defaultInfluxQueryService{} 27 | } 28 | 29 | // ExecuteQuery sends a command query to an InfluxDB server 30 | func (u *defaultInfluxQueryService) ExecuteQuery(client influx.Client, database, command string) (res []influx.Result, err error) { 31 | query := influx.Query{ 32 | Command: command, 33 | Database: database, 34 | } 35 | 36 | if response, err := client.Query(query); err == nil { 37 | if response.Error() != nil { 38 | return res, response.Error() 39 | } 40 | 41 | res = response.Results 42 | } else { 43 | return res, err 44 | } 45 | 46 | return res, err 47 | } 48 | 49 | // ExecuteShowQuery executes a "SHOW ..." InfluxQL query 50 | func (u *defaultInfluxQueryService) ExecuteShowQuery(influxClient influx.Client, database, query string) (*InfluxShowResult, error) { 51 | if !strings.HasPrefix(strings.ToUpper(query), "SHOW ") { 52 | return nil, fmt.Errorf("show query must start with 'SHOW '") 53 | } 54 | 55 | result, err := u.ExecuteQuery(influxClient, database, query) 56 | if err != nil { 57 | return nil, err 58 | } 59 | 60 | if len(result) != 1 { 61 | errorString := "'SHOW' query failed. No results returned." 62 | return nil, fmt.Errorf(errorString) 63 | } 64 | 65 | series := result[0].Series 66 | if len(series) == 0 { 67 | return &InfluxShowResult{Values: [][]string{}}, nil 68 | } else if len(series) > 1 { 69 | errorString := "'SHOW' query returned unexpected results. More than one series found."
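// (added note: a well-formed SHOW response carries at most one series, so anything more is treated as an error rather than silently picking one)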
70 | return nil, fmt.Errorf(errorString) 71 | } 72 | 73 | convertedValues, err := castShowResultValues(series[0].Values) 74 | if err != nil { 75 | return nil, err 76 | } 77 | 78 | return &InfluxShowResult{Values: convertedValues}, nil 79 | } 80 | 81 | func castShowResultValues(returnedResults [][]interface{}) ([][]string, error) { 82 | toReturn := make([][]string, len(returnedResults)) 83 | var ok bool 84 | for i, row := range returnedResults { 85 | toReturn[i] = make([]string, len(row)) 86 | for j, value := range row { 87 | toReturn[i][j], ok = value.(string) 88 | if !ok { 89 | return nil, fmt.Errorf("value from 'SHOW' query could not be cast to string") 90 | } 91 | } 92 | } 93 | 94 | return toReturn, nil 95 | } 96 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/influxqueries/influx_query_service_test.go: -------------------------------------------------------------------------------- 1 | package influxqueries 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | influx "github.com/influxdata/influxdb/client/v2" 8 | influxModels "github.com/influxdata/influxdb/models" 9 | ) 10 | 11 | func TestExecuteQuery(t *testing.T) { 12 | cases := []MockClient{ 13 | { //Expect client to throw error before getting result 14 | t: t, 15 | expectedQuery: "query 1", 16 | expectedError: fmt.Errorf("error"), 17 | }, { //Expect client to return a result with an error 18 | t: t, 19 | expectedQuery: "query 2", 20 | expectedResponse: &influx.Response{ 21 | Err: "some error in response", 22 | }, 23 | errorInResponse: "some error in response", 24 | }, { // Expect client to return empty result, no error 25 | t: t, 26 | expectedQuery: "query 3", 27 | expectedResponse: &influx.Response{ 28 | Results: []influx.Result{}, 29 | }, 30 | }, { // Expect client to return a non-empty result, no error 31 | t: t, 32 | expectedQuery: "query 4", 33 | expectedResponse: &influx.Response{ 34 | Results: []influx.Result{ 35 | { 36 | Series: []influxModels.Row{}, 37 | }, 38 | }, 39 | }, 40 | }} 41 | 42 | expectedDatabaseName := "database name" 43 | queryExecutor := &defaultInfluxQueryService{} 44 | for _, mockClient := range cases { 45 | var client influx.Client 46 | client = &mockClient 47 | response, err := queryExecutor.ExecuteQuery(client, expectedDatabaseName, mockClient.expectedQuery) 48 | if mockClient.expectedError != nil && err != mockClient.expectedError { 49 | // An error was expected, not from the content of the Response 50 | t.Errorf("Expected to fail with: <%v>, received error was: <%v>", mockClient.expectedError, err) 51 | } 52 | 53 | if mockClient.errorInResponse != "" && err.Error() != mockClient.errorInResponse { 54 | // An error was expected from Response.Error() to be returned 55 | t.Errorf("expected to fail with: %v, received error was: %v", mockClient.errorInResponse, err) 56 | } 57 | 58 | // No response should have been returned 59 | if mockClient.expectedResponse == nil && response != nil { 60 | t.Errorf("expected response: nil, receivedResponse: %v", response) 61 | } else if mockClient.expectedResponse != nil && response == nil && mockClient.errorInResponse == "" { 62 | // It was expected that no response be returned, but not because of an error in the Response content 63 | t.Errorf("expected response: %v, received: nil", mockClient.expectedResponse) 64 | } else if response != nil && mockClient.expectedResponse != nil { 65 | // It was expected that the same object was returned as a response as the expectedResponse 66 | if len(response) != 
len(mockClient.expectedResponse.Results) { 67 | t.Errorf( 68 | "expected response length: %d, received response length: %d", 69 | len(mockClient.expectedResponse.Results), 70 | len(response), 71 | ) 72 | } 73 | } 74 | } 75 | } 76 | 77 | func TestExecuteShowQueryWithFailure(t *testing.T) { 78 | database := "database" 79 | queryExecutor := &defaultInfluxQueryService{} 80 | _, err := queryExecutor.ExecuteShowQuery(nil, database, "NO SHOW query") 81 | if err == nil { 82 | t.Error("expected to fail because query didn't start with 'SHOW '") 83 | } 84 | 85 | badCases := []MockClient{ 86 | { //Expect error to be thrown when executing the query, no response 87 | t: t, 88 | expectedQuery: "ShOw something0", 89 | expectedError: fmt.Errorf("error"), 90 | }, { //Expect failure because more than one result is returned 91 | t: t, 92 | expectedQuery: "SHOW something1", 93 | expectedResponse: &influx.Response{ 94 | Results: []influx.Result{ 95 | {}, 96 | {}, 97 | }, 98 | }, 99 | }, { // Expect client to return a single result with multiple series 100 | t: t, 101 | expectedQuery: "SHOW something2", 102 | expectedResponse: &influx.Response{ 103 | Results: []influx.Result{ 104 | { 105 | Series: []influxModels.Row{ 106 | {}, 107 | {}, 108 | }, 109 | }, 110 | }, 111 | }, 112 | }, { // Expect client to return a result with values not castable to string 113 | t: t, 114 | expectedQuery: "SHOW something3", 115 | expectedResponse: &influx.Response{ 116 | Results: []influx.Result{ 117 | { 118 | Series: []influxModels.Row{ 119 | { 120 | Values: [][]interface{}{{1}}, 121 | }, 122 | }, 123 | }, 124 | }, 125 | }, 126 | }} 127 | 128 | for _, badCase := range badCases { 129 | var client influx.Client 130 | client = &badCase 131 | _, err := queryExecutor.ExecuteShowQuery(client, database, badCase.expectedQuery) 132 | if err == nil { 133 | t.Error("error not returned when one was expected") 134 | } 135 | } 136 | 137 | } 138 | 139 | func TestExecuteShowQueryWithOkResults(t *testing.T) { 140 | database := "database" 141 | goodQuery := "SHOW something" 142 | goodValue := "1" 143 | var goodCaseWithResults influx.Client 144 | goodCaseWithResults = &MockClient{ 145 | t: t, 146 | expectedQuery: goodQuery, 147 | expectedResponse: &influx.Response{ 148 | Results: []influx.Result{ 149 | { 150 | Series: []influxModels.Row{ 151 | { 152 | Values: [][]interface{}{{goodValue}}, 153 | }, 154 | }, 155 | }, 156 | }, 157 | }, 158 | } 159 | 160 | queryExecutor := &defaultInfluxQueryService{} 161 | 162 | response, err := queryExecutor.ExecuteShowQuery(goodCaseWithResults, database, goodQuery) 163 | if err != nil { 164 | t.Errorf("Expected no error to happen. Got '%s'", err.Error()) 165 | } 166 | 167 | if response == nil || response.Values == nil { 168 | t.Errorf("Expected a response with non-nil values. Got %v", response) 169 | } 170 | 171 | values := response.Values 172 | if len(values) != 1 || len(values[0]) != 1 || values[0][0] != goodValue { 173 | t.Errorf("Expected one row with one value and that value to be '%s', but got '%v'", goodValue, response) 174 | } 175 | 176 | var goodCaseNoResults influx.Client 177 | goodCaseNoResults = &MockClient{ 178 | t: t, 179 | expectedQuery: goodQuery, 180 | expectedResponse: &influx.Response{ 181 | Results: []influx.Result{ 182 | { 183 | Series: []influxModels.Row{}, 184 | }, 185 | }, 186 | }, 187 | } 188 | 189 | response, err = queryExecutor.ExecuteShowQuery(goodCaseNoResults, database, goodQuery) 190 | if err != nil { 191 | t.Errorf("Expected no error to happen. 
Got '%s'", err.Error()) 192 | } 193 | 194 | if response == nil || response.Values == nil { 195 | t.Errorf("Expected a response with non-nil values. Got %v", response) 196 | } 197 | 198 | values = response.Values 199 | if len(values) != 0 { 200 | t.Errorf("Expected an empty values matrix, but got '%v'", response) 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /internal/schemamanagement/influx/influxqueries/mock_influx_client.go: -------------------------------------------------------------------------------- 1 | package influxqueries 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | influx "github.com/influxdata/influxdb/client/v2" 9 | ) 10 | 11 | // MockClient mocks an InfluxDB client 12 | type MockClient struct { 13 | t *testing.T 14 | expectedQuery string 15 | expectedResponse *influx.Response 16 | expectedError error 17 | errorInResponse string 18 | closeCalled bool 19 | } 20 | 21 | // Ping mock 22 | func (mc *MockClient) Ping(timeout time.Duration) (time.Duration, string, error) { 23 | return timeout, "", nil 24 | } 25 | 26 | // Write mock 27 | func (mc *MockClient) Write(bp influx.BatchPoints) error { 28 | return nil 29 | } 30 | 31 | // Query mock 32 | func (mc *MockClient) Query(q influx.Query) (*influx.Response, error) { 33 | if q.Command != mc.expectedQuery { 34 | errorString := fmt.Sprintf("Expected <%s> as a query command, got: <%s>", mc.expectedQuery, q.Command) 35 | mc.t.Error(errorString) 36 | return nil, fmt.Errorf(errorString) 37 | } 38 | 39 | return mc.expectedResponse, mc.expectedError 40 | } 41 | 42 | // QueryAsChunk mock 43 | func (mc *MockClient) QueryAsChunk(q influx.Query) (*influx.ChunkedResponse, error) { 44 | return nil, nil 45 | } 46 | 47 | // Close mock 48 | func (mc *MockClient) Close() error { 49 | mc.closeCalled = true 50 | return nil 51 | } 52 | -------------------------------------------------------------------------------- /internal/schemamanagement/schema_manager.go: -------------------------------------------------------------------------------- 1 | package schemamanagement 2 | 3 | import ( 4 | "github.com/timescale/outflux/internal/idrf" 5 | "github.com/timescale/outflux/internal/schemamanagement/schemaconfig" 6 | ) 7 | 8 | // SchemaManager defines methods for schema discovery and preparation 9 | type SchemaManager interface { 10 | DiscoverDataSets() ([]string, error) 11 | FetchDataSet(dataSetIdentifier string) (*idrf.DataSet, error) 12 | PrepareDataSet(*idrf.DataSet, schemaconfig.SchemaStrategy) error 13 | } 14 | -------------------------------------------------------------------------------- /internal/schemamanagement/schema_manager_service.go: -------------------------------------------------------------------------------- 1 | package schemamanagement 2 | 3 | import ( 4 | influx "github.com/influxdata/influxdb/client/v2" 5 | "github.com/timescale/outflux/internal/connections" 6 | influxSchema "github.com/timescale/outflux/internal/schemamanagement/influx" 7 | "github.com/timescale/outflux/internal/schemamanagement/influx/discovery" 8 | tsSchema "github.com/timescale/outflux/internal/schemamanagement/ts" 9 | ) 10 | 11 | // SchemaManagerService defines methods for creating SchemaManagers 12 | type SchemaManagerService interface { 13 | Influx(client influx.Client, db, rp string, onConflictConvertIntToFloat bool) SchemaManager 14 | TimeScale(dbConn connections.PgxWrap, schema, chunkTimeInterval string) SchemaManager 15 | } 16 | 17 | // NewSchemaManagerService returns an instance of 
17 | // NewSchemaManagerService returns an instance of SchemaManagerService
18 | func NewSchemaManagerService(measureExplorer discovery.MeasureExplorer, tagExplorer discovery.TagExplorer, fieldExplorer discovery.FieldExplorer) SchemaManagerService {
19 | return &schemaManagerService{
20 | tagExplorer: tagExplorer,
21 | fieldExplorer: fieldExplorer,
22 | measureExplorer: measureExplorer,
23 | }
24 | }
25 | 
26 | type schemaManagerService struct {
27 | tagExplorer discovery.TagExplorer
28 | fieldExplorer discovery.FieldExplorer
29 | measureExplorer discovery.MeasureExplorer
30 | }
31 | 
32 | // Influx creates a new schema manager that can discover influx data sets
33 | func (s *schemaManagerService) Influx(client influx.Client, db, rp string, onConflictConvertIntToFloat bool) SchemaManager {
34 | return influxSchema.NewSchemaManager(client, db, rp, onConflictConvertIntToFloat, s.measureExplorer, s.tagExplorer, s.fieldExplorer)
35 | }
36 | 
37 | // TimeScale creates a new schema manager that can discover and prepare TimescaleDB data sets
38 | func (s *schemaManagerService) TimeScale(dbConn connections.PgxWrap, schema, chunkTimeInterval string) SchemaManager {
39 | return tsSchema.NewTSSchemaManager(dbConn, schema, chunkTimeInterval)
40 | }
41 | 
--------------------------------------------------------------------------------
/internal/schemamanagement/schemaconfig/schema_strategy.go:
--------------------------------------------------------------------------------
1 | package schemaconfig
2 | 
3 | import "fmt"
4 | 
5 | // SchemaStrategy is an enum representing what the ingestor should do
6 | // regarding the schema in the target database
7 | type SchemaStrategy int
8 | 
9 | // Enum values for SchemaStrategy
10 | const (
11 | // Validate that the selected database matches the IDRF data set info
12 | ValidateOnly SchemaStrategy = iota + 1
13 | // Create the data set info if it's missing, fail if incompatible
14 | CreateIfMissing
15 | // Drop existing table and create a new one
16 | DropAndCreate
17 | // DROP CASCADE existing table and create a new one
18 | DropCascadeAndCreate
19 | )
20 | 
21 | func (s SchemaStrategy) String() string {
22 | switch s {
23 | case ValidateOnly:
24 | return "ValidateOnly"
25 | case CreateIfMissing:
26 | return "CreateIfMissing"
27 | case DropCascadeAndCreate:
28 | return "DropCascadeAndCreate"
29 | case DropAndCreate:
30 | return "DropAndCreate"
31 | default:
32 | panic("unknown type")
33 | }
34 | }
35 | 
36 | // ParseStrategyString returns the enum value matching the string, or an error
37 | func ParseStrategyString(strategy string) (SchemaStrategy, error) {
38 | switch strategy {
39 | case "ValidateOnly":
40 | return ValidateOnly, nil
41 | case "CreateIfMissing":
42 | return CreateIfMissing, nil
43 | case "DropCascadeAndCreate":
44 | return DropCascadeAndCreate, nil
45 | case "DropAndCreate":
46 | return DropAndCreate, nil
47 | default:
48 | return ValidateOnly, fmt.Errorf("unknown schema strategy '%s'", strategy)
49 | }
50 | }
51 | 
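// Round-trip sketch for the enum (illustrative; "DropAndCreate" is one of the
// four accepted names above):
//
//    s, err := ParseStrategyString("DropAndCreate")
//    // s == DropAndCreate, err == nil
//    fmt.Println(s) // prints "DropAndCreate" via String()
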
--------------------------------------------------------------------------------
/internal/schemamanagement/ts/hypertable_dimension_explorer_i_test.go:
--------------------------------------------------------------------------------
1 | // +build integration
2 | 
3 | package ts
4 | 
5 | import (
6 | "fmt"
7 | "testing"
8 | 
9 | "github.com/timescale/outflux/internal/testutils"
10 | )
11 | 
12 | func TestIsTimePartitionedBy(t *testing.T) {
13 | db := "test_time_partitioned_by"
14 | if err := testutils.DeleteTimescaleDb(db); err != nil {
15 | t.Fatalf("could not prepare db: %v", err)
16 | }
17 | 
18 | if err := testutils.CreateTimescaleDb(db); err != nil {
19 | t.Fatalf("could not prepare db: %v", err)
20 | }
21 | 
22 | defer testutils.DeleteTimescaleDb(db)
23 | 
24 | checker := defaultHypertableDimensionExplorer{}
25 | 
26 | notHypertable := "not_hypertable"
27 | wrongPartitionType := "partitioned_by_int"
28 | wrongPartitioningCol := "partitioned_by_other_name"
29 | wrongCol := "wrong_column"
30 | okTable := "good_hypertable"
31 | okCol := "ok_column"
32 | 
33 | dbConn, err := testutils.OpenTSConn(db)
34 | if err != nil {
35 | t.Fatal(err)
36 | }
37 | defer dbConn.Close()
38 | 
39 | createOkTableQuery := fmt.Sprintf("CREATE TABLE %s (%s TIMESTAMPTZ NOT NULL)", okTable, okCol)
40 | createOkHypertable := fmt.Sprintf("SELECT create_hypertable('%s','%s')", okTable, okCol)
41 | createNotHypertable := fmt.Sprintf("CREATE TABLE %s(%s int)", notHypertable, okCol)
42 | createWrongPartColumnType := fmt.Sprintf("CREATE TABLE %s(%s INTEGER NOT NULL)", wrongPartitionType, okCol)
43 | createWrongPartColHypertable := fmt.Sprintf("SELECT create_hypertable('%s','%s', chunk_time_interval => 100)", wrongPartitionType, okCol)
44 | createWrongPartColumnName := fmt.Sprintf("CREATE TABLE %s(%s TIMESTAMP NOT NULL)", wrongPartitioningCol, wrongCol)
45 | createWrongPartColNameHypertable := fmt.Sprintf("SELECT create_hypertable('%s', '%s')", wrongPartitioningCol, wrongCol)
46 | 
47 | setupQueries := []string{
48 | createOkTableQuery, createOkHypertable,
49 | createNotHypertable,
50 | createWrongPartColumnType, createWrongPartColHypertable,
51 | createWrongPartColumnName, createWrongPartColNameHypertable,
52 | }
53 | for _, query := range setupQueries {
54 | if _, err := dbConn.Exec(query); err != nil {
55 | t.Fatalf("could not prepare the test tables: %v", err)
56 | }
57 | }
58 | tcs := []struct {
59 | table string
60 | timeCol string
61 | expectRes bool
62 | }{
63 | {table: notHypertable},
64 | {table: wrongPartitionType},
65 | {table: wrongPartitioningCol, timeCol: okCol},
66 | {table: okTable, timeCol: okCol, expectRes: true},
67 | }
68 | 
69 | for _, tc := range tcs {
70 | res, err := checker.isTimePartitionedBy(dbConn, "", tc.table, tc.timeCol)
71 | if err != nil {
72 | t.Fatalf("unexpected error: %v", err)
73 | }
74 | if res != tc.expectRes {
75 | t.Fatalf("expected %v, got %v", tc.expectRes, res)
76 | }
77 | }
78 | }
79 | 
--------------------------------------------------------------------------------
/internal/schemamanagement/ts/idrr_to_pg_type_test.go:
--------------------------------------------------------------------------------
1 | package ts
2 | 
3 | import (
4 | "testing"
5 | 
6 | "github.com/timescale/outflux/internal/idrf"
7 | )
8 | 
9 | func TestIdrfToPgType(t *testing.T) {
10 | testCases := []struct {
11 | in idrf.DataType
12 | out string
13 | }{
14 | {idrf.IDRFBoolean, "BOOLEAN"},
15 | {idrf.IDRFDouble, "FLOAT"},
16 | {idrf.IDRFInteger32, "INTEGER"},
17 | {idrf.IDRFInteger64, "BIGINT"},
18 | {idrf.IDRFString, "TEXT"},
19 | {idrf.IDRFTimestamp, "TIMESTAMP"},
20 | {idrf.IDRFTimestamptz, "TIMESTAMPTZ"},
21 | {idrf.IDRFSingle, "FLOAT"},
22 | {idrf.IDRFJson, "JSONB"},
23 | }
24 | 
25 | for _, tc := range testCases {
26 | res := idrfToPgType(tc.in)
27 | if res != tc.out {
28 | t.Errorf("Expected: %v\ngot: %v", tc.out, res)
29 | }
30 | }
31 | }
32 | 
--------------------------------------------------------------------------------
/internal/schemamanagement/ts/pg_type_to_idrf_test.go:
--------------------------------------------------------------------------------
1 | package ts
2 | 
3 | import (
4 | "testing"
5 | 
6 | "github.com/timescale/outflux/internal/idrf"
7 | )
8 | 
9 | func TestPgTypeToIdrf(t *testing.T) {
10 | testCases := []struct {
11 | in string
12 | out idrf.DataType
13 | }{
14 | {"text", idrf.IDRFString},
15 | {"timestamp with time zone", idrf.IDRFTimestamptz}, 16 | {"timestamp without time zone", idrf.IDRFTimestamp}, 17 | {"double precision", idrf.IDRFDouble}, 18 | {"integer", idrf.IDRFInteger32}, 19 | {"bigint", idrf.IDRFInteger64}, 20 | {"jsonb", idrf.IDRFJson}, 21 | {"boolean", idrf.IDRFBoolean}, 22 | {"asdasd", idrf.IDRFUnknown}, 23 | } 24 | 25 | for _, tc := range testCases { 26 | res := pgTypeToIdrf(tc.in) 27 | if res != tc.out { 28 | t.Errorf("Expected %v, got %v", tc.out, res) 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /internal/schemamanagement/ts/table_compatibility.go: -------------------------------------------------------------------------------- 1 | package ts 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/timescale/outflux/internal/idrf" 7 | ) 8 | 9 | func isExistingTableCompatible(existingColumns []*columnDesc, requiredColumns []*idrf.Column, timeCol string) error { 10 | columnsByName := make(map[string]*columnDesc) 11 | for _, column := range existingColumns { 12 | columnsByName[column.columnName] = column 13 | } 14 | 15 | for _, reqColumn := range requiredColumns { 16 | colName := reqColumn.Name 17 | var existingCol *columnDesc 18 | var ok bool 19 | if existingCol, ok = columnsByName[colName]; !ok { 20 | return fmt.Errorf("Required column %s not found in existing table", colName) 21 | } 22 | 23 | existingType := pgTypeToIdrf(existingCol.dataType) 24 | if !existingType.CanFitInto(reqColumn.DataType) { 25 | return fmt.Errorf( 26 | "Required column %s of type %s is not compatible with existing type %s", 27 | colName, reqColumn.DataType, existingType) 28 | } 29 | 30 | // Only time column is allowed to have a NOT NULL constraint 31 | if !existingCol.isColumnNullable() && existingCol.columnName != timeCol { 32 | return fmt.Errorf("Existing column %s is not nullable. 
Can't guarantee data transfer", existingCol.columnName) 33 | } 34 | } 35 | 36 | return nil 37 | } 38 | -------------------------------------------------------------------------------- /internal/schemamanagement/ts/table_compatibility_test.go: -------------------------------------------------------------------------------- 1 | package ts 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/timescale/outflux/internal/idrf" 7 | ) 8 | 9 | func TestExistingTableCompatible(t *testing.T) { 10 | testCases := []struct { 11 | existingColumns []*columnDesc 12 | reqColumns []*idrf.Column 13 | timeCol string 14 | desc string 15 | errorExpected bool 16 | }{ 17 | { 18 | existingColumns: []*columnDesc{}, 19 | reqColumns: []*idrf.Column{{Name: "a"}}, 20 | desc: "required column not found in existing table", 21 | errorExpected: true, 22 | }, { 23 | existingColumns: []*columnDesc{{columnName: "a", dataType: "text"}}, 24 | reqColumns: []*idrf.Column{{Name: "a", DataType: idrf.IDRFBoolean}}, 25 | desc: "required data type is incompatible with existing column type", 26 | errorExpected: true, 27 | }, { 28 | existingColumns: []*columnDesc{ 29 | {columnName: "a", dataType: "text"}, 30 | {columnName: "b", dataType: "text", isNullable: "NO"}}, 31 | reqColumns: []*idrf.Column{ 32 | {Name: "a", DataType: idrf.IDRFString}, 33 | {Name: "b", DataType: idrf.IDRFString}}, 34 | timeCol: "a", 35 | desc: "only time column should be not-nullable", 36 | errorExpected: true, 37 | }, { 38 | existingColumns: []*columnDesc{ 39 | {columnName: "a", dataType: "text"}, 40 | {columnName: "b", dataType: "text", isNullable: "YES"}}, 41 | reqColumns: []*idrf.Column{ 42 | {Name: "a", DataType: idrf.IDRFString}, 43 | {Name: "b", DataType: idrf.IDRFString}}, 44 | timeCol: "a", 45 | desc: "all is good", 46 | errorExpected: false, 47 | }, 48 | } 49 | 50 | for _, testCase := range testCases { 51 | err := isExistingTableCompatible(testCase.existingColumns, testCase.reqColumns, testCase.timeCol) 52 | if testCase.errorExpected && err == nil { 53 | t.Errorf("Tested: %s.\nExpected an error. 
None returned", testCase.desc) 54 | } 55 | 56 | if !testCase.errorExpected && err != nil { 57 | t.Errorf("Tested: %s.\nError wasn't expected, got:\n%v", testCase.desc, err) 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /internal/schemamanagement/ts/table_creator.go: -------------------------------------------------------------------------------- 1 | package ts 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "strings" 7 | "time" 8 | 9 | "github.com/timescale/outflux/internal/connections" 10 | "github.com/timescale/outflux/internal/idrf" 11 | ) 12 | 13 | const ( 14 | createTableQueryTemplate = `CREATE TABLE %s(%s)` 15 | columnDefTemplate = `"%s" %s` 16 | tableNameTemplate = `"%s"` 17 | timeColTemplate = tableNameTemplate 18 | tableNameWithSchemaTemplate = `"%s"."%s"` 19 | createHTWithChunkIntervalQueryTemplate = `SELECT create_hypertable('%s', '%s', chunk_time_interval => interval '%s');` 20 | createHTQueryTemplate = `SELECT create_hypertable('%s', '%s');` 21 | createTimescaleExtensionQuery = "CREATE EXTENSION IF NOT EXISTS timescaledb" 22 | metadataKey = "outflux_last_usage" 23 | getMetadataTemplate = `SELECT EXISTS (SELECT 1 FROM "%s"."%s" WHERE key = $1)` 24 | insertMetadataTemplate = "INSERT INTO %s.%s VALUES($1, $2)" 25 | updateMetadataTemplate = "UPDATE %s.%s SET value=$1 WHERE key=$2" 26 | ) 27 | 28 | type tableCreator interface { 29 | CreateTable(connections.PgxWrap, *idrf.DataSet) error 30 | CreateHypertable(connections.PgxWrap, *idrf.DataSet) error 31 | CreateTimescaleExtension(connections.PgxWrap) error 32 | UpdateMetadata(db connections.PgxWrap, metadataTableName string) error 33 | } 34 | 35 | func newTableCreator(schema, chunkTimeInterval string) tableCreator { 36 | return &defaultTableCreator{schema: schema, chunkTimeInterval: chunkTimeInterval} 37 | } 38 | 39 | type defaultTableCreator struct { 40 | schema string 41 | chunkTimeInterval string 42 | } 43 | 44 | func (d *defaultTableCreator) CreateTable(dbConn connections.PgxWrap, info *idrf.DataSet) error { 45 | query := dataSetToSQLTableDef(d.schema, info) 46 | log.Printf("Creating table with:\n %s", query) 47 | 48 | if _, err := dbConn.Exec(query); err != nil { 49 | return err 50 | } 51 | 52 | if err := d.CreateTimescaleExtension(dbConn); err != nil { 53 | return err 54 | } 55 | 56 | return d.CreateHypertable(dbConn, info) 57 | } 58 | 59 | func (d *defaultTableCreator) CreateHypertable(dbConn connections.PgxWrap, info *idrf.DataSet) error { 60 | var hypertableName string 61 | 62 | if d.schema != "" { 63 | hypertableName = fmt.Sprintf(tableNameWithSchemaTemplate, d.schema, info.DataSetName) 64 | } else { 65 | hypertableName = fmt.Sprintf(tableNameTemplate, info.DataSetName) 66 | } 67 | 68 | var hypertableQuery string 69 | if d.chunkTimeInterval != "" { 70 | hypertableQuery = fmt.Sprintf(createHTWithChunkIntervalQueryTemplate, hypertableName, info.TimeColumn, d.chunkTimeInterval) 71 | } else { 72 | hypertableQuery = fmt.Sprintf(createHTQueryTemplate, hypertableName, info.TimeColumn) 73 | } 74 | 75 | log.Printf("Creating hypertable with: %s", hypertableQuery) 76 | _, err := dbConn.Exec(hypertableQuery) 77 | return err 78 | } 79 | 80 | func (d *defaultTableCreator) CreateTimescaleExtension(dbConn connections.PgxWrap) error { 81 | log.Printf("Preparing TimescaleDB extension:\n%s", createTimescaleExtensionQuery) 82 | _, err := dbConn.Exec(createTimescaleExtensionQuery) 83 | return err 84 | } 85 | 86 | func (d *defaultTableCreator) UpdateMetadata(dbConn connections.PgxWrap, 
metadataTableName string) error {
87 | log.Printf("Updating Timescale metadata")
88 | metadataQuery := fmt.Sprintf(getMetadataTemplate, timescaleCatalogSchema, metadataTableName)
89 | rows, err := dbConn.Query(metadataQuery, metadataKey)
90 | if err != nil {
91 | return fmt.Errorf("could not check if Outflux metadata already exists. %v", err)
92 | }
93 | exists := false
94 | if !rows.Next() {
95 | rows.Close()
96 | return fmt.Errorf("could not check if Outflux metadata already exists: no rows returned")
97 | }
98 | err = rows.Scan(&exists)
99 | if err != nil {
100 | rows.Close()
101 | return fmt.Errorf("could not check if Outflux installation metadata already exists. %v", err)
102 | }
103 | 
104 | rows.Close()
105 | currentDateTime := time.Now().Format(time.RFC3339)
106 | if exists {
107 | updateMetadata := fmt.Sprintf(updateMetadataTemplate, timescaleCatalogSchema, metadataTableName)
108 | _, err = dbConn.Exec(updateMetadata, currentDateTime, metadataKey)
109 | } else {
110 | insertMetadata := fmt.Sprintf(insertMetadataTemplate, timescaleCatalogSchema, metadataTableName)
111 | _, err = dbConn.Exec(insertMetadata, metadataKey, currentDateTime)
112 | }
113 | return err
114 | }
115 | 
116 | func dataSetToSQLTableDef(schema string, dataSet *idrf.DataSet) string {
117 | columnDefinitions := make([]string, len(dataSet.Columns))
118 | for i, column := range dataSet.Columns {
119 | dataType := idrfToPgType(column.DataType)
120 | columnDefinitions[i] = fmt.Sprintf(columnDefTemplate, column.Name, dataType)
121 | }
122 | 
123 | columnsString := strings.Join(columnDefinitions, ", ")
124 | 
125 | var tableName string
126 | if schema != "" {
127 | tableName = fmt.Sprintf(tableNameWithSchemaTemplate, schema, dataSet.DataSetName)
128 | } else {
129 | tableName = fmt.Sprintf(tableNameTemplate, dataSet.DataSetName)
130 | }
131 | 
132 | return fmt.Sprintf(createTableQueryTemplate, tableName, columnsString)
133 | }
134 | 
--------------------------------------------------------------------------------
/internal/schemamanagement/ts/table_dropper.go:
--------------------------------------------------------------------------------
1 | package ts
2 | 
3 | import (
4 | "fmt"
5 | "log"
6 | 
7 | "github.com/timescale/outflux/internal/connections"
8 | )
9 | 
10 | const (
11 | dropTableQueryTemplate = "DROP TABLE \"%s\""
12 | dropTableCascadeQueryTemplate = "DROP TABLE \"%s\" CASCADE"
13 | )
14 | 
15 | type tableDropper interface {
16 | Drop(db connections.PgxWrap, table string, cascade bool) error
17 | }
18 | 
19 | type defaultTableDropper struct{}
20 | 
21 | func newTableDropper() tableDropper {
22 | return &defaultTableDropper{}
23 | }
24 | func (d *defaultTableDropper) Drop(db connections.PgxWrap, table string, cascade bool) error {
25 | var query string
26 | if cascade {
27 | query = fmt.Sprintf(dropTableCascadeQueryTemplate, table)
28 | } else {
29 | query = fmt.Sprintf(dropTableQueryTemplate, table)
30 | }
31 | 
32 | log.Printf("Executing: %s", query)
33 | _, err := db.Exec(query)
34 | if err != nil {
35 | return err
36 | }
37 | return nil
38 | }
39 | 
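// Illustrative SQL emitted by Drop (a sketch; "cpu" is a placeholder table name):
//
//    DROP TABLE "cpu"          -- cascade == false
//    DROP TABLE "cpu" CASCADE  -- cascade == true
//
// The CASCADE form also removes dependent objects such as views built on the table.
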
--------------------------------------------------------------------------------
/internal/schemamanagement/ts/type_conversion.go:
--------------------------------------------------------------------------------
1 | package ts
2 | 
3 | import (
4 | "strings"
5 | 
6 | "github.com/timescale/outflux/internal/idrf"
7 | )
8 | 
9 | func pgTypeToIdrf(pgType string) idrf.DataType {
10 | lowerCaseType := strings.ToLower(pgType)
11 | switch lowerCaseType {
12 | case "text":
13 | return idrf.IDRFString
14 | case "timestamp with time zone":
15 | return idrf.IDRFTimestamptz
16 | case "timestamp without time zone":
17 | return idrf.IDRFTimestamp
18 | case "double precision":
19 | return idrf.IDRFDouble
20 | case "integer":
21 | return idrf.IDRFInteger32
22 | case "bigint":
23 | return idrf.IDRFInteger64
24 | case "jsonb", "json":
25 | return idrf.IDRFJson
26 | case "boolean":
27 | return idrf.IDRFBoolean
28 | default:
29 | return idrf.IDRFUnknown
30 | }
31 | }
32 | 
33 | func idrfToPgType(dataType idrf.DataType) string {
34 | switch dataType {
35 | case idrf.IDRFBoolean:
36 | return "BOOLEAN"
37 | case idrf.IDRFDouble:
38 | return "FLOAT"
39 | case idrf.IDRFInteger32:
40 | return "INTEGER"
41 | case idrf.IDRFString:
42 | return "TEXT"
43 | case idrf.IDRFTimestamp:
44 | return "TIMESTAMP"
45 | case idrf.IDRFTimestamptz:
46 | return "TIMESTAMPTZ"
47 | case idrf.IDRFInteger64:
48 | return "BIGINT"
49 | case idrf.IDRFSingle:
50 | return "FLOAT"
51 | case idrf.IDRFJson:
52 | return "JSONB"
53 | default:
54 | panic("Unexpected value")
55 | }
56 | }
57 | 
--------------------------------------------------------------------------------
/internal/testutils/config.go:
--------------------------------------------------------------------------------
1 | package testutils
2 | 
3 | const (
4 | // InfluxHost is the default InfluxDB host to be used in integration tests
5 | InfluxHost = "http://localhost:8086"
6 | // TsConnStringTemplate is the conn string for the default Timescale host to be used in integration tests
7 | TsConnStringTemplate = "user=postgres password=postgres port=5433 dbname=%s sslmode=disable"
8 | defaultPgDb = "postgres"
9 | )
10 | 
--------------------------------------------------------------------------------
/internal/testutils/server_preparation.go:
--------------------------------------------------------------------------------
1 | package testutils
2 | 
3 | import (
4 | "fmt"
5 | "log"
6 | 
7 | influx "github.com/influxdata/influxdb/client/v2"
8 | "github.com/jackc/pgx"
9 | "github.com/timescale/outflux/internal/schemamanagement/influx/influxqueries"
10 | )
11 | 
12 | // PrepareServersForITest creates a database with the same name on the default influx server and default timescale server
13 | func PrepareServersForITest(db string) error {
14 | if err := CreateInfluxDB(db); err != nil {
15 | return err
16 | }
17 | 
18 | return CreateTimescaleDb(db)
19 | }
20 | 
21 | // ClearServersAfterITest deletes a database on both the default influx and timescale servers
22 | func ClearServersAfterITest(db string) {
23 | if err := DeleteInfluxDb(db); err != nil {
24 | log.Printf("could not delete influx db: %v", err)
25 | }
26 | 
27 | if err := DeleteTimescaleDb(db); err != nil {
28 | log.Printf("could not delete timescale db: %v", err)
29 | }
30 | }
31 | 
32 | func newInfluxClient() (influx.Client, error) {
33 | clientConfig := influx.HTTPConfig{
34 | Addr: InfluxHost,
35 | }
36 | 
37 | return influx.NewHTTPClient(clientConfig)
38 | }
39 | 
40 | // CreateInfluxDB creates a new influx database on the default influx server. Used for integration tests
41 | func CreateInfluxDB(db string) error {
42 | queryService := influxqueries.NewInfluxQueryService()
43 | newClient, err := newInfluxClient()
44 | if err != nil {
45 | return err
46 | }
47 | _, err = queryService.ExecuteQuery(newClient, db, "CREATE DATABASE "+db)
48 | newClient.Close()
49 | return err
50 | }
51 | 
52 | // DeleteInfluxDb deletes an influx database on the default influx server. Used for integration tests
53 | func DeleteInfluxDb(db string) error {
54 | queryService := influxqueries.NewInfluxQueryService()
55 | client, err := newInfluxClient()
56 | if err != nil {
57 | return err
58 | }
59 | 
60 | _, err = queryService.ExecuteQuery(client, db, "DROP DATABASE "+db)
61 | client.Close()
62 | return err
63 | 
64 | }
65 | 
66 | // CreateInfluxMeasure creates a measure with the specified name. For each point the tags and field values are given
67 | // as maps
68 | func CreateInfluxMeasure(db, measure string, tags []*map[string]string, values []*map[string]interface{}) error {
69 | client, err := newInfluxClient()
70 | if err != nil {
71 | return err
72 | }
73 | defer client.Close()
74 | 
75 | bp, _ := influx.NewBatchPoints(influx.BatchPointsConfig{Database: db})
76 | 
77 | for i, tagSet := range tags {
78 | point, _ := influx.NewPoint(
79 | measure,
80 | *tagSet,
81 | *values[i],
82 | )
83 | bp.AddPoint(point)
84 | }
85 | 
86 | return client.Write(bp)
87 | }
88 | 
89 | // CreateInfluxMeasureWithRP creates a measure with the specified name and specified RP. For each point the tags and field values are given
90 | // as maps
91 | func CreateInfluxMeasureWithRP(db, rp, measure string, tags []*map[string]string, values []*map[string]interface{}) error {
92 | client, err := newInfluxClient()
93 | if err != nil {
94 | return err
95 | }
96 | defer client.Close()
97 | 
98 | bp, _ := influx.NewBatchPoints(influx.BatchPointsConfig{Database: db, RetentionPolicy: rp})
99 | 
100 | for i, tagSet := range tags {
101 | point, _ := influx.NewPoint(
102 | measure,
103 | *tagSet,
104 | *values[i],
105 | )
106 | bp.AddPoint(point)
107 | }
108 | 
109 | return client.Write(bp)
110 | }
111 | 
112 | // CreateInfluxRP creates a retention policy with the specified name
113 | // and a 1 day duration on the given database
114 | func CreateInfluxRP(db, rp string) error {
115 | client, err := newInfluxClient()
116 | if err != nil {
117 | return err
118 | }
119 | 
120 | queryStr := fmt.Sprintf(`CREATE RETENTION POLICY "%s" ON %s DURATION 1d REPLICATION 1`, rp, db)
121 | query := influx.NewQuery(queryStr, db, "")
122 | 
123 | _, err = client.Query(query)
124 | client.Close()
125 | return err
126 | }
127 | 
128 | // CreateTimescaleDb creates a new database on the default server and then creates the extension on it
129 | func CreateTimescaleDb(db string) error {
130 | dbConn, err := OpenTSConn(defaultPgDb)
131 | if err != nil {
132 | return err
133 | }
134 | _, err = dbConn.Exec("CREATE DATABASE " + db)
135 | dbConn.Close()
136 | return err
137 | }
138 | 
139 | // CreateTimescaleSchema creates a new schema in the specified db
140 | func CreateTimescaleSchema(db, schema string) error {
141 | dbConn, err := OpenTSConn(db)
142 | if err != nil {
143 | return err
144 | }
145 | _, err = dbConn.Exec("CREATE SCHEMA " + schema)
146 | dbConn.Close()
147 | return err
148 | }
149 | 
150 | // OpenTSConn opens a connection to a TimescaleDB with the default (super admin) user/pass
151 | func OpenTSConn(db string) (*pgx.Conn, error) {
152 | connString := fmt.Sprintf(TsConnStringTemplate, db)
153 | connConfig, _ := pgx.ParseConnectionString(connString)
154 | log.Printf("opening ts conn to '%s' with:\n%s", db, connString)
155 | return pgx.Connect(connConfig)
156 | }
157 | 
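// Non-admin flow sketch (illustrative; the user and password values are
// placeholders, not fixtures from this repo):
//
//    if err := CreateNonAdminInTS("outflux_user", "secret"); err != nil {
//        t.Fatal(err)
//    }
//    conn, err := OpenTsConnWithUser(db, "outflux_user", "secret")
//    if err != nil {
//        t.Fatal(err)
//    }
//    defer conn.Close()
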
158 | // OpenTsConnWithUser opens a connection to a TimescaleDB with the supplied user and pass
159 | func OpenTsConnWithUser(db, user, pass string) (*pgx.Conn, error) {
160 | connString := fmt.Sprintf("user=%s password=%s port=5433 dbname=%s sslmode=disable", user, pass, db)
161 | connConfig, _ := pgx.ParseConnectionString(connString)
162 | log.Printf("opening ts conn to '%s' with:\n%s", db, connString)
163 | return pgx.Connect(connConfig)
164 | }
165 | 
166 | // CreateNonAdminInTS creates a user with login and password
167 | func CreateNonAdminInTS(user, pass string) error {
168 | connString := fmt.Sprintf(TsConnStringTemplate, defaultPgDb)
169 | connConfig, _ := pgx.ParseConnectionString(connString)
170 | log.Printf("opening ts conn to '%s' with: %s", defaultPgDb, connString)
171 | db, err := pgx.Connect(connConfig)
172 | if err != nil {
173 | return err
174 | }
175 | defer db.Close()
176 | _, err = db.Exec(fmt.Sprintf("create user %s with login password '%s'", user, pass))
177 | return err
178 | }
179 | 
180 | // DeleteTimescaleDb drops a database on the default server
181 | func DeleteTimescaleDb(db string) error {
182 | dbConn, err := OpenTSConn(defaultPgDb)
183 | if err != nil {
184 | return err
185 | }
186 | 
187 | _, err = dbConn.Exec("DROP DATABASE IF EXISTS " + db)
188 | dbConn.Close()
189 | return err
190 | }
191 | 
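// Putting the helpers together, a typical integration test prepares both
// servers, writes a measure, and tears everything down (a sketch; the database
// name, measure, tags, and values are illustrative):
//
//    db := "outflux_itest"
//    if err := PrepareServersForITest(db); err != nil {
//        t.Fatalf("could not prepare servers: %v", err)
//    }
//    defer ClearServersAfterITest(db)
//    tags := []*map[string]string{{"region": "eu"}}
//    values := []*map[string]interface{}{{"usage": 0.42}}
//    if err := CreateInfluxMeasure(db, "cpu", tags, values); err != nil {
//        t.Fatalf("could not create measure: %v", err)
//    }
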
--------------------------------------------------------------------------------
/internal/transformation/jsoncombiner/column_combiner.go:
--------------------------------------------------------------------------------
1 | package jsoncombiner
2 | 
3 | import (
4 | "github.com/timescale/outflux/internal/idrf"
5 | )
6 | 
7 | type columnCombiner interface {
8 | combine([]*idrf.Column, map[string]bool, string) []*idrf.Column
9 | }
10 | 
11 | type defColCombiner struct{}
12 | 
13 | // combine takes an array of the original column definitions (originalColumns), a set
14 | // of column names (columnNamesToReplace) which will be replaced with a single JSON type column,
15 | // and resultColumn is the name of the new JSON column.
16 | // The arguments have already been validated.
17 | // The resultColumnName column is placed at the position of the first column designated to be combined/replaced.
18 | func (d *defColCombiner) combine(
19 | originalColumns []*idrf.Column,
20 | columnNamesToReplace map[string]bool,
21 | resultColumnName string) []*idrf.Column {
22 | jsonColumnAdded := false
23 | numNewColumns := len(originalColumns) - len(columnNamesToReplace) + 1
24 | newColumns := make([]*idrf.Column, numNewColumns)
25 | currentColumn := 0
26 | for _, originalColumn := range originalColumns {
27 | _, shouldReplaceColumn := columnNamesToReplace[originalColumn.Name]
28 | if shouldReplaceColumn && !jsonColumnAdded {
29 | newColumns[currentColumn], _ = idrf.NewColumn(resultColumnName, idrf.IDRFJson)
30 | jsonColumnAdded = true
31 | } else if shouldReplaceColumn && jsonColumnAdded {
32 | continue
33 | } else {
34 | newColumns[currentColumn] = originalColumn
35 | }
36 | 
37 | currentColumn++
38 | }
39 | 
40 | return newColumns
41 | }
42 | 
--------------------------------------------------------------------------------
/internal/transformation/jsoncombiner/column_combiner_test.go:
--------------------------------------------------------------------------------
1 | package jsoncombiner
2 | 
3 | import (
4 | "testing"
5 | 
6 | "github.com/timescale/outflux/internal/idrf"
7 | )
8 | 
9 | func TestCombiner(t *testing.T) {
10 | cols := []*idrf.Column{
11 | {Name: "col1", DataType: idrf.IDRFBoolean},
12 | {Name: "col2", DataType: idrf.IDRFDouble},
13 | {Name: "col3", DataType: idrf.IDRFInteger32},
14 | {Name: "col4", DataType: idrf.IDRFSingle},
15 | }
16 | 
17 | resCol := &idrf.Column{Name: "res", DataType: idrf.IDRFJson}
18 | testCases := []struct {
19 | desc string
20 | cols []*idrf.Column
21 | toCombine map[string]bool
22 | expect []*idrf.Column
23 | }{
24 | {
25 | desc: "combine cols in the middle",
26 | cols: cols,
27 | toCombine: map[string]bool{cols[1].Name: true, cols[2].Name: true},
28 | expect: []*idrf.Column{cols[0], resCol, cols[3]},
29 | }, {
30 | desc: "combine cols at end",
31 | cols: cols,
32 | toCombine: map[string]bool{cols[2].Name: true, cols[3].Name: true},
33 | expect: []*idrf.Column{cols[0], cols[1], resCol},
34 | }, {
35 | desc: "combine cols at beginning",
36 | cols: cols,
37 | toCombine: map[string]bool{cols[0].Name: true, cols[1].Name: true},
38 | expect: []*idrf.Column{resCol, cols[2], cols[3]},
39 | }, {
40 | desc: "combine cols that are not adjacent",
41 | cols: cols,
42 | toCombine: map[string]bool{cols[1].Name: true, cols[3].Name: true},
43 | expect: []*idrf.Column{cols[0], resCol, cols[2]},
44 | },
45 | }
46 | 
47 | combiner := &defColCombiner{}
48 | for _, tc := range testCases {
49 | cols := combiner.combine(tc.cols, tc.toCombine, resCol.Name)
50 | if len(cols) != len(tc.expect) {
51 | t.Errorf("test: %s\nexpected length: %d, got: %d", tc.desc, len(tc.expect), len(cols))
52 | continue
53 | }
54 | 
55 | for i, col := range cols {
56 | if col.Name != tc.expect[i].Name || col.DataType != tc.expect[i].DataType {
57 | t.Errorf("test: %s\nat position %d expected name:%s, type:%d\ngot name:%s, type:%d",
58 | tc.desc, i, tc.expect[i].Name, tc.expect[i].DataType, col.Name, col.DataType)
59 | }
60 | }
61 | }
62 | }
63 | 
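// Worked example of the placement rule the cases above assert (a sketch):
//
//    combiner := &defColCombiner{}
//    out := combiner.combine(cols, map[string]bool{"col2": true, "col3": true}, "res")
//    // out is [col1 res col4]: "res" takes the slot of col2, the first
//    // column designated for combination, and col3 is dropped.
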
--------------------------------------------------------------------------------
/internal/transformation/jsoncombiner/json_creator.go:
--------------------------------------------------------------------------------
1 | package jsoncombiner
2 | 
3 | import (
4 | "encoding/json"
5 | 
6 | "github.com/timescale/outflux/internal/idrf"
7 | )
8 | 
9 | type jsonCreator interface {
10 | toJSON(row idrf.Row) ([]byte, error)
11 | }
12 | 
13 | type defCreator struct {
14 | colsToCombine map[int]string
15 | }
16 | 
17 | func (d *defCreator) toJSON(row idrf.Row) ([]byte, error) {
18 | data := make(map[string]interface{})
19 | for colInd, colName := range d.colsToCombine {
20 | val := row[colInd]
21 | data[colName] = val
22 | }
23 | 
24 | return json.Marshal(data)
25 | }
26 | 
--------------------------------------------------------------------------------
/internal/transformation/jsoncombiner/transformer.go:
--------------------------------------------------------------------------------
1 | package jsoncombiner
2 | 
3 | import (
4 | "fmt"
5 | "log"
6 | 
7 | "github.com/timescale/outflux/internal/idrf"
8 | "github.com/timescale/outflux/internal/utils"
9 | )
10 | 
11 | // Transformer defines a Transformer that combines specified columns into one json column
12 | type Transformer struct {
13 | id string
14 | columnsToCombine map[string]bool
15 | resultColumn string
16 | bufferSize uint16
17 | cachedInputBundle *idrf.Bundle
18 | cachedOutputBundle *idrf.Bundle
19 | combinedIndexes map[int]string
20 | validator validator
21 | colCombiner columnCombiner
22 | jsonCreator jsonCreator
23 | }
24 | 
25 | // NewTransformer returns a new instance of a transformer that combines multiple columns
26 | // into one JSON column
27 | func NewTransformer(id string, columnsToCombine []string, resultColumn string) (*Transformer, error) {
28 | if len(columnsToCombine) == 0 {
29 | return nil, fmt.Errorf("at least one column must be selected for combination")
30 | }
31 | 
32 | if resultColumn == "" {
33 | return nil, fmt.Errorf("result column can't be an empty string")
34 | }
35 | 
36 | columnsSet := make(map[string]bool)
37 | for _, colName := range columnsToCombine {
38 | columnsSet[colName] = true
39 | }
40 | 
41 | return &Transformer{
42 | id: id, columnsToCombine: columnsSet, resultColumn: resultColumn,
43 | validator: &defValidator{id: id}, colCombiner: &defColCombiner{},
44 | }, nil
45 | }
46 | 
47 | // ID returns a string that identifies the transformer instance
48 | func (c *Transformer) ID() string {
49 | return c.id
50 | }
51 | 
52 | // Prepare verifies that the transformation can be executed, creates the output channel
53 | // and the transformed data set definition and returns them as an idrf.Bundle
54 | func (c *Transformer) Prepare(input *idrf.Bundle) (*idrf.Bundle, error) {
55 | originDataSet := input.DataDef
56 | 
57 | validationErr := c.validator.validate(originDataSet, c.resultColumn, c.columnsToCombine)
58 | if validationErr != nil {
59 | return nil, validationErr
60 | }
61 | 
62 | newColumns := c.colCombiner.combine(originDataSet.Columns, c.columnsToCombine, c.resultColumn)
63 | newDataSet, err := idrf.NewDataSet(originDataSet.DataSetName, newColumns, originDataSet.TimeColumn)
64 | if err != nil {
65 | return nil, fmt.Errorf("%s: could not generate the transformed data set definition.\nProblem was: %v", c.id, err)
66 | }
67 | 
68 | c.cacheItems(input, newDataSet)
69 | return c.cachedOutputBundle, nil
70 | }
71 | 
72 | // Start consumes the data channel sent as an argument in Prepare.
73 | // For each row in the channel it combines some columns as a single JSON column
74 | // and feeds the transformed row to the channel returned in Prepare
75 | func (c *Transformer) Start(errChan chan error) error {
76 | if c.cachedInputBundle == nil || c.cachedOutputBundle == nil {
77 | return fmt.Errorf("%s: Prepare must be called before Start", c.id)
78 | }
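// (The deferred close that follows is the pipeline's shutdown signal: closing
// the output channel tells the downstream consumer that no more rows are coming,
// whether Start returns normally or after an error.)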
79 | 80 | defer close(c.cachedOutputBundle.DataChan) 81 | log.Printf("%s: starting transformation", c.id) 82 | if err := utils.CheckError(errChan); err != nil { 83 | log.Printf("%s: error received from outside, aborting:%v", c.id, err) 84 | return nil 85 | } 86 | 87 | inputData := c.cachedInputBundle.DataChan 88 | outputChannel := c.cachedOutputBundle.DataChan 89 | for row := range inputData { 90 | transformed, err := c.transformRow(row) 91 | if err != nil { 92 | return err 93 | } 94 | outputChannel <- transformed 95 | } 96 | 97 | return nil 98 | } 99 | 100 | func (c *Transformer) cacheItems(input *idrf.Bundle, output *idrf.DataSet) { 101 | dataDef := input.DataDef 102 | c.cachedInputBundle = input 103 | combinedColumnIndexes := make(map[int]string) 104 | 105 | for i, col := range dataDef.Columns { 106 | _, isCombined := c.columnsToCombine[col.Name] 107 | if isCombined { 108 | combinedColumnIndexes[i] = col.Name 109 | } 110 | } 111 | 112 | c.combinedIndexes = combinedColumnIndexes 113 | 114 | c.cachedOutputBundle = &idrf.Bundle{ 115 | DataDef: output, 116 | DataChan: make(chan idrf.Row, c.bufferSize), 117 | } 118 | 119 | c.jsonCreator = &defCreator{ 120 | colsToCombine: combinedColumnIndexes, 121 | } 122 | } 123 | 124 | func (c *Transformer) transformRow(row idrf.Row) (idrf.Row, error) { 125 | jsonVal, err := c.jsonCreator.toJSON(row) 126 | if err != nil { 127 | return nil, fmt.Errorf("%s: could not combine some of the columns into JSON\n%v", c.id, err) 128 | } 129 | 130 | newRow := make([]interface{}, len(c.cachedOutputBundle.DataDef.Columns)) 131 | currentCol := 0 132 | jsonAdded := false 133 | for i, val := range row { 134 | _, isCombined := c.combinedIndexes[i] 135 | if !jsonAdded && isCombined { 136 | newRow[currentCol] = jsonVal 137 | jsonAdded = true 138 | } else if jsonAdded && isCombined { 139 | continue 140 | } else { 141 | newRow[currentCol] = val 142 | } 143 | 144 | currentCol++ 145 | } 146 | 147 | return newRow, nil 148 | } 149 | -------------------------------------------------------------------------------- /internal/transformation/jsoncombiner/validator.go: -------------------------------------------------------------------------------- 1 | package jsoncombiner 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/timescale/outflux/internal/idrf" 7 | ) 8 | 9 | const ( 10 | errResColumnEmptyFmt = "%s: resulting column (after combination) can't be an empty string" 11 | errCombinedColumnIsDuplicateFmt = "%s: naming combined columns '%s' will result in duplicate column names to exist in result data set" 12 | errTimeColumnCombinedFmt = "%s: time column '%s' of origin data can't be combined in a JSON column" 13 | errUnknownColumnsForCombinationFmt = "%s: column to be combined '%s' not found in origin data set\nOrigin data set:%s" 14 | ) 15 | 16 | type validator interface { 17 | validate(originData *idrf.DataSet, resCol string, columnsToCombine map[string]bool) error 18 | } 19 | 20 | type defValidator struct { 21 | id string 22 | } 23 | 24 | func (v *defValidator) validate(originData *idrf.DataSet, resCol string, columnsToCombine map[string]bool) error { 25 | _, resColumnNamedAsACombinedColumn := columnsToCombine[resCol] 26 | if !resColumnNamedAsACombinedColumn && originData.ColumnNamed(resCol) != nil { 27 | return fmt.Errorf(errCombinedColumnIsDuplicateFmt, v.id, resCol) 28 | } 29 | 30 | if _, timeIsInCombined := columnsToCombine[originData.TimeColumn]; timeIsInCombined { 31 | return fmt.Errorf(errTimeColumnCombinedFmt, v.id, originData.TimeColumn) 32 | } 33 | 34 | for reqColumnName := 
range columnsToCombine { 35 | if originData.ColumnNamed(reqColumnName) == nil { 36 | return fmt.Errorf(errUnknownColumnsForCombinationFmt, v.id, reqColumnName, originData.String()) 37 | } 38 | } 39 | 40 | return nil 41 | } 42 | -------------------------------------------------------------------------------- /internal/transformation/jsoncombiner/validator_test.go: -------------------------------------------------------------------------------- 1 | package jsoncombiner 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/timescale/outflux/internal/idrf" 7 | ) 8 | 9 | func TestValidator(t *testing.T) { 10 | twoCol := []*idrf.Column{ 11 | {Name: "col1", DataType: idrf.IDRFTimestamp}, 12 | {Name: "col2", DataType: idrf.IDRFBoolean}, 13 | } 14 | 15 | threeCol := []*idrf.Column{ 16 | {Name: "col1", DataType: idrf.IDRFTimestamp}, 17 | {Name: "col2", DataType: idrf.IDRFBoolean}, 18 | {Name: "col3", DataType: idrf.IDRFDouble}, 19 | } 20 | testCases := []struct { 21 | desc string 22 | originData *idrf.DataSet 23 | toCombine map[string]bool 24 | res string 25 | expectErr bool 26 | }{ 27 | { 28 | desc: "res column named the same as a column not designated for combination", 29 | expectErr: true, 30 | res: threeCol[1].Name, 31 | toCombine: map[string]bool{threeCol[2].Name: true}, 32 | originData: &idrf.DataSet{DataSetName: "ds", Columns: threeCol, TimeColumn: threeCol[0].Name}, 33 | }, { 34 | desc: "time column can't be combined", 35 | originData: &idrf.DataSet{DataSetName: "ds", Columns: twoCol, TimeColumn: twoCol[0].Name}, 36 | toCombine: map[string]bool{twoCol[0].Name: true}, 37 | res: "res", 38 | expectErr: true, 39 | }, { 40 | desc: "column to be combined not in data set", 41 | originData: &idrf.DataSet{DataSetName: "ds", Columns: twoCol, TimeColumn: twoCol[0].Name}, 42 | toCombine: map[string]bool{twoCol[1].Name + "wrong": true}, 43 | res: "res", 44 | expectErr: true, 45 | }, { 46 | desc: "all ok", 47 | originData: &idrf.DataSet{DataSetName: "ds", Columns: threeCol, TimeColumn: threeCol[0].Name}, 48 | toCombine: map[string]bool{threeCol[1].Name: true, threeCol[2].Name: true}, 49 | res: "res", 50 | }, 51 | } 52 | 53 | val := &defValidator{} 54 | for _, testCase := range testCases { 55 | err := val.validate(testCase.originData, testCase.res, testCase.toCombine) 56 | if err == nil && testCase.expectErr { 57 | t.Errorf("test:%s\nexpected error, none got", testCase.desc) 58 | } else if err != nil && !testCase.expectErr { 59 | t.Errorf("test:%s\nunexpected error: %v", testCase.desc, err) 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /internal/transformation/transformer.go: -------------------------------------------------------------------------------- 1 | package transformation 2 | 3 | import "github.com/timescale/outflux/internal/idrf" 4 | 5 | // Transformer takes a data channel of idrf.Rows and transforms them to different rows 6 | type Transformer interface { 7 | // ID returns a string that identifies the transformer instance (all pipeline elements have it) 8 | ID() string 9 | // Prepare must be called before Start. It can be used to verify that the transformation is possible 10 | // Also, the input argument contains the data channel that will be consumed. 
11 | // The returned bundle contains the data set definition after running the transformation and
12 | // a channel that will contain the transformed data
13 | Prepare(input *idrf.Bundle) (*idrf.Bundle, error)
14 | // Start consumes the data channel given in Prepare, transforms each Point/Row and feeds it to a channel
15 | // that was returned from Prepare
16 | Start(chan error) error
17 | }
18 | 
--------------------------------------------------------------------------------
/internal/utils/broadcaster_test.go:
--------------------------------------------------------------------------------
1 | package utils
2 | 
3 | import (
4 | "fmt"
5 | "sync"
6 | "testing"
7 | )
8 | 
9 | func TestBroadcastAlreadyClosed(t *testing.T) {
10 | // an open channel is in the registry
11 | state := idInRegistry("id")
12 | state.alreadyClosed = true
13 | bc := &defaultBroadcaster{state}
14 | // broadcast on an already closed state; no error is sent to the channel
15 | bc.Broadcast("id2", fmt.Errorf("error"))
16 | close(state.registry["id"])
17 | broadcastErr := <-state.registry["id"]
18 | if broadcastErr != nil {
19 | t.Errorf("no error was expected, got: %v", broadcastErr)
20 | }
21 | }
22 | 
23 | func TestBroadcastTwoChannels(t *testing.T) {
24 | // an open channel is in the registry
25 | state := twoIdsInRegistry("id", "id2")
26 | bc := &defaultBroadcaster{state}
27 | // broadcast: only one channel receives the error, both get closed
28 | bc.Broadcast("id2", fmt.Errorf("error"))
29 | errFromChan1 := <-state.registry["id"]
30 | errFromChan2 := <-state.registry["id2"]
31 | if errFromChan1 == nil || errFromChan2 != nil {
32 | t.Error("expected only the first channel to receive an error")
33 | }
34 | 
35 | if !state.alreadyClosed {
36 | t.Error("state should be closed after broadcast")
37 | }
38 | }
39 | 
40 | func twoIdsInRegistry(id, id2 string) *state {
41 | reg := make(map[string]chan error)
42 | reg[id] = make(chan error, 1)
43 | reg[id2] = make(chan error, 1)
44 | return &state{reg, false, &sync.Mutex{}}
45 | }
46 | 
--------------------------------------------------------------------------------
/internal/utils/check_error_test.go:
--------------------------------------------------------------------------------
1 | package utils
2 | 
3 | import (
4 | "fmt"
5 | "testing"
6 | )
7 | 
8 | func TestCheckError(t *testing.T) {
9 | testCases := []struct {
10 | desc string
11 | expected error
12 | given chan error
13 | }{
14 | {
15 | desc: "Empty channel given, method doesn't block, returns nil",
16 | expected: nil,
17 | given: make(chan error),
18 | }, {
19 | desc: "Channel with error inside it given, returns that error",
20 | expected: fmt.Errorf("some error"),
21 | given: mockChannelWithError(),
22 | },
23 | }
24 | 
25 | for _, tc := range testCases {
26 | got := CheckError(tc.given)
27 | if tc.expected == nil && got != nil {
28 | t.Errorf("%s: expected no error, got: %v", tc.desc, got)
29 | } else if tc.expected != nil && (got == nil || got.Error() != tc.expected.Error()) {
30 | t.Errorf("%s: expected error: %v, got: %v", tc.desc, tc.expected, got)
31 | }
32 | }
33 | }
34 | 
35 | func mockChannelWithError() chan error {
36 | cha := make(chan error, 1)
37 | cha <- fmt.Errorf("some error")
38 | return cha
39 | }
40 | 
--------------------------------------------------------------------------------
/internal/utils/closer_test.go:
--------------------------------------------------------------------------------
1 | package utils
2 | 
3 | import "testing"
4 | 
5 | func TestClose(t *testing.T) {
6 | state := idInRegistry("id")
7 | state.alreadyClosed = true
8 | close(state.registry["id"])
9 | 
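// The setup above (alreadyClosed flag set, channel pre-closed) exercises the
// idempotency guard: Close must be a no-op once the state is marked closed.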
10 | // Close would panic here if it attempted to close the already closed channel
11 | cl := &defaultCloser{state}
12 | cl.Close()
13 | 
14 | state = idInRegistry("id")
15 | cl = &defaultCloser{state}
16 | cl.Close()
17 | err := <-state.registry["id"]
18 | if err != nil {
19 | t.Errorf("channel should have been closed, and nil returned\ngot: %v", err)
20 | }
21 | }
22 | 
--------------------------------------------------------------------------------
/internal/utils/error_broadcaster.go:
--------------------------------------------------------------------------------
1 | package utils
2 | 
3 | import (
4 | "fmt"
5 | "sync"
6 | )
7 | 
8 | type subscriber interface {
9 | Subscribe(id string) (chan error, error)
10 | }
11 | 
12 | type unsubscriber interface {
13 | Unsubscribe(id string) error
14 | }
15 | 
16 | type broadcaster interface {
17 | Broadcast(source string, err error)
18 | }
19 | 
20 | type closer interface {
21 | Close()
22 | }
23 | 
24 | // ErrorBroadcaster allows anything to subscribe to receive errors on a channel
25 | type ErrorBroadcaster interface {
26 | subscriber
27 | unsubscriber
28 | broadcaster
29 | closer
30 | }
31 | 
32 | // NewErrorBroadcaster creates a new instance of an ErrorBroadcaster
33 | func NewErrorBroadcaster() ErrorBroadcaster {
34 | registry := make(map[string]chan error)
35 | state := &state{
36 | registry: registry,
37 | alreadyClosed: false,
38 | lock: &sync.Mutex{},
39 | }
40 | return &defaultErrorBroadcaster{
41 | subscriber: &defaultSubscriber{state},
42 | unsubscriber: &defaultUnsubscriber{state},
43 | broadcaster: &defaultBroadcaster{state},
44 | closer: &defaultCloser{state},
45 | }
46 | }
47 | 
48 | func newErrorBroadcasterWith(sub subscriber, unsub unsubscriber, brc broadcaster, cls closer) ErrorBroadcaster {
49 | return &defaultErrorBroadcaster{
50 | sub, unsub, brc, cls,
51 | }
52 | }
53 | 
54 | type defaultErrorBroadcaster struct {
55 | subscriber
56 | unsubscriber
57 | broadcaster
58 | closer
59 | }
60 | type state struct {
61 | registry map[string]chan error
62 | alreadyClosed bool
63 | lock *sync.Mutex
64 | }
65 | type defaultSubscriber struct {
66 | state *state
67 | }
68 | 
69 | func (sub *defaultSubscriber) Subscribe(id string) (chan error, error) {
70 | state := sub.state
71 | state.lock.Lock()
72 | defer state.lock.Unlock()
73 | if state.alreadyClosed {
74 | return nil, fmt.Errorf("error subscriber is already closed")
75 | }
76 | 
77 | if _, exists := state.registry[id]; exists {
78 | return nil, fmt.Errorf("id %s already registered for error updates", id)
79 | }
80 | 
81 | newChannel := make(chan error, 1)
82 | state.registry[id] = newChannel
83 | return newChannel, nil
84 | }
85 | 
86 | type defaultUnsubscriber struct {
87 | state *state
88 | }
89 | 
90 | func (sub *defaultUnsubscriber) Unsubscribe(id string) error {
91 | var channel chan error
92 | var exists bool
93 | 
94 | state := sub.state
95 | state.lock.Lock()
96 | defer state.lock.Unlock()
97 | 
98 | if channel, exists = state.registry[id]; !exists {
99 | return fmt.Errorf("id %s not registered for error updates", id)
100 | }
101 | 
102 | delete(state.registry, id)
103 | 
104 | if !state.alreadyClosed {
105 | close(channel)
106 | }
107 | 
108 | return nil
109 | }
110 | 
111 | type defaultBroadcaster struct {
112 | state *state
113 | }
114 | 
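// Lifecycle sketch for the broadcaster (illustrative; the ids are arbitrary):
//
//    bc := NewErrorBroadcaster()
//    defer bc.Close()
//    errs, _ := bc.Subscribe("ingestor")
//    bc.Broadcast("extractor", fmt.Errorf("extraction failed"))
//    err := <-errs // the "ingestor" subscriber observes the extractor's error
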
115 | func (sub *defaultBroadcaster) Broadcast(source string, err error) {
116 | state := sub.state
117 | state.lock.Lock()
118 | defer state.lock.Unlock()
119 | 
120 | if state.alreadyClosed {
121 | return
122 | }
123 | 
124 | for subID, errChannel := range state.registry {
125 | if subID != source {
126 | errChannel <- err
127 | }
128 | 
129 | close(errChannel)
130 | }
131 | 
132 | state.alreadyClosed = true
133 | }
134 | 
135 | type defaultCloser struct {
136 | state *state
137 | }
138 | 
139 | func (sub *defaultCloser) Close() {
140 | state := sub.state
141 | state.lock.Lock()
142 | defer state.lock.Unlock()
143 | 
144 | if state.alreadyClosed {
145 | return
146 | }
147 | 
148 | for _, errChannel := range state.registry {
149 | close(errChannel)
150 | }
151 | 
152 | state.alreadyClosed = true
153 | }
154 | 
155 | // CheckError non-blockingly checks whether a channel has received an error
156 | func CheckError(errorChannel chan error) error {
157 | select {
158 | case err := <-errorChannel:
159 | return err
160 | default:
161 | return nil
162 | }
163 | }
164 | 
--------------------------------------------------------------------------------
/internal/utils/error_broadcaster_test.go:
--------------------------------------------------------------------------------
1 | package utils
2 | 
3 | import "testing"
4 | 
5 | func TestNewErrorBroadcaster(t *testing.T) {
6 | errorBroadcaster := NewErrorBroadcaster()
7 | 
8 | castBroadcaster := errorBroadcaster.(*defaultErrorBroadcaster)
9 | 
10 | subscriber := castBroadcaster.subscriber.(*defaultSubscriber)
11 | unsubscriber := castBroadcaster.unsubscriber.(*defaultUnsubscriber)
12 | broadcaster := castBroadcaster.broadcaster.(*defaultBroadcaster)
13 | closer := castBroadcaster.closer.(*defaultCloser)
14 | subState := subscriber.state
15 | 
16 | if subState != unsubscriber.state || subState != broadcaster.state || subState != closer.state {
17 | t.Errorf("state is not the same in all constituents")
18 | }
19 | 
20 | if len(subState.registry) != 0 {
21 | t.Errorf("registry is not empty")
22 | }
23 | 
24 | if subState.alreadyClosed {
25 | t.Errorf("new error broadcaster should not be closed")
26 | }
27 | }
28 | 
--------------------------------------------------------------------------------
/internal/utils/subscriber_test.go:
--------------------------------------------------------------------------------
1 | package utils
2 | 
3 | import (
4 | "sync"
5 | "testing"
6 | )
7 | 
8 | func TestSubscriber(t *testing.T) {
9 | testCases := []struct {
10 | state *state
11 | id string
12 | expectError bool
13 | }{
14 | {state: alreadyClosedState(), id: "id", expectError: true},
15 | {state: idInRegistry("id"), id: "id", expectError: true},
16 | {state: emptyState(), id: "id", expectError: false},
17 | }
18 | 
19 | for _, tc := range testCases {
20 | sub := &defaultSubscriber{tc.state}
21 | res, err := sub.Subscribe(tc.id)
22 | if err != nil && !tc.expectError {
23 | t.Errorf("error wasn't expected, got: %v", err)
got: %v", err) 24 | } 25 | 26 | if err == nil && tc.expectError { 27 | t.Error("error was expected, none received") 28 | } 29 | 30 | if tc.expectError { 31 | continue 32 | } 33 | 34 | if tc.state.registry[tc.id] != res { 35 | t.Error("channel returned from Subscribe is not the same as in the registry") 36 | } 37 | } 38 | } 39 | 40 | func alreadyClosedState() *state { 41 | return &state{make(map[string]chan error), true, &sync.Mutex{}} 42 | } 43 | 44 | func idInRegistry(id string) *state { 45 | reg := make(map[string]chan error) 46 | reg[id] = make(chan error) 47 | return &state{reg, false, &sync.Mutex{}} 48 | } 49 | 50 | func emptyState() *state { 51 | return &state{make(map[string]chan error), false, &sync.Mutex{}} 52 | } 53 | -------------------------------------------------------------------------------- /internal/utils/unsubscriber_test.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestUnsubscriber(t *testing.T) { 8 | a := make(map[string]bool) 9 | delete(a, "kure") 10 | testCases := []struct { 11 | state *state 12 | id string 13 | expectError bool 14 | }{ 15 | {state: emptyState(), id: "id", expectError: true}, 16 | {state: idInRegistry("id"), id: "id", expectError: false}, 17 | } 18 | 19 | for _, tc := range testCases { 20 | sub := &defaultUnsubscriber{tc.state} 21 | err := sub.Unsubscribe(tc.id) 22 | if err != nil && !tc.expectError { 23 | t.Errorf("error wasn't expected. got: %v", err) 24 | } 25 | 26 | if err == nil && tc.expectError { 27 | t.Error("error was expected, none received") 28 | } 29 | 30 | if tc.expectError { 31 | continue 32 | } 33 | 34 | if len(tc.state.registry) != 0 { 35 | t.Error("registry was not empty after unsubscribe") 36 | } 37 | } 38 | } 39 | 40 | func TestUnsubscribeWhenAlreadyClosed(t *testing.T) { 41 | state := idInRegistry("id") 42 | state.alreadyClosed = true 43 | close(state.registry["id"]) 44 | 45 | // if already closed, don't close the channel again 46 | sub := &defaultUnsubscriber{state} 47 | err := sub.Unsubscribe("id") 48 | if err != nil { 49 | t.Errorf("expected no error, got: %v", err) 50 | } 51 | 52 | if len(state.registry) != 0 { 53 | t.Errorf("expected registry to be empty, got: %v", state.registry) 54 | } 55 | } 56 | --------------------------------------------------------------------------------