├── docs
├── .gitbook
│ └── assets
│ │ ├── 1.png
│ │ ├── 2.png
│ │ ├── 3.png
│ │ ├── 4.png
│ │ └── overview.png
├── documentation
│ ├── integrations
│ │ ├── file-formats
│ │ │ ├── README.md
│ │ │ └── csv-format.md
│ │ ├── cloud-blob-storage
│ │ │ ├── README.md
│ │ │ └── amazon-s3.md
│ │ ├── README.md
│ │ └── databases
│ │ │ ├── README.md
│ │ │ ├── mysql.md
│ │ │ ├── mongodb.md
│ │ │ └── postgres.md
│ └── config
│ │ ├── README.md
│ │ ├── models.md
│ │ └── sources.md
├── concepts
│ ├── overview.md
│ ├── sources.md
│ ├── validation.md
│ └── models.md
├── getting-started
│ ├── hello-world
│ │ ├── README.md
│ │ ├── configuring-sources.md
│ │ └── creating-models.md
│ └── installation.md
├── SUMMARY.md
└── README.md
├── .env.example
├── Makefile
├── internal
├── engine
│ ├── fs.go
│ ├── query.go
│ ├── env_test.go
│ ├── config.go
│ ├── insert.go
│ ├── printer.go
│ ├── sources.go
│ ├── logging.go
│ ├── tables.go
│ ├── env.go
│ ├── duckdb.go
│ ├── mongo.go
│ ├── types_test.go
│ ├── retrieve.go
│ ├── snowflake.go
│ ├── mysql.go
│ ├── s3.go
│ ├── postgres.go
│ ├── types.go
│ ├── models.go
│ ├── metadata.go
│ └── columns.go
└── cli
│ ├── repl.go
│ ├── commands.go
│ └── app.go
├── main.go
├── .gitignore
├── .github
└── workflows
│ ├── ci.yaml
│ └── release.yaml
├── go.mod
├── README.md
├── LICENSE
└── go.sum
/docs/.gitbook/assets/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scalecraft-dev/preen/HEAD/docs/.gitbook/assets/1.png
--------------------------------------------------------------------------------
/docs/.gitbook/assets/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scalecraft-dev/preen/HEAD/docs/.gitbook/assets/2.png
--------------------------------------------------------------------------------
/docs/.gitbook/assets/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scalecraft-dev/preen/HEAD/docs/.gitbook/assets/3.png
--------------------------------------------------------------------------------
/docs/.gitbook/assets/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scalecraft-dev/preen/HEAD/docs/.gitbook/assets/4.png
--------------------------------------------------------------------------------
/docs/.gitbook/assets/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scalecraft-dev/preen/HEAD/docs/.gitbook/assets/overview.png
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | # DEBUG | INFO | WARN | ERROR
2 | PREEN_LOG_LEVEL=INFO
3 | # Config path for Preen
4 | PREEN_CONFIG_PATH=./.preen/
5 | # Model path for Preen
6 | PREEN_MODELS_PATH=./models/
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Run the integration test script against a freshly built binary.
.PHONY: integration-test
integration-test: build
	build/ci/integration-test.sh

# Compile the CLI into bin/preen.
.PHONY: build
build:
	go build -o bin/preen main.go

# Run the linters configured for golangci-lint.
.PHONY: lint
lint:
	golangci-lint run

# Install the tooling required by the lint target (Homebrew).
.PHONY: install-dependencies
install-dependencies:
	brew install golangci-lint

# Backward-compatible alias for the original, misspelled target name.
.PHONY: install-depenencies
install-depenencies: install-dependencies

# Run all Go unit tests verbosely.
.PHONY: test
test:
	go test -v ./...
--------------------------------------------------------------------------------
/docs/documentation/integrations/file-formats/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how to configure preen to connect to file systems.
3 | ---
4 |
5 | # File Formats
6 |
7 | The file formats are configured as a YAML file and contain configurations specific to the underlying file storage system. A full reference of all options can be found here.
8 |
9 | ## Supported Formats
10 |
11 | Preen supports the following file formats for file-based sources:
12 |
13 | - [CSV](csv-format.md)
14 |
--------------------------------------------------------------------------------
/docs/concepts/overview.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: Show the overall system architecture.
3 | ---
4 |
5 | # Overview
6 |
7 | The core concepts of Preen are:
8 |
9 | - [Sources](sources.md)
10 | - [Models](models.md)
11 |
12 | These concepts are used to define the data you want to retrieve, the shape of the data, and the source of the data. The overall system architecture is as follows:
13 |
14 |
15 |
--------------------------------------------------------------------------------
/internal/engine/fs.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "os"
5 | "path/filepath"
6 | )
7 |
// getYmlorYamlPath picks the on-disk location of the config file fileName
// inside the directory path. The ".yml" variant is returned when os.Stat
// succeeds on it; in every other case the ".yaml" variant is returned, whether
// or not it exists, so that callers can create it if needed.
func getYmlorYamlPath(path string, fileName string) string {
	shortForm := filepath.Join(path, fileName+".yml")

	_, statErr := os.Stat(shortForm)
	if statErr != nil {
		// No usable .yml file: fall back to the .yaml spelling.
		return filepath.Join(path, fileName+".yaml")
	}

	return shortForm
}
21 |
--------------------------------------------------------------------------------
/docs/documentation/integrations/cloud-blob-storage/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how to configure preen to connect to cloud blob storage services like Amazon S3.
3 | ---
4 |
5 | # Cloud Blob Storage
6 |
7 | Preen can connect to cloud blob storage services like Amazon S3. This is useful for accessing data that is already in a data lake.
8 |
9 | ## Supported Integrations
10 |
11 | Preen currently supports the following cloud blob storage services:
12 |
13 | - [Amazon S3](amazon-s3.md)
14 |
15 | ## Code References
16 |
17 | - [s3.go](https://github.com/preendata/preen/blob/main/internal/engine/s3.go)
18 |
--------------------------------------------------------------------------------
/docs/getting-started/hello-world/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: Getting your first project running.
3 | ---
4 |
5 | # Hello World
6 |
7 | Getting started with Preen is as simple as connecting to the data sources you want to query, defining your model, and querying the result.
8 |
9 | The following pages provide a quick, low detail setup guide for those looking to get up and running on their own data ASAP.
10 |
11 | You can see how to configure Preen in the [Example repository](https://github.com/preendata/preen-template). You can also use this repository as a template for creating your first Preen project.
12 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "log"
5 | "os"
6 |
7 | "github.com/joho/godotenv"
8 | "github.com/preendata/preen/internal/cli"
9 | "github.com/preendata/preen/internal/engine"
10 | )
11 |
12 | func main() {
13 | err := godotenv.Load()
14 | if err != nil {
15 | if os.Getenv("PREEN_DEBUG") == "true" {
16 | log.Print("warn: error loading .env file", err)
17 | }
18 | }
19 |
20 | err = engine.Initialize()
21 | if err != nil {
22 | log.Print("error initializing logging", err)
23 | }
24 |
25 | app := cli.NewApp()
26 | if err := app.Run(os.Args); err != nil {
27 | engine.Fatal(err)
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/internal/engine/query.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
// QueryResults carries the outcome of a query started by Execute.
type QueryResults struct {
	// Rows accumulates every row received on ResultsChan.
	// NOTE(review): presumably each map is keyed by column name — confirm
	// against ddbQuery.
	Rows []map[string]any
	// Columns is the column list returned by ddbQuery.
	Columns []string
	// ResultsChan is the channel rows are read from while a query runs.
	ResultsChan chan map[string]any
}
8 |
9 | var err error
10 |
11 | func Execute(statement string) (*QueryResults, error) {
12 | Debug("Executing query: " + statement)
13 | qr := QueryResults{
14 | ResultsChan: make(chan map[string]any),
15 | }
16 |
17 | go qr.collectResults(qr.ResultsChan)
18 |
19 | qr.Columns, err = ddbQuery(statement, qr.ResultsChan)
20 | if err != nil {
21 | return nil, err
22 | }
23 |
24 | return &qr, nil
25 | }
26 |
27 | func (qr *QueryResults) collectResults(c chan map[string]any) {
28 | for row := range c {
29 | qr.Rows = append(qr.Rows, row)
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/docs/documentation/integrations/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: details of the integrations preen supports.
3 | ---
4 |
5 | # Integrations
6 |
7 | Preen supports a wide range of integrations, including databases, cloud blob storage, and file formats. We are currently adding support for more systems.
8 |
9 | ## Databases
10 |
11 | Preen supports the following databases:
12 |
13 | - [Postgres](./databases/postgres.md)
14 | - [MySQL](./databases/mysql.md)
15 | - [MongoDB](./databases/mongodb.md)
16 |
17 | ## Cloud Blob Storage
18 |
19 | Preen supports the following cloud blob storage systems:
20 |
21 | - [Amazon S3](./cloud-blob-storage/amazon-s3.md)
22 |
23 | ## File Formats
24 |
25 | Preen supports the following file formats for file-based sources:
26 |
27 | - [CSV](./file-formats/csv-format.md)
28 |
--------------------------------------------------------------------------------
/docs/documentation/integrations/databases/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how to configure preen to connect to databases.
3 | ---
4 |
5 | # Databases
6 |
7 | Preen can connect to SQL and NoSQL databases. Our current implementation uses the Go [sql](https://pkg.go.dev/database/sql) and [pgx](https://github.com/jackc/pgx) libraries to connect to databases.
8 |
9 | ## Supported Integrations
10 |
11 | Preen currently supports the following databases:
12 |
13 | - [Postgres](postgres.md)
14 | - [MySQL](mysql.md)
15 | - [MongoDB](mongodb.md)
16 |
17 | ## Code References
18 |
19 | - [mysql.go](https://github.com/preendata/preen/blob/main/internal/engine/mysql.go)
20 | - [postgres.go](https://github.com/preendata/preen/blob/main/internal/engine/postgres.go)
21 | - [mongo.go](https://github.com/preendata/preen/blob/main/internal/engine/mongo.go)
--------------------------------------------------------------------------------
/internal/engine/env_test.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "os"
5 | "testing"
6 | )
7 |
8 | func TestGetEnv(t *testing.T) {
9 | // Set up the environment variable
10 | os.Setenv("TEST_KEY", "test_value")
11 | defer os.Unsetenv("TEST_KEY")
12 |
13 | // Test cases
14 | tests := []struct {
15 | key string
16 | defaultValue string
17 | expected string
18 | required bool
19 | }{
20 | {"TEST_KEY", "default_value", "test_value", true},
21 | {"NON_EXISTENT_KEY", "default_value", "default_value", false},
22 | }
23 |
24 | for _, tt := range tests {
25 | t.Run(tt.key, func(t *testing.T) {
26 | result := getEnv(tt.key, tt.defaultValue, tt.required)
27 | if result != tt.expected {
28 | t.Errorf("GetEnv(%s, %s) = %s; want %s", tt.key, tt.defaultValue, result, tt.expected)
29 | }
30 | })
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/internal/engine/config.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import "fmt"
4 |
// Version identifies the Preen release this code belongs to.
const Version = "v0.2.4"
6 |
7 | func GetConfig(modelTarget string) (*SourceConfig, *ModelConfig, error) {
8 | sc, err := GetSourceConfig()
9 | if err != nil {
10 | return nil, nil, err
11 | }
12 |
13 | mc, err := GetModelConfigs(modelTarget)
14 | if err != nil {
15 | return nil, nil, err
16 | }
17 |
18 | return sc, mc, nil
19 | }
20 |
21 | func ValidateConfigs(sc *SourceConfig, mc *ModelConfig) error {
22 | if err := errorOnMissingModels(sc, mc); err != nil {
23 | return fmt.Errorf("error on missing models: %w", err)
24 | }
25 |
26 | if err := removeUnusedModels(sc, mc); err != nil {
27 | return fmt.Errorf("error removing unused models: %w", err)
28 | }
29 |
30 | if err := parseModels(mc); err != nil {
31 | return fmt.Errorf("error parsing models: %w", err)
32 | }
33 |
34 | return nil
35 | }
36 |
--------------------------------------------------------------------------------
/docs/documentation/config/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how to configure preen.
3 | ---
4 |
5 | # Config
6 |
7 | Preen is configured using a YAML file. The config file is used to define the sources, models, and other configurations. You can customize the location of the config file by setting the `PREEN_CONFIG_PATH` environment variable. If no environment variable is set, Preen will look for a file called `~/.preen/sources.yaml`. You can also configure a custom path where Preen will look for model files by setting the `PREEN_MODELS_PATH` environment variable. If no environment variable is set, Preen will look for models configured in `~/.preen/models.yaml`.
8 |
9 | ## Config File Reference
10 |
11 | - [Sources](sources.md)
12 | - [Models](models.md)
13 |
14 | ## Code References
15 |
16 | - [env.go](https://github.com/preendata/preen/blob/main/internal/engine/env.go)
17 | - [config.go](https://github.com/preendata/preen/blob/main/internal/engine/config.go)
18 | - [sources.go](https://github.com/preendata/preen/blob/main/internal/engine/sources.go)
19 | - [models.go](https://github.com/preendata/preen/blob/main/internal/engine/models.go)
20 |
--------------------------------------------------------------------------------
/docs/getting-started/hello-world/configuring-sources.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: Define the data sources your Preen session can connect to
3 | ---
4 |
5 | # Configuring Sources
6 |
7 | Preen maintains a configuration file in `$HOME/.preen/sources.yml` by default. This can be overridden via the `PREEN_CONFIG_PATH` environment variable.
8 |
9 | A config file might look like this:
10 |
11 | ```yaml
12 | sources:
13 | - name: s3-model
14 | engine: s3
15 | connection:
16 | bucket_name: users
17 | region: us-east-1
18 | - name: postgres-model
19 | engine: postgres
20 | connection:
21 | host: localhost
22 | port: 33061
23 | database: postgres
24 | username: root
25 | password: myp@assword
26 | - name: mongo-model
27 | engine: mongodb
28 | connection:
29 | host: ${MONGO_HOST}
30 | port: ${MONGO_PORT}
31 | database: mongo
32 | ```
33 |
34 | In a nutshell, your configuration is primarily a list of data sources, credentials, and their engine classification (see [config](../../documentation/config/ "mention") for a list of supported engines). **Be sure to add this file to your `.gitignore` if you are keeping it somewhere version controlled.**
35 |
--------------------------------------------------------------------------------
/docs/concepts/sources.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: what is a source?
3 | ---
4 |
5 | # Sources
6 |
7 | ## Overview
8 |
9 | A Preen **Source** is any data storage system that is listed under the [integrations](../documentation/integrations/ "mention") section, such as a relational database (e.g. Postgres, MySQL etc.), NoSQL database (MongoDB) or file store (Amazon S3).
10 |
11 | ## Definition
12 |
13 | A Source is a storage system-dependent configuration that specifies:
14 |
15 | 1. The name of the source
16 | 2. The type of the source
17 | 3. The connection details for the source
18 |
19 | ## Examples
20 |
21 | ### Databases
22 |
23 | ```yaml
24 | sources:
25 | - name: users-db-us-east-1
26 | engine: mysql
27 | connection:
28 | host: localhost
29 | port: 5432
30 | database: mydatabase
31 | user: ${DB_USER}
32 | password: ${DB_PASSWORD}
33 | models:
34 | - users
35 | ```
36 |
37 | ### Amazon S3
38 |
39 | ```yaml
40 | sources:
41 | - name: users-s3-us-east-1
42 | engine: s3
43 | connection:
44 | bucket_name: users-bucket
45 | region: us-east-1
46 | models:
47 | - users
48 | ```
49 |
50 | For detailed configuration reference see [sources.md](../documentation/config/sources.md "mention")
--------------------------------------------------------------------------------
/docs/concepts/validation.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how is data validated?
3 | ---
4 |
5 | # Validation
6 |
7 | ## Overview
8 |
9 | When collating data from multiple sources, it is possible that the data types of the columns do not match. For example, a column may be defined as a `string` in one source and as an `int` in another. Preen will attempt to coerce the data types of the columns to the most common data type across all sources. We do this by implementing a [majority voting algorithm](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_majority_vote_algorithm). If we are unable to determine the data type of a column, we will error out and require manual intervention.
10 |
11 | **Note:** There will be cases where you need to manually cast the data types of the columns in your model.
12 |
13 | We store the results of the validation step in a DuckDB table called `preen_information_schema`. You can use this table to inspect the results of the validation step and to cast the data types of the columns in your model.
14 |
15 | ## CLI Commands
16 |
17 | ```bash
18 | preen source validate
19 | ```
20 |
21 | ## Code References
22 |
23 | - [metadata.go](https://github.com/preendata/preen/blob/main/internal/engine/metadata.go)
24 | - [columns.go](https://github.com/preendata/preen/blob/main/internal/engine/columns.go)
25 |
--------------------------------------------------------------------------------
/docs/SUMMARY.md:
--------------------------------------------------------------------------------
1 | # Table of contents
2 |
3 | * [Preen](README.md)
4 |
5 | ## Getting Started
6 |
7 | * [Installation](getting-started/installation.md)
8 | * [Hello World](getting-started/hello-world/README.md)
9 | * [Configuring Sources](getting-started/hello-world/configuring-sources.md)
10 | * [Creating Models](getting-started/hello-world/creating-models.md)
11 |
12 | ## Concepts
13 |
14 | * [Overview](concepts/overview.md)
15 | * [Sources](concepts/sources.md)
16 | * [Models](concepts/models.md)
17 | * [Validation](concepts/validation.md)
18 |
19 | ## Documentation
20 |
21 | * [Config](documentation/config/README.md)
22 | * [Sources](documentation/config/sources.md)
23 | * [Models](documentation/config/models.md)
24 | * [Integrations](documentation/integrations/README.md)
25 | * [Databases](documentation/integrations/databases/README.md)
26 | * [Postgres](documentation/integrations/databases/postgres.md)
27 | * [MySQL](documentation/integrations/databases/mysql.md)
28 | * [MongoDB](documentation/integrations/databases/mongodb.md)
29 | * [Cloud Blob Storage](documentation/integrations/cloud-blob-storage/README.md)
30 | * [Amazon S3](documentation/integrations/cloud-blob-storage/amazon-s3.md)
31 | * [File Formats](documentation/integrations/file-formats/README.md)
32 | * [CSV](documentation/integrations/file-formats/csv-format.md)
33 |
--------------------------------------------------------------------------------
/docs/documentation/integrations/databases/mysql.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how to configure preen to connect to MySQL databases.
3 | ---
4 |
5 | # MySQL
6 |
7 | Preen uses the [sql](https://pkg.go.dev/database/sql) library to connect to MySQL databases.
8 |
9 | ## Example Preen Source Configuration
10 |
11 | ```yaml
12 | # FILENAME: ~/.preen/sources.yaml
13 | sources:
14 | - name: mysql-example
15 | engine: mysql
16 | connection:
17 | host: localhost
18 | port: 3306
19 | database: mysql
20 | username: ${MYSQL_USER} # You can specify environment variables in the sources.yaml file.
21 | password: ${MYSQL_PASSWORD}
22 | ```
23 |
24 | ## MySQL Models
25 |
26 | MySQL models are defined as a YAML file that contains a SQL query.
27 |
28 | ```yaml
29 | # FILENAME: ~/.preen/models/users.yaml
30 | name: users # This name needs to be unique
31 | type: sql
32 | query: |
33 | select
34 | users.id,
35 | users.first_name,
36 | users.last_name,
37 | users.birthday
38 | from
39 | users;
40 | ```
41 |
42 | ## MySQL Type Mappings
43 |
44 | A comprehensive list of MySQL type mappings can be found [here](https://github.com/preendata/preen/blob/main/internal/engine/types.go#L190-L240).
45 |
46 | ## Code References
47 |
48 | - [types.go](https://github.com/preendata/preen/blob/main/internal/engine/types.go)
49 | - [mysql.go](https://github.com/preendata/preen/blob/main/internal/engine/mysql.go)
--------------------------------------------------------------------------------
/docs/getting-started/installation.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how to install preen.
3 | ---
4 |
5 | # Installation
6 |
7 | You can install Preen a few different ways. Note that the binary installation is the easiest method if you want to get started quickly. We support building from source if you want to have a local copy of the application code and make changes.
8 |
9 | ## Homebrew
10 |
11 | Download the executable via our Homebrew cask.
12 |
13 | ```
14 | brew tap preendata/preen
15 | brew install preen
16 | ```
17 |
18 | ## Download binary
19 |
20 | You can download a binary for your operating system and architecture from the [GitHub Releases](https://github.com/preendata/preen/releases) page.
21 |
22 | ```bash
23 | # Using curl
24 | sh -c "$(curl -fsSL https://raw.githubusercontent.com/preendata/preen/main/build/install.sh)"
25 | ```
26 |
27 | ```bash
28 | # Using wget
29 | sh -c "$(wget https://raw.githubusercontent.com/preendata/preen/main/build/install.sh -O -)"
30 | ```
31 |
32 | ## Build from source
33 |
34 | To build Preen from source, you need to have Go 1.23.0 or later installed on your system. Then, you can build the application using the following commands:
35 |
36 | ```bash
37 | git clone https://github.com/preendata/preen.git
38 | cd preen
39 | make build
40 | ```
41 |
42 | This will create a `preen` binary in the `bin` directory. You can add this to your `PATH` if you want to use the `preen` command from anywhere.
43 |
44 | ### Validation
45 |
46 | Test that you've correctly installed the application by executing
47 |
48 | ```bash
49 | preen -h
50 | ```
51 |
--------------------------------------------------------------------------------
/docs/documentation/integrations/databases/mongodb.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how to configure preen to connect to MongoDB databases.
3 | ---
4 |
5 | # MongoDB
6 |
7 | Preen can connect to MongoDB databases. Our current implementation uses the Go [mongo](https://pkg.go.dev/go.mongodb.org/mongo-driver/mongo) library to connect to databases.
8 |
9 | ## Example Preen Source Configuration
10 |
11 | ```yaml
12 | # FILENAME: ~/.preen/sources.yaml
13 | sources:
14 | - name: mongo-example
15 | engine: mongodb
16 | connection:
17 | host: localhost
18 | port: 27117
19 | database: preendb
20 | username: ${MONGODB_USERNAME}
21 | password: ${MONGODB_PASSWORD}
22 | auth_source: admin
23 | ```
24 |
25 | ## Mongo Database Models
26 |
27 | MongoDB models are defined as a YAML file that contains a MongoDB document filter. This filter is used to match documents in the database and return the data that matches the filter. The documents are written to DuckDB as a JSON column for local querying using the native [JSON querying capabilities of DuckDB](https://duckdb.org/docs/extensions/json.html).
28 |
29 | ```yaml
30 | # FILENAME: ~/.preen/models/users.yaml
31 | name: users-mongodb
32 | type: mongodb
33 | collection: users # The name of the collection to query.
34 | query: |
35 | {
36 | "login_attempts": {
37 | "$gt": 1
38 | },
39 | "account_status": {
40 | "$in": ["inactive", "suspended"]
41 | }
42 | }
43 | ```
44 |
45 | ## Code References
46 |
47 | - [mongo.go](https://github.com/preendata/preen/blob/main/internal/engine/mongo.go)
48 |
--------------------------------------------------------------------------------
/docs/documentation/integrations/databases/postgres.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how to configure preen to connect to Postgres databases.
3 | ---
4 |
5 | # Postgres
6 |
7 | Preen uses the [pgx](https://github.com/jackc/pgx) library to connect to Postgres databases.
8 |
9 | ## Example Preen Source Configuration
10 |
11 | ```yaml
12 | # FILENAME: ~/.preen/sources.yaml
13 | sources:
14 | - name: postgres-example
15 | engine: postgres
16 | connection:
17 | host: localhost
18 | port: 5432
19 | database: postgres
20 | username: ${PG_USER} # You can specify environment variables in the sources.yaml file.
21 | password: ${PG_PASSWORD}
22 | ```
23 |
24 | ## Postgres Models
25 |
26 | Postgres models are defined as a YAML file that contains a SQL query.
27 |
28 | ```yaml
29 | # FILENAME: ~/.preen/models/users.yaml
30 | name: users # This name needs to be unique
31 | type: sql
32 | query: |
33 | select
34 | users.id,
35 | users.first_name,
36 | users.last_name,
37 | users.birthday
38 | from
39 | users;
40 | ```
41 |
42 | ## Postgres Type Mappings
43 |
44 | A comprehensive list of Postgres type mappings can be found [here](https://github.com/preendata/preen/blob/main/internal/engine/types.go#L190-L240). We use the [pgtype](https://pkg.go.dev/github.com/jackc/pgtype) library to map Postgres types to Go types, with a few custom mappings for things like `float64`, `duration`, and `time` types.
45 |
46 | ## Code References
47 |
48 | - [types.go](https://github.com/preendata/preen/blob/main/internal/engine/types.go)
49 | - [postgres.go](https://github.com/preendata/preen/blob/main/internal/engine/postgres.go)
50 |
--------------------------------------------------------------------------------
/internal/engine/insert.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "database/sql/driver"
5 | "fmt"
6 | )
7 |
8 | func Insert(modelName ModelName, ic <-chan []driver.Value, dc chan<- []int64) {
9 | connector, err := ddbCreateConnector()
10 | if err != nil {
11 | panic(err)
12 | }
13 | appender, err := ddbNewAppender(connector, "main", string(modelName))
14 | if err != nil {
15 | panic(err)
16 | }
17 | rowCounter := 0
18 | for message := range ic {
19 | if message[0] == "quit" {
20 | break
21 | }
22 | Debug(fmt.Sprintf("Inserting row: %+v", message))
23 |
24 | if err := appender.AppendRow(message...); err != nil {
25 | Error(fmt.Sprintf("Failed to append row: %v", err))
26 | Error(fmt.Sprintf("Row data: %+v", message))
27 | panic(err)
28 | }
29 | rowCounter++
30 | if rowCounter%10000000 == 0 {
31 | Debug(fmt.Sprintf(
32 | "Flushing 10M rows from appender to DuckDB for model: %s, %d", modelName, rowCounter,
33 | ))
34 | if err := appender.Flush(); err != nil {
35 | panic(err)
36 | }
37 | }
38 | }
39 | if err = appender.Close(); err != nil {
40 | panic(err)
41 | }
42 | dc <- []int64{int64(rowCounter)}
43 | }
44 |
45 | func ConfirmInsert(modelName string, dc chan []int64, rowsExpected int64) {
46 | for message := range dc {
47 | if rowsExpected == 0 {
48 | Debug(fmt.Sprintf("Inserted %d rows into model %s", message[0], modelName))
49 | break
50 | }
51 | if message[0] == rowsExpected {
52 | Debug(fmt.Sprintf("Inserted %d rows into model %s. Expected %d rows", message[0], modelName, rowsExpected))
53 | break
54 | }
55 | if message[0] != rowsExpected {
56 | Error(fmt.Sprintf("Inserted %d rows into model %s. Expected %d rows", message[0], modelName, rowsExpected))
57 | break
58 | }
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/docs/documentation/config/models.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how to configure preen models.
3 | ---
4 |
5 | # Models
6 |
7 | Preen models are defined as a YAML file. The model file is used to define the data sources, the query to be executed, and the type of query to be executed.
8 |
9 | ## Model Configuration Options
10 |
11 | | Option | Description | Required | Applicable Types |
12 | | --------------- | ----------------------------------------------------------------------- | ----------------------- | ----------------------------------- |
13 | | `name` | The unique name of the model | Yes | All |
14 | | `type` | The type of the model (e.g.`database`, `file`) | Yes | All |
15 | | `format` | The format of the data (e.g. csv) | Only for `file` type | `file` |
16 | | `query` | The query to be executed | Yes for `database` type | `database` |
17 | | `options` | Additional options for the model (e.g., file format, delimiter, header) | No | All (specific options vary by type) |
18 | | `file_patterns` | The file patterns to be used for matching files | Only for `file` type | `file` |
19 | | `collection` | The name of the collection to query | Only for `database` type | Used for MongoDB sources |
20 |
21 | ## Code References
22 |
23 | * [models.go](../../../internal/engine/models.go)
24 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout:
3 | title:
4 | visible: true
5 | description:
6 | visible: false
7 | tableOfContents:
8 | visible: true
9 | outline:
10 | visible: true
11 | pagination:
12 | visible: true
13 | ---
14 |
15 | # Preen
16 |
17 | Preen is a local-first, federated analytics engine built on top of DuckDB. Think of Preen as an open-core version of Fivetran, Hightouch, or Stitch for local data processing. Preen enables accessing data across 10s, 100s or 1000s of data sources. The only thing you need is a laptop, basic SQL and access to your data sources.
18 |
19 | {% embed url="https://www.youtube.com/watch?v=O3IMRaSkEcQ" %}
20 |
21 | ## Why Preen?
22 |
23 | * Run analysis and train AI models directly from the primary copy of your company's data.
24 | * Build data applications with their own local database, think SQLite for data applications.
25 | * No datalakes. No cloud computing. No copies of data that create numerous conflicting versions of data.
26 | * Get instant access to exactly the data you need without waiting on another team.
27 | * Describe your company's data universe in code. Build ephemeral, versioned, enterprise data warehouses from scratch directly on your laptop.
28 |
29 |
30 |
--------------------------------------------------------------------------------
/internal/cli/repl.go:
--------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "fmt"
5 | "io"
6 | "strings"
7 |
8 | "github.com/chzyer/readline"
9 | "github.com/preendata/preen/internal/engine"
10 | "github.com/urfave/cli/v2"
11 | )
12 |
13 | func Repl(c *cli.Context) error {
14 | outputFormat := c.String("output-format")
15 | fmt.Println("Output format: ", outputFormat)
16 |
17 | rl, err := readline.NewEx(&readline.Config{
18 | Prompt: "preen> ",
19 | HistoryFile: "/tmp/preen-history.tmp",
20 | InterruptPrompt: "^C",
21 | EOFPrompt: "exit",
22 | HistorySearchFold: true,
23 | })
24 | if err != nil {
25 | return fmt.Errorf("failed to initialize readline: %w", err)
26 | }
27 | defer rl.Close()
28 |
29 | fmt.Println("REPL started. Type 'exit' to quit.")
30 | var cmds []string
31 | for {
32 | line, err := rl.Readline()
33 | if err == readline.ErrInterrupt {
34 | if len(line) == 0 {
35 | break
36 | } else {
37 | continue
38 | }
39 | } else if err == io.EOF {
40 | break
41 | } else if err != nil {
42 | return fmt.Errorf("failed to read input: %w", err)
43 | }
44 |
45 | line = strings.TrimSpace(line)
46 |
47 | // Handle exit command
48 | if line == "exit" || line == "quit" {
49 | fmt.Println("Exiting REPL.")
50 | break
51 | }
52 |
53 | cmds = append(cmds, line)
54 | if !strings.HasSuffix(line, ";") {
55 | rl.SetPrompt(">")
56 | continue
57 | }
58 |
59 | cmd := strings.Join(cmds, " ")
60 | cmds = cmds[:0]
61 | rl.SetPrompt("preendb> ")
62 | if err := rl.SaveHistory(cmd); err != nil {
63 | fmt.Printf("failed to save repl history: %v\n", err)
64 | }
65 |
66 | // Execute the input as a query
67 | qr, err := engine.Execute(cmd)
68 | if err != nil {
69 | fmt.Printf("Error: %v\n", err)
70 | continue
71 | }
72 |
73 | if err := engine.WriteToTable(qr.Rows, qr.Columns, outputFormat); err != nil {
74 | fmt.Printf("Error: %v\n", err)
75 | continue
76 | }
77 | }
78 |
79 | return nil
80 | }
81 |
--------------------------------------------------------------------------------
/docs/getting-started/hello-world/creating-models.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: How to create a model to query a source.
3 | ---
4 |
5 | # Creating Models
6 |
7 | [models.md](../../concepts/models.md "mention") are how you define the data you want to work with from a given data source. Don't think of a Model as your final result or query set; rather, it's all the relevant data from which you may query your final result set.
8 |
9 | Read more about the rationale behind [models.md](../../concepts/models.md "mention") on its concept page.
10 |
11 | ## Defining a Model
12 |
13 | You can define models in two ways: by adding a `models.yaml` file to the `PREEN_CONFIG_PATH`, or by adding individual model files to the `~/.preen/models` directory. You may save a model file anywhere you'd like, so long as its parent directory is specified by `PREEN_MODELS_PATH`.
14 |
15 | Here's an example `database` model. **Note that column names need to be fully qualified, i.e. users.id instead of id.**
16 |
17 | ```yaml
18 | # FILENAME: ~/.preen/models/users.yaml
19 | name: users # This name needs to be unique
20 | type: database
21 | query: |
22 | select
23 | users.id,
24 | users.first_name,
25 | users.last_name,
26 | users.birthday
27 | from
28 | users;
29 | ```
30 |
31 | ## Registering a Model with a Source
32 |
33 | Consider a simplified [sources.md](../../concepts/sources.md "mention") config from the last page, pared down to one data source. You register the users model with the source as follows.
34 |
35 | ```yaml
36 | # FILENAME: ~/.preen/sources.yaml
37 | sources:
38 | - name: postgres-model
39 | engine: postgres
40 | connection:
41 | host: localhost
42 | port: 33061
43 | database: postgres
44 | username: root
45 | password: myp@assword
46 | models:
47 | - users
48 | ```
49 |
50 | You can now validate and build your models in Preen.
51 |
52 | ```bash
53 | preen source validate
54 | preen model build
55 | ```
56 |
--------------------------------------------------------------------------------
/docs/documentation/config/sources.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how to configure preen sources.
3 | ---
4 |
5 | # Sources
6 |
7 | Preen sources are defined in a YAML file. The source file describes each data source: its name, its engine, its connection details, and the models registered with it.
8 |
9 | ## Source Configuration Options
10 |
11 | | Option | Description | Required | Applicable Types |
12 | | --------------- | ----------------------------------------------------------------------- | ----------------------- | ----------------------------------- |
13 | | `name` | The unique name of the source | Yes | All |
14 | | `engine` | The engine of the source (e.g. `postgres`, `s3`) | Yes | All |
15 | | `connection` | The connection details for the source (e.g. database connection details) | Yes | All |
16 | | `models` | The models to be used for the source | Yes | All |
17 |
18 | ## Source Connection Details
19 |
20 | | Option | Description |
21 | |---------------|--------------------------------------------|
22 | | `host` | The host of the source |
23 | | `port` | The port of the source |
24 | | `database` | The database of the source |
25 | | `username` | The username of the source |
26 | | `password` | The password of the source |
27 | | `auth_source` | The authentication source for MongoDB |
28 | | `bucket_name` | The bucket name for AWS S3 models |
29 | | `region` | The AWS region for S3 models |
30 |
31 | ## Code References
32 |
33 | - [sources.go](../../../internal/engine/sources.go)
34 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | preendb
2 | bin/
3 | tmp.txt
4 | .config/
5 | .venv/
6 | target/
7 | dbt_modules/
8 | /logs/
9 | .vscode/
10 | .DS_Store
11 | conf/.user.yml
12 | .idea/
13 |
14 | # Byte-compiled / optimized / DLL files
15 | __pycache__/
16 | *$py.class
17 |
18 | # C extensions
19 | *.so
20 |
21 | # Distribution / packaging
22 | .Python
23 | develop-eggs/
24 | dist/
25 | downloads/
26 | eggs/
27 | .eggs/
28 | parts/
29 | sdist/
30 | var/
31 | wheels/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 | MANIFEST
36 |
37 | # PyInstaller
38 | # Usually these files are written by a python script from a template
39 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
40 | *.manifest
41 | *.spec
42 |
43 | # Installer logs
44 | pip-log.txt
45 | pip-delete-this-directory.txt
46 |
47 | # Unit test / coverage reports
48 | htmlcov/
49 | .tox/
50 | .coverage
51 | .coverage.*
52 | .cache
53 | nosetests.xml
54 | coverage.xml
55 | *.cover
56 | .hypothesis/
57 | .pytest_cache/
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Django stuff:
64 | *.log
65 | local_settings.py
66 | db.sqlite3
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # celery beat schedule file
88 | celerybeat-schedule
89 |
90 | # SageMath parsed files
91 | *.sage.py
92 |
93 | # Environments
94 | .env
95 | .venv
96 | env/
97 | venv/
98 | ENV/
99 | env.bak/
100 | venv.bak/
101 |
102 | # Spyder project settings
103 | .spyderproject
104 | .spyproject
105 |
106 | # Rope project settings
107 | .ropeproject
108 |
109 | # mkdocs documentation
110 | /site
111 |
112 | # mypy
113 | .mypy_cache/
114 |
115 | # OSX Stuff
116 | .DS_Store
117 |
118 | # Terraform
119 | *.tfvars
120 | **plugins
121 | *.tfstate*
122 |
123 | # Preen
124 | *preenContext.db*
125 | .docker/db/data/**
126 | .preendb/config.yaml
127 | preen
128 | .preen/
129 |
--------------------------------------------------------------------------------
/internal/engine/printer.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "os"
7 |
8 | "github.com/jedib0t/go-pretty/v6/table"
9 | )
10 |
// prettifyString renders a slice of JSON-like objects as an indented JSON
// document and returns it as a string. Any marshalling error is returned
// unchanged together with an empty string.
func prettifyString(data []map[string]interface{}) (string, error) {
	encoded, err := json.MarshalIndent(data, "", " ")
	if err == nil {
		return string(encoded), nil
	}
	return "", err
}
19 |
20 | // PrintPrettyJSON prints the pretty JSON to the console.
21 | func PrintPrettyJSON(data []map[string]interface{}) error {
22 | prettyJSON, err := prettifyString(data)
23 | if err != nil {
24 | return err
25 | }
26 | fmt.Println(prettyJSON)
27 | return nil
28 | }
29 |
// prettifyStruct renders an arbitrary value as indented JSON.
// A marshalling failure is wrapped and returned with an empty string.
func prettifyStruct(v interface{}) (string, error) {
	encoded, marshalErr := json.MarshalIndent(v, "", " ")
	if marshalErr == nil {
		return string(encoded), nil
	}
	return "", fmt.Errorf("failed to marshal struct: %w", marshalErr)
}
38 |
39 | func PrintPrettyStruct(v interface{}) error {
40 | prettyJSON, err := prettifyStruct(v)
41 | if err != nil {
42 | return fmt.Errorf("failed to pretty print struct: %w", err)
43 | }
44 | fmt.Println(prettyJSON)
45 | return nil
46 | }
47 |
48 | func WriteToTable(rows []map[string]any, columns []string, outputFormat string) error {
49 | // Set up
50 | t := table.NewWriter()
51 | t.SetOutputMirror(os.Stdout)
52 | t.SetStyle(table.StyleLight)
53 |
54 | // Set table headers. This is fucked, non-deterministic order of fields.
55 | headers := table.Row{}
56 | for _, header := range columns {
57 | headers = append(headers, header)
58 | }
59 | t.AppendHeader(headers)
60 |
61 | // Populate table with data
62 | for _, row := range rows {
63 | values := table.Row{}
64 | for _, header := range headers {
65 | values = append(values, row[header.(string)])
66 | }
67 | t.AppendRow(values)
68 | }
69 |
70 | switch outputFormat {
71 | case "csv":
72 | t.RenderCSV()
73 | case "markdown":
74 | t.RenderMarkdown()
75 | default:
76 | t.Render()
77 | }
78 |
79 | return nil
80 | }
81 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on: pull_request
4 |
5 | jobs:
6 | tests:
7 | env:
8 | PREEN_CONFIG_PATH: ./build/ci
9 | PREEN_MODELS_PATH: ./build/ci/models
10 | PG_USER: ${{ secrets.PG_USER }}
11 | PG_PASSWORD: ${{ secrets.PG_PASSWORD }}
12 | MYSQL_USER: ${{ secrets.MYSQL_USER }}
13 | MYSQL_PASSWORD: ${{ secrets.MYSQL_PASSWORD }}
14 | MONGO_USER: ${{ secrets.MONGO_USER }}
15 | MONGO_PASSWORD: ${{ secrets.MONGO_PASSWORD }}
16 | runs-on: ubuntu-latest
17 |
18 | steps:
19 | - name: Checkout code
20 | uses: actions/checkout@v3
21 | - name: Set up Go
22 | uses: actions/setup-go@v5
23 | with:
24 | go-version: '1.23.0'
25 |
26 | - name: Build Docker services (PG, MySQL, etc.)
27 | run:
28 | docker compose -f build/ci/docker-compose.yaml up -d
29 |
30 | - name: Install dependencies
31 | run: |
32 | go mod tidy
33 |
34 | - name: golangci-lint
35 | uses: golangci/golangci-lint-action@v6
36 | with:
37 | version: v1.60
38 | args: --timeout=5m
39 |
40 | - name: Build Preen binary
41 | run: make build
42 |
43 | - name: Unit Tests
44 | run: make test
45 |
46 | - name: Integration tests
47 | run: |
48 | sleep 5
49 |
50 | bin/preen model build
51 |
52 | # Test that the MySQL model was built and can be queried. Query should return 1 row.
53 | MYSQL_RESULTS_LENGTH=$(bin/preen query -f json "select * from mysql_data_types_test;" | jq length)
54 | if [[ $MYSQL_RESULTS_LENGTH -ne 1 ]]; then
55 | echo "Expected 1 row in mysql_data_types_test, got $MYSQL_RESULTS_LENGTH"
56 | exit 1
57 | fi
58 |
59 | # Test that the PostgreSQL model was built and can be queried. Query should return 1 row.
60 | PG_RESULTS_LENGTH=$(bin/preen query -f json "select * from pg_data_types_test;" | jq length)
61 | if [[ $PG_RESULTS_LENGTH -ne 1 ]]; then
62 | echo "Expected 1 row in pg_data_types_test, got $PG_RESULTS_LENGTH"
63 | exit 1
64 | fi
65 |
66 | - name: Shut down services
67 | if: always()
68 | run: docker compose -f build/ci/docker-compose.yaml down
--------------------------------------------------------------------------------
/docs/documentation/integrations/cloud-blob-storage/amazon-s3.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how to configure preen to connect to Amazon S3.
3 | ---
4 |
5 | # Amazon S3
6 |
7 | ## Credentials
8 |
9 | Preen's Amazon S3 integration uses the AWS SDK's credential chain to authenticate requests. This means you don't need to explicitly provide access keys in your application code or environment variables. Instead, the SDK will automatically look for credentials in the following order:
10 |
11 | 1. Environment variables
12 | 2. Shared credential file (\~/.aws/credentials)
13 | 3. AWS IAM role for Amazon EC2 or ECS tasks
14 |
15 | ### Setting Up Credentials
16 |
17 | To set up your credentials, you have several options:
18 |
19 | 1. **AWS CLI Configuration**: If you have the AWS CLI installed, you can run `aws configure` to set up your credentials. This will create a shared credential file.
20 | 2. **Shared Credentials File**: Manually create or edit the file `~/.aws/credentials` (on Linux/Mac) or `%UserProfile%\.aws\credentials` (on Windows) with the following content:
21 |
22 | ```conf
23 | [default]
24 | aws_access_key_id = YOUR_ACCESS_KEY
25 | aws_secret_access_key = YOUR_SECRET_KEY
26 | ```
27 |
28 | 3. **Environment Variables**: Set the following environment variables:
29 |
30 | ```bash
31 | export AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY
32 | export AWS_SECRET_ACCESS_KEY=YOUR_SECRET_KEY
33 | ```
34 |
35 | 4. **IAM Roles**: If your application is running on an AWS EC2 instance or ECS task, you can assign an IAM role with the necessary permissions to access S3.
36 |
37 | ### Region and Bucket Configuration
38 |
39 | Region and bucket name are specified in your Preen source configuration.
40 |
41 | ### Preen Source and Model Configuration for Amazon S3
42 |
43 | ```yaml
44 | # FILENAME: ~/.preen/models/users.yaml
45 | name: users
46 | type: file
47 | file_patterns:
48 | - "users/v1/**.csv" # This will match all csv files under the users/v1 prefix
49 | format: csv
50 | options:
51 | auto_detect: true
52 | header: true
53 | delim: ","
54 | quote: "\""
55 | escape: "\""
56 | ```
57 |
58 | ```yaml
59 | # FILENAME: ~/.preen/sources.yaml
60 | sources:
61 | - name: users-s3-us-east-1
62 | engine: s3
63 | connection:
64 | bucket_name: users
65 | region: us-east-1
66 | models:
67 | - users
68 | ```
69 |
--------------------------------------------------------------------------------
/internal/engine/sources.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "fmt"
5 | "os"
6 |
7 | yaml "gopkg.in/yaml.v3"
8 | )
9 |
// Connection holds the settings found under a source's `connection:` key in
// the sources YAML file. Which fields are meaningful depends on the source's
// engine; unused fields are left at their zero value.
type Connection struct {
	Host     string `yaml:"host"`     // host of the source
	Port     int    `yaml:"port"`     // port of the source
	Database string `yaml:"database"` // database of the source
	Username string `yaml:"username"` // username used to connect
	Password string `yaml:"password"` // password used to connect
	// AuthSource is the authentication source for MongoDB.
	AuthSource string `yaml:"auth_source"`
	// BucketName and Region locate the bucket for AWS S3 sources.
	BucketName string `yaml:"bucket_name"`
	Region     string `yaml:"region"`
	// NOTE(review): Schema, Warehouse, Role and Account are presumably used
	// by the Snowflake engine — confirm against snowflake.go.
	Schema    string `yaml:"schema"`
	Warehouse string `yaml:"warehouse"`
	Role      string `yaml:"role"`
	Account   string `yaml:"account"`
}
24 |
// Source is one entry of the `sources:` list in the sources YAML file.
type Source struct {
	Name       string     `yaml:"name"`       // name of the source
	Engine     string     `yaml:"engine"`     // engine identifier, e.g. "postgres" or "s3"
	Connection Connection `yaml:"connection"` // engine-specific connection settings
	Models     []string   `yaml:"models"`     // names of the models registered with this source
}
31 |
// SourceConfig is the parsed content of the sources YAML file together with
// the environment it was loaded under.
type SourceConfig struct {
	Sources []Source `yaml:"sources"`
	// Env is populated by GetSourceConfig from EnvInit; it is excluded from
	// YAML (un)marshalling.
	Env *Env `yaml:"-"` // not in yaml
}
36 |
37 | func GetSourceConfig() (*SourceConfig, error) {
38 | sc := SourceConfig{}
39 | env, err := EnvInit()
40 | if err != nil {
41 | return nil, fmt.Errorf("error initializing environment: %w", err)
42 | }
43 | sc.Env = env
44 |
45 | // Create directory if not exists
46 | _, err = os.Stat(sc.Env.PreenConfigPath)
47 |
48 | if os.IsNotExist(err) {
49 | err = os.Mkdir(sc.Env.PreenConfigPath, os.ModePerm)
50 |
51 | if err != nil {
52 | return nil, fmt.Errorf("failed to create directory at %s with error %s", sc.Env.PreenConfigPath, err)
53 | }
54 | } else if err != nil {
55 | return nil, fmt.Errorf("failed to access %s with error %s", sc.Env.PreenConfigPath, err)
56 | }
57 |
58 | configFilePath := getYmlorYamlPath(sc.Env.PreenConfigPath, "sources")
59 |
60 | // Create file if not exists
61 | file, err := os.ReadFile(configFilePath)
62 |
63 | if os.IsNotExist(err) {
64 | _, err = os.Create(configFilePath)
65 |
66 | if err != nil {
67 | return nil, fmt.Errorf("failed to create file at %s with error %s", configFilePath, err)
68 | }
69 |
70 | file, err = os.ReadFile(configFilePath)
71 | }
72 |
73 | if err != nil {
74 | return nil, fmt.Errorf("failed to read source config file: %s", err)
75 | }
76 |
77 | // Pull yaml out of config file
78 | if err = yaml.Unmarshal(file, &sc); err != nil {
79 | return nil, fmt.Errorf("failed to parse source file: %w", err)
80 | }
81 |
82 | // Override config with environment variables
83 | fromEnv(&sc)
84 |
85 | return &sc, nil
86 | }
87 |
--------------------------------------------------------------------------------
/docs/concepts/models.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: what is a model?
3 | ---
4 |
5 | # Models
6 |
7 | ## Overview
8 |
9 | A Preen **Model** is a fundamental concept that defines how data is accessed and structured for local querying. It acts as a bridge between your raw data sources and the Preen system, allowing for targeted data retrieval.
10 |
11 | ## Definition
12 |
13 | A Model is a storage system-dependent configuration that specifies:
14 |
15 | 1. The source of the data
16 | 2. The structure or schema of the data
17 | 3. Any filtering or transformation to be applied
18 |
19 | Models narrow down the set of data to be used for local querying, ensuring that only relevant information is processed.
20 |
21 | ## Examples
22 |
23 | Models can be configured for various types of storage systems. Here are some examples:
24 |
25 | ### SQL Databases
26 |
27 | These models are defined as a YAML file that contains a SQL query.
28 |
29 | ```yaml
30 | # FILENAME: ~/.preen/models/users.yaml
31 | name: users # This name needs to be unique
32 | type: database
33 | query: |
34 | select
35 | users.id,
36 | users.first_name,
37 | users.last_name,
38 | users.birthday
39 | from
40 | users;
41 | ```
42 |
43 | ### File Systems
44 |
45 | These models are configured as a YAML file and contain configurations specific to the underlying file storage system. Here is an example of a model using Amazon S3 and a csv file. The full list of options can be found in the [model configuration reference](../documentation/config/models.md).
46 |
47 | ```yaml
48 | # FILENAME: ~/.preen/models/users.yaml
49 | name: users # This name needs to be unique
50 | type: file
51 | file_patterns:
52 | - "users/v1/**.csv" # This will match all csv files under the users/v1 prefix
53 | format: csv
54 | options:
55 | auto_detect: true
56 | header: true
57 | delim: ","
58 | quote: "\""
59 | escape: "\""
60 | new_line: "\\r\\n"
61 | filename: true
62 | union_by_name: true
63 | ```
64 |
65 | ## Benefits of Using Models
66 |
67 | 1. **Data Isolation**: Models allow you to work with specific subsets of your data, improving performance and reducing noise.
68 | 2. **Abstraction**: They provide a layer of abstraction between your raw data sources and your Preen queries.
69 | 3. **Flexibility**: Models can be easily adjusted to accommodate changes in data structure or source without affecting the rest of your Preen setup.
70 | 4. **Reusability**: Once defined, Models can be shared and reused by different users and teams within your organization.
71 |
72 | ## CLI Commands
73 |
74 | ```bash
75 | preen model build # Builds all models
76 | preen model build --target users # Target a specific model
77 | ```
78 |
79 | For detailed configuration reference see [models.md](../documentation/config/models.md "mention")
80 |
--------------------------------------------------------------------------------
/internal/engine/logging.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "runtime"
7 |
8 | "github.com/sirupsen/logrus"
9 | )
10 |
11 | var logger *logrus.Logger
12 |
13 | type Fields = logrus.Fields
14 |
15 | func Initialize(logLevels ...string) error {
16 | logger = logrus.New()
17 | logger.Out = os.Stdout
18 |
19 | logger.SetFormatter(&logrus.TextFormatter{
20 | FullTimestamp: true,
21 | })
22 |
23 | // Default log level to info
24 | logLevel := "INFO"
25 |
26 | // If log level in environment, use it
27 | if l := os.Getenv("PREENDB_LOG_LEVEL"); l != "" {
28 | logLevel = l
29 | }
30 |
31 | // If log level passed in flag, prefer it
32 | if len(logLevels) > 0 && logLevels[0] != "" {
33 | logLevel = logLevels[0]
34 | }
35 |
36 | // Set loglevel
37 | level, err := logrus.ParseLevel(logLevel)
38 | if err != nil {
39 | return fmt.Errorf("invalid log level: %v", err)
40 | }
41 | logger.SetLevel(level)
42 |
43 | Debugf("Log level set to %s", level)
44 |
45 | return nil
46 | }
47 |
// getCaller reports the source file and line two stack frames above itself,
// i.e. the caller of the function that called getCaller. It must therefore be
// invoked directly from the logging helper whose caller is wanted.
// When the frame cannot be resolved it returns ("unknown", 0).
func getCaller() (string, int) {
	if _, srcFile, srcLine, ok := runtime.Caller(2); ok {
		return srcFile, srcLine
	}
	return "unknown", 0
}
56 |
57 | func IsValidLogLevel(logLevel string) error {
58 | _, err := logrus.ParseLevel(logLevel)
59 |
60 | if err != nil {
61 | return fmt.Errorf("invalid log level: %s. Allowed values are: (DEBUG, INFO, WARN, ERROR, FATAL, PANIC)", logLevel)
62 | }
63 |
64 | return nil
65 | }
66 |
67 | func Debug(args ...interface{}) {
68 | file, line := getCaller()
69 | entry := logger.WithFields(Fields{
70 | "caller": fmt.Sprintf("%s:%d", file, line),
71 | })
72 | entry.Debug(args...)
73 | }
74 |
75 | func Debugf(format string, args ...interface{}) {
76 | file, line := getCaller()
77 | entry := logger.WithFields(Fields{
78 | "caller": fmt.Sprintf("%s:%d", file, line),
79 | })
80 | entry.Debugf(format, args...)
81 | }
82 |
// Warn logs args at warning level through the package logger.
func Warn(args ...interface{}) {
	logger.Warn(args...)
}

// Warnf logs a formatted message at warning level through the package logger.
func Warnf(format string, args ...interface{}) {
	logger.Warnf(format, args...)
}

// Info logs args at info level through the package logger.
func Info(args ...interface{}) {
	logger.Info(args...)
}

// Infof logs a formatted message at info level through the package logger.
func Infof(format string, args ...interface{}) {
	logger.Infof(format, args...)
}

// Error logs args at error level through the package logger.
func Error(args ...interface{}) {
	logger.Error(args...)
}

// Errorf logs a formatted message at error level through the package logger.
func Errorf(format string, args ...interface{}) {
	logger.Errorf(format, args...)
}

// Fatal logs args at fatal level through the package logger.
// NOTE(review): per the logrus documentation a fatal entry terminates the
// process after logging, so callers should not expect this to return.
func Fatal(args ...interface{}) {
	logger.Fatal(args...)
}

// Fatalf logs a formatted message at fatal level through the package logger.
// NOTE(review): per the logrus documentation a fatal entry terminates the
// process after logging, so callers should not expect this to return.
func Fatalf(format string, args ...interface{}) {
	logger.Fatalf(format, args...)
}
113 |
// WithFields returns a log entry bound to the package logger that carries the
// given structured fields.
func WithFields(fields logrus.Fields) *logrus.Entry {
	return logger.WithFields(fields)
}

// WithError returns a log entry bound to the package logger that carries err.
func WithError(err error) *logrus.Entry {
	return logger.WithError(err)
}
121 |
--------------------------------------------------------------------------------
/internal/cli/commands.go:
--------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 |
7 | "github.com/preendata/preen/internal/engine"
8 | "github.com/urfave/cli/v2"
9 | )
10 |
11 | func Query(c *cli.Context) error {
12 | engine.Debug("Executing cli.query")
13 | format := c.String("format")
14 | stmt := c.Args().First()
15 | engine.Debug("Query: ", stmt)
16 |
17 | qr, err := engine.Execute(stmt)
18 |
19 | if err != nil {
20 | engine.Debug("error executing query", err)
21 | return fmt.Errorf("error executing query %w", err)
22 | }
23 | if format == "json" {
24 | if err := engine.PrintPrettyJSON(qr.Rows); err != nil {
25 | return fmt.Errorf("error pretty printing JSON: %w", err)
26 | }
27 | } else {
28 | if err := engine.WriteToTable(qr.Rows, qr.Columns, "table"); err != nil {
29 | return fmt.Errorf("error writing to table: %w", err)
30 | }
31 | }
32 |
33 | return nil
34 | }
35 |
36 | func BuildModel(c *cli.Context) error {
37 | engine.Debug("Executing cli.buildmodel")
38 | modelTarget := c.String("target")
39 | sc, mc, err := engine.GetConfig(modelTarget)
40 | if err != nil {
41 | return fmt.Errorf("error getting config %w", err)
42 | }
43 |
44 | err = engine.BuildModels(sc, mc)
45 | if err != nil {
46 | return fmt.Errorf("error building model %w", err)
47 | }
48 |
49 | return nil
50 | }
51 |
52 | func BuildMetadata(c *cli.Context) error {
53 | engine.Debug("Executing cli.buildInformationSchema")
54 | modelTarget := ""
55 | sc, mc, err := engine.GetConfig(modelTarget)
56 | if err != nil {
57 | return fmt.Errorf("error getting config %w", err)
58 | }
59 |
60 | err = engine.BuildMetadata(sc, mc)
61 | if err != nil {
62 | return fmt.Errorf("error building metadata %w", err)
63 | }
64 |
65 | return nil
66 | }
67 |
68 | func Validate(c *cli.Context) error {
69 | engine.Debug("Executing cli.validate")
70 | modelTarget := ""
71 | sc, mc, err := engine.GetConfig(modelTarget)
72 | if err != nil {
73 | return fmt.Errorf("error getting config %w", err)
74 | }
75 |
76 | if err := engine.ValidateConfigs(sc, mc); err != nil {
77 | return fmt.Errorf("error parsing models %w", err)
78 | }
79 |
80 | if err = engine.BuildMetadata(sc, mc); err != nil {
81 | return fmt.Errorf("error building metadata %w", err)
82 | }
83 |
84 | _, err = engine.BuildColumnMetadata()
85 | if err != nil {
86 | return fmt.Errorf("error building column metadata %w", err)
87 | }
88 |
89 | return nil
90 | }
91 |
92 | func ListSources(c *cli.Context) error {
93 | engine.Debug("Executing cli.listSources")
94 | modelTarget := ""
95 | sc, _, err := engine.GetConfig(modelTarget)
96 | if err != nil {
97 | return fmt.Errorf("error getting config %w", err)
98 | }
99 |
100 | for _, conn := range sc.Sources {
101 | _, err := json.MarshalIndent(conn, "", " ")
102 |
103 | if err != nil {
104 | return fmt.Errorf("error unmarshalling config %w", err)
105 | }
106 | }
107 | return nil
108 | }
109 |
--------------------------------------------------------------------------------
/internal/engine/tables.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "fmt"
5 | "slices"
6 |
7 | "github.com/preendata/sqlparser"
8 | )
9 |
// TableAlias is the name a table is referred to by inside a model query:
// its alias when one is given, otherwise the table name itself.
type TableAlias string

// TableMap maps each alias used in a model query to the table it refers to.
type TableMap map[TableAlias]TableName

// TableSet is a list of table names; the functions in this file keep it free
// of duplicates.
type TableSet []TableName
13 |
14 | func ParseModelTables(mc *ModelConfig) error {
15 | for _, model := range mc.Models {
16 | if model.Type == "database" && model.Parsed != nil {
17 | switch stmt := model.Parsed.(type) {
18 | case *sqlparser.Select:
19 | model.TableMap, model.TableSet = getModelTableAliases(stmt)
20 | default:
21 | return fmt.Errorf("model %s failed. non-select queries not supported", model.Name)
22 | }
23 | }
24 | }
25 | return nil
26 | }
27 |
28 | func getModelTableAliases(stmt *sqlparser.Select) (TableMap, TableSet) {
29 | tableMap := make(TableMap)
30 | tableSet := make(TableSet, 0)
31 | table := stmt.From[0]
32 | switch t := table.(type) {
33 | case *sqlparser.AliasedTableExpr:
34 | if t.As.IsEmpty() {
35 | tableName := TableName(t.Expr.(sqlparser.TableName).Name.String())
36 | tableMap[TableAlias(t.Expr.(sqlparser.TableName).Name.String())] = tableName
37 | if !slices.Contains(tableSet, tableName) {
38 | tableSet = append(tableSet, tableName)
39 | }
40 | } else {
41 | tableName := TableName(t.Expr.(sqlparser.TableName).Name.String())
42 | tableMap[TableAlias(t.As.String())] = tableName
43 | if !slices.Contains(tableSet, tableName) {
44 | tableSet = append(tableSet, tableName)
45 | }
46 | }
47 | case *sqlparser.JoinTableExpr:
48 | _, joinTables := parseJoinTables(t, tableMap, tableSet)
49 | tableSet = append(tableSet, joinTables...)
50 | }
51 |
52 | return tableMap, tableSet
53 | }
54 |
55 | func parseJoinTables(j *sqlparser.JoinTableExpr, tableMap TableMap, tableSet TableSet) (*sqlparser.JoinTableExpr, TableSet) {
56 | rightAlias := j.RightExpr.(*sqlparser.AliasedTableExpr).As.String()
57 | rightTable := j.RightExpr.(*sqlparser.AliasedTableExpr).Expr.(sqlparser.TableName).Name.String()
58 | if rightAlias != "" {
59 | tableMap[TableAlias(rightAlias)] = TableName(rightTable)
60 | if !slices.Contains(tableSet, TableName(rightTable)) {
61 | tableSet = append(tableSet, TableName(rightTable))
62 | }
63 | } else {
64 | tableMap[TableAlias(rightTable)] = TableName(rightTable)
65 | if !slices.Contains(tableSet, TableName(rightTable)) {
66 | tableSet = append(tableSet, TableName(rightTable))
67 | }
68 | }
69 |
70 | switch left := j.LeftExpr.(type) {
71 | case *sqlparser.JoinTableExpr:
72 | _, tableSet = parseJoinTables(left, tableMap, tableSet)
73 | case *sqlparser.AliasedTableExpr:
74 | leftAlias := left.As.String()
75 | leftTable := left.Expr.(sqlparser.TableName).Name.String()
76 | if leftAlias != "" {
77 | tableMap[TableAlias(leftAlias)] = TableName(leftTable)
78 | if !slices.Contains(tableSet, TableName(leftTable)) {
79 | tableSet = append(tableSet, TableName(leftTable))
80 | }
81 | } else {
82 | tableMap[TableAlias(leftTable)] = TableName(leftTable)
83 | if !slices.Contains(tableSet, TableName(leftTable)) {
84 | tableSet = append(tableSet, TableName(leftTable))
85 | }
86 | }
87 | }
88 | return j, tableSet
89 | }
90 |
--------------------------------------------------------------------------------
/internal/engine/env.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "fmt"
5 | "log"
6 | "log/slog"
7 | "os"
8 | "os/user"
9 | "path/filepath"
10 | "reflect"
11 | "regexp"
12 | "strconv"
13 | "time"
14 | )
15 |
// Env holds the settings EnvInit reads from the process environment.
type Env struct {
	PreenConfigPath string // from PREEN_CONFIG_PATH; defaults to ~/.preen
	PreenModelsPath string // from PREEN_MODELS_PATH; defaults to ~/.preen/models
	LicenseKey      string // from PREEN_LICENSE_KEY; defaults to the empty string
}
21 |
22 | func EnvInit() (*Env, error) {
23 | usr, err := user.Current()
24 | if err != nil {
25 | return nil, fmt.Errorf("failed to get current user: %w", err)
26 | }
27 |
28 | return &Env{
29 | PreenConfigPath: getEnv("PREEN_CONFIG_PATH", filepath.Join(usr.HomeDir, ".preen"), false),
30 | PreenModelsPath: getEnv("PREEN_MODELS_PATH", filepath.Join(usr.HomeDir, ".preen/models"), false),
31 | LicenseKey: getEnv("PREEN_LICENSE_KEY", "", false),
32 | }, nil
33 | }
34 |
// envRegex matches an environment-variable reference of the form ${NAME} and
// captures NAME.
var envRegex = regexp.MustCompile(`\${(\w+)}`)

// fromEnv expands ${NAME} references in every settable string reachable from
// v. v must be a non-nil pointer (typically to a struct); anything else makes
// the reflect call panic.
func fromEnv(v interface{}) {
	_fromEnv(reflect.ValueOf(v).Elem()) // assumes pointer to struct
}

// _fromEnv walks rv recursively through pointers, structs, slices and arrays
// and, for every settable string it finds, replaces each ${NAME} reference
// with the value of the environment variable NAME (the empty string when the
// variable is unset). Text around a reference is preserved.
//
// Nil pointers, unexported fields and values of any other kind are left
// untouched.
func _fromEnv(rv reflect.Value) {
	switch rv.Kind() {
	case reflect.Ptr:
		if !rv.IsNil() {
			_fromEnv(rv.Elem())
		}
	case reflect.Struct:
		for i := 0; i < rv.NumField(); i++ {
			_fromEnv(rv.Field(i))
		}
	case reflect.Slice, reflect.Array:
		// Recurse into each element so the element itself — not the
		// containing slice — is the value that gets updated.
		for i := 0; i < rv.Len(); i++ {
			_fromEnv(rv.Index(i))
		}
	case reflect.String:
		// Values reached through unexported fields cannot be set.
		if !rv.CanSet() {
			return
		}
		current := rv.String()
		expanded := envRegex.ReplaceAllStringFunc(current, func(ref string) string {
			name := envRegex.FindStringSubmatch(ref)[1]
			slog.Debug(
				fmt.Sprintf("Setting env var: '%s'", name),
			)
			return os.Getenv(name)
		})
		if expanded != current {
			rv.SetString(expanded)
		}
	}
}
80 |
// getEnv reads the environment variable key and converts it to T.
//
// When the variable is not set, defaultVal is returned unless required is
// true, in which case the process is terminated via log.Fatalf. When the
// variable is set but cannot be parsed as T, defaultVal is returned.
func getEnv[T float64 | string | int | bool | time.Duration](key string, defaultVal T, required bool) T {
	raw, found := os.LookupEnv(key)
	if !found && required {
		log.Fatalf("missing required environment variable %s", key)
	}
	if !found {
		return defaultVal
	}

	var (
		parsed   any
		parseErr error
	)
	// Dispatch on the concrete type behind T; defaultVal always has it.
	switch any(defaultVal).(type) {
	case string:
		parsed = raw
	case int:
		parsed, parseErr = strconv.Atoi(raw)
	case bool:
		parsed, parseErr = strconv.ParseBool(raw)
	case time.Duration:
		parsed, parseErr = time.ParseDuration(raw)
	case float64:
		parsed, parseErr = strconv.ParseFloat(raw, 64)
	default:
		log.Fatalf("unsupported type %T", defaultVal)
	}

	if parseErr != nil {
		return defaultVal
	}
	return parsed.(T)
}
137 |
--------------------------------------------------------------------------------
/internal/engine/duckdb.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "context"
5 | "database/sql"
6 | "database/sql/driver"
7 |
8 | "github.com/marcboeker/go-duckdb"
9 | )
10 |
11 | // Returns a DuckDB appender instance for bulk loading of data
12 | func ddbNewAppender(connector driver.Connector, schema string, table string) (*duckdb.Appender, error) {
13 | conn, err := connector.Connect(context.Background())
14 | if err != nil {
15 | return nil, err
16 | }
17 |
18 | appender, err := duckdb.NewAppenderFromConn(conn, schema, table)
19 | if err != nil {
20 | return nil, err
21 | }
22 |
23 | return appender, nil
24 | }
25 |
26 | func ddbCreateConnector() (driver.Connector, error) {
27 | connector, err := duckdb.NewConnector("./preenContext.db?threads=4", func(execer driver.ExecerContext) error {
28 | bootQueries := []string{
29 | "INSTALL 'json'",
30 | "LOAD 'json'",
31 | "INSTALL aws",
32 | "LOAD aws",
33 | "INSTALL httpfs",
34 | "LOAD httpfs",
35 | }
36 |
37 | for _, query := range bootQueries {
38 | _, err := execer.ExecContext(context.Background(), query, nil)
39 | if err != nil {
40 | return err
41 | }
42 | }
43 | return nil
44 | })
45 |
46 | if err != nil {
47 | return nil, err
48 | }
49 |
50 | return connector, nil
51 | }
52 |
// ddbOpenDatabase wraps connector in a *sql.DB handle. The returned error is
// always nil; it is kept in the signature for callers that check it.
func ddbOpenDatabase(connector driver.Connector) (*sql.DB, error) {
	return sql.OpenDB(connector), nil
}
57 |
58 | func ddbExec(queryString string) error {
59 | connector, err := ddbCreateConnector()
60 | if err != nil {
61 | return err
62 | }
63 |
64 | db, err := ddbOpenDatabase(connector)
65 | if err != nil {
66 | return err
67 | }
68 |
69 | defer db.Close()
70 | Debug("querying duckdb database with query: ", queryString)
71 | _, err = db.Exec(queryString)
72 | if err != nil {
73 | return err
74 | }
75 | return err
76 | }
77 |
78 | func ddbQuery(queryString string, c chan map[string]any) ([]string, error) {
79 | connector, err := ddbCreateConnector()
80 | if err != nil {
81 | return nil, err
82 | }
83 |
84 | db, err := ddbOpenDatabase(connector)
85 | if err != nil {
86 | return nil, err
87 | }
88 |
89 | defer db.Close()
90 | Debug("querying duckdb database with query: ", queryString)
91 | rows, err := db.Query(queryString)
92 | if err != nil {
93 | return nil, err
94 | }
95 | columns, err := rows.Columns()
96 | if err != nil {
97 | return nil, err
98 | }
99 |
100 | err = ReadRows(rows, c)
101 |
102 | if err != nil {
103 | return nil, err
104 | }
105 | return columns, err
106 | }
107 |
// ReadRows drains rows, sending each row to c as a map keyed by column name,
// and closes rows before returning. It returns the first scan error, or the
// iteration error reported by rows once the loop ends.
func ReadRows(rows *sql.Rows, c chan map[string]any) error {
	defer rows.Close()

	names, err := rows.Columns()
	if err != nil {
		return err
	}

	// cells holds the scanned values; targets holds a pointer to each cell so
	// Scan can store whatever type the driver produced.
	cells := make([]any, len(names))
	targets := make([]any, len(names))
	for i := range cells {
		targets[i] = &cells[i]
	}

	for rows.Next() {
		if scanErr := rows.Scan(targets...); scanErr != nil {
			return scanErr
		}

		record := make(map[string]any, len(names))
		for i, name := range names {
			record[name] = cells[i]
		}
		c <- record
	}

	return rows.Err()
}
139 |
--------------------------------------------------------------------------------
/internal/engine/mongo.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "context"
5 | "database/sql/driver"
6 | "encoding/json"
7 | "fmt"
8 | "net/url"
9 | "time"
10 |
11 | "go.mongodb.org/mongo-driver/bson"
12 | "go.mongodb.org/mongo-driver/mongo"
13 | "go.mongodb.org/mongo-driver/mongo/options"
14 | "go.mongodb.org/mongo-driver/mongo/readpref"
15 | )
16 |
17 | func mongoConnFromSource(source Source, ctx context.Context) (*mongo.Client, error) {
18 |
19 | url := fmt.Sprintf(
20 | "mongodb://%s:%s@%s:%d/?authSource=%s",
21 | source.Connection.Username,
22 | url.QueryEscape(source.Connection.Password),
23 | url.QueryEscape(source.Connection.Host),
24 | source.Connection.Port,
25 | source.Connection.AuthSource,
26 | )
27 |
28 | client, err := mongo.Connect(ctx, options.Client().ApplyURI(url))
29 | if err != nil {
30 | return nil, err
31 | }
32 | if err = client.Ping(ctx, readpref.Primary()); err != nil {
33 | return nil, err
34 | }
35 | return client, nil
36 | }
37 |
38 | func ingestMongoModel(r *Retriever, ic chan []driver.Value) error {
39 | Debug(fmt.Sprintf("Retrieving context %s for %s", r.ModelName, r.Source.Name))
40 | ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
41 | defer cancel()
42 | mongoClient, err := mongoConnFromSource(r.Source, ctx)
43 | if err != nil {
44 | return err
45 | }
46 |
47 | // If this function returns an error, then it failed to disconnect from the mongo client
48 | defer func() {
49 | if err = mongoClient.Disconnect(context.Background()); err != nil {
50 | Errorf("Error disconnecting from mongo: %s", err)
51 | }
52 | }()
53 |
54 | defer cancel()
55 |
56 | if err = processMongoDocuments(r, mongoClient, ic); err != nil {
57 | return err
58 | }
59 |
60 | return nil
61 | }
62 |
63 | func processMongoDocuments(r *Retriever, client *mongo.Client, ic chan []driver.Value) error {
64 | collection := client.Database(r.Source.Connection.Database).Collection(r.Collection)
65 | ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
66 | defer cancel()
67 | jsonQuery := make(map[string]interface{})
68 | if err := json.Unmarshal([]byte(r.Query), &jsonQuery); err != nil {
69 | return fmt.Errorf("Error unmarshalling json query: %s", err)
70 | }
71 | bsonQuery, err := bson.Marshal(jsonQuery)
72 | if err != nil {
73 | return fmt.Errorf("Error marshalling json query to BSON: %s", err)
74 | }
75 | cur, err := collection.Find(ctx, bsonQuery)
76 | if err != nil {
77 | return fmt.Errorf("Error executing query: %s", err)
78 | }
79 | if err := cur.Err(); err != nil {
80 | return fmt.Errorf("Error iterating cursor: %s", err)
81 | }
82 | defer cur.Close(ctx)
83 | var rowCounter int64
84 | for cur.Next(ctx) {
85 | var result bson.M
86 | if err := cur.Decode(&result); err != nil {
87 | return fmt.Errorf("Error decoding result: %s", err)
88 | }
89 | jsonBytes, err := json.Marshal(result)
90 | if err != nil {
91 | return fmt.Errorf("Error marshalling result: %s", err)
92 | }
93 | rowCounter++
94 | driverRow := make([]driver.Value, 2)
95 | driverRow[0] = r.Source.Name
96 | driverRow[1] = string(jsonBytes)
97 | ic <- driverRow
98 | }
99 | Debug(fmt.Sprintf("Retrieved %d rows for %s - %s\n", rowCounter, r.Source.Name, r.ModelName))
100 | return nil
101 | }
102 |
--------------------------------------------------------------------------------
/internal/engine/types_test.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "net/netip"
5 | "testing"
6 | "time"
7 |
8 | "github.com/jackc/pgx/v5/pgtype"
9 | )
10 |
11 | func TestDuckdbTimeScan(t *testing.T) {
12 | var dt duckdbTime
13 |
14 | // Test case for pgtype.Time
15 | pgTime := pgtype.Time{Microseconds: 3600000000} // 1 hour in microseconds
16 | err := dt.Scan(pgTime)
17 | if err != nil {
18 | t.Errorf("unexpected error: %v", err)
19 | }
20 | expectedTime := time.Now().Truncate(24 * time.Hour).Add(time.Hour).String()
21 | if string(dt) != expectedTime {
22 | t.Errorf("expected %s, got %s", expectedTime, dt)
23 | }
24 |
25 | // Test case for nil
26 | err = dt.Scan(nil)
27 | if err != nil {
28 | t.Errorf("unexpected error: %v", err)
29 | }
30 | if dt != "" {
31 | t.Errorf("expected empty string, got %s", dt)
32 | }
33 |
34 | // Test case for invalid type
35 | err = dt.Scan(123)
36 | if err == nil {
37 | t.Errorf("expected error, got nil")
38 | }
39 | }
40 |
41 | func TestDuckdbTimeValue(t *testing.T) {
42 | dt := duckdbTime("test_time")
43 | val, err := dt.Value()
44 | if err != nil {
45 | t.Errorf("unexpected error: %v", err)
46 | }
47 | if val != "test_time" {
48 | t.Errorf("expected test_time, got %v", val)
49 | }
50 | }
51 |
52 | func TestDuckdbDurationScan(t *testing.T) {
53 | var dd duckdbDuration
54 |
55 | // Test case for pgtype.Interval
56 | pgInterval := pgtype.Interval{Microseconds: 1000000, Days: 1, Months: 1}
57 | err := dd.Scan(pgInterval)
58 | if err != nil {
59 | t.Errorf("unexpected error: %v", err)
60 | }
61 | expectedDuration := "Microseconds: 1000000, Days: 1, Months: 1"
62 | if string(dd) != expectedDuration {
63 | t.Errorf("expected %s, got %s", expectedDuration, dd)
64 | }
65 |
66 | // Test case for nil
67 | err = dd.Scan(nil)
68 | if err != nil {
69 | t.Errorf("unexpected error: %v", err)
70 | }
71 | if dd != "" {
72 | t.Errorf("expected empty string, got %s", dd)
73 | }
74 |
75 | // Test case for invalid type
76 | err = dd.Scan(123)
77 | if err == nil {
78 | t.Errorf("expected error, got nil")
79 | }
80 | }
81 |
82 | func TestDuckdbDurationValue(t *testing.T) {
83 | dd := duckdbDuration("test_duration")
84 | val, err := dd.Value()
85 | if err != nil {
86 | t.Errorf("unexpected error: %v", err)
87 | }
88 | if val != "test_duration" {
89 | t.Errorf("expected test_duration, got %v", val)
90 | }
91 | }
92 |
93 | func TestDuckdbNetIpPrefixScan(t *testing.T) {
94 | var dip duckdbNetIpPrefix
95 |
96 | // Test case for netip.Prefix
97 | prefix, _ := netip.ParsePrefix("192.168.1.0/24")
98 | err := dip.Scan(prefix)
99 | if err != nil {
100 | t.Errorf("unexpected error: %v", err)
101 | }
102 | expectedPrefix := "192.168.1.0/24"
103 | if string(dip) != expectedPrefix {
104 | t.Errorf("expected %s, got %s", expectedPrefix, dip)
105 | }
106 |
107 | // Test case for nil
108 | err = dip.Scan(nil)
109 | if err != nil {
110 | t.Errorf("unexpected error: %v", err)
111 | }
112 | if dip != "" {
113 | t.Errorf("expected empty string, got %s", dip)
114 | }
115 |
116 | // Test case for invalid type
117 | err = dip.Scan(123)
118 | if err == nil {
119 | t.Errorf("expected error, got nil")
120 | }
121 | }
122 |
123 | func TestDuckdbNetIpPrefixValue(t *testing.T) {
124 | dip := duckdbNetIpPrefix("test_prefix")
125 | val, err := dip.Value()
126 | if err != nil {
127 | t.Errorf("unexpected error: %v", err)
128 | }
129 | if val != "test_prefix" {
130 | t.Errorf("expected test_prefix, got %v", val)
131 | }
132 | }
133 |
--------------------------------------------------------------------------------
/internal/engine/retrieve.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "database/sql/driver"
5 | "fmt"
6 | "slices"
7 | "strings"
8 |
9 | "golang.org/x/sync/errgroup"
10 | )
11 |
// Retriever bundles everything one ingest function needs to pull a single
// model from a single source. Retrieve builds one per (model, source) pair.
type Retriever struct {
	// ModelName is the model's name as configured.
	ModelName string
	// TableName is the model name with every "-" replaced by "_".
	TableName string
	// Query is the model's query. SQL engines execute it as-is; the MongoDB
	// path parses it as a JSON document and uses it as the find filter.
	Query string
	// Source is the source the data is read from.
	Source Source
	// Options are the model's options; the S3 path renders them as read_csv
	// arguments.
	Options Options
	// Format is the file format for file-based models; the S3 path accepts
	// only "csv".
	Format string
	// FilePatterns lists the file patterns that the S3 path joins onto the
	// source's bucket name.
	FilePatterns *[]string
	// Collection is the MongoDB collection to query; Retrieve falls back to
	// the model name when the model does not set one.
	Collection string
}
22 |
23 | // Retrieve data from sources and insert into the duckDB database.
24 | // Database sources are inserted via the Insert function.
25 | // File sources are inserted via the native duckDB integrations.
26 | func Retrieve(sc *SourceConfig, mc *ModelConfig) error {
27 | for _, model := range mc.Models {
28 | ic := make(chan []driver.Value, 10000)
29 | dc := make(chan []int64)
30 | tableName := strings.ReplaceAll(string(model.Name), "-", "_")
31 | // Only insert database models into DuckDB
32 | if model.Type == "database" {
33 | go Insert(ModelName(tableName), ic, dc)
34 | }
35 | g := errgroup.Group{}
36 | g.SetLimit(200)
37 | for _, source := range sc.Sources {
38 | if !slices.Contains(source.Models, string(model.Name)) {
39 | Debug(fmt.Sprintf("Skipping %s for %s", model.Name, source.Name))
40 | continue
41 | }
42 | r := Retriever{
43 | Source: source,
44 | ModelName: string(model.Name),
45 | Query: model.Query,
46 | Options: model.Options,
47 | Format: model.Format,
48 | FilePatterns: model.FilePatterns,
49 | TableName: tableName,
50 | }
51 | if model.Collection != "" {
52 | r.Collection = model.Collection
53 | } else {
54 | r.Collection = string(model.Name)
55 | }
56 | switch source.Engine {
57 | case "s3":
58 | err := func(r Retriever, ic chan []driver.Value) error {
59 | g.Go(func() error {
60 | if err := ingestS3Model(&r); err != nil {
61 | return err
62 | }
63 | return nil
64 | })
65 |
66 | return nil
67 | }(r, ic)
68 | if err != nil {
69 | return err
70 | }
71 | case "snowflake":
72 | err := func(r Retriever, ic chan []driver.Value) error {
73 | g.Go(func() error {
74 | if err := ingestSnowflakeModel(&r, ic); err != nil {
75 | return err
76 | }
77 | return nil
78 | })
79 | return nil
80 | }(r, ic)
81 | if err != nil {
82 | return err
83 | }
84 | case "postgres":
85 | err := func(r Retriever, ic chan []driver.Value) error {
86 | g.Go(func() error {
87 | if err := ingestPostgresModel(&r, ic); err != nil {
88 | return err
89 | }
90 | return nil
91 | })
92 | return nil
93 | }(r, ic)
94 | if err != nil {
95 | return err
96 | }
97 | case "mysql":
98 | err := func(r Retriever, ic chan []driver.Value) error {
99 | g.Go(func() error {
100 | if err := ingestMysqlModel(&r, ic); err != nil {
101 | return err
102 | }
103 | return nil
104 | })
105 | return nil
106 | }(r, ic)
107 | if err != nil {
108 | return err
109 | }
110 | case "mongodb":
111 | err := func(r Retriever, ic chan []driver.Value) error {
112 | g.Go(func() error {
113 | if err := ingestMongoModel(&r, ic); err != nil {
114 | return err
115 | }
116 | return nil
117 | })
118 | return nil
119 | }(r, ic)
120 | if err != nil {
121 | return err
122 | }
123 | default:
124 | Error(fmt.Sprintf("Engine %s not supported", source.Engine))
125 | }
126 | }
127 | if err := g.Wait(); err != nil {
128 | return err
129 | }
130 | ic <- []driver.Value{"quit"}
131 | if model.Type == "database" {
132 | ConfirmInsert(string(model.Name), dc, 0)
133 | }
134 | }
135 | return nil
136 | }
137 |
--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | workflow_dispatch:
5 | release:
6 | types: [published]
7 |
8 | permissions:
9 | contents: write
10 |
11 | jobs:
12 | build-linux:
13 | runs-on: ubuntu-latest
14 |
15 | strategy:
16 | matrix:
17 | go-version: [1.23.0]
18 | os: [linux]
19 | arch: [amd64]
20 |
21 | steps:
22 | - name: Checkout code
23 | uses: actions/checkout@v2
24 |
25 | - name: Set up Go
26 | uses: actions/setup-go@v2
27 | with:
28 | go-version: ${{ matrix.go-version }}
29 |
30 | - name: Install dependencies
31 | run: go mod tidy
32 |
33 | - name: Build
34 | env:
35 | GOOS: ${{ matrix.os }}
36 | GOARCH: ${{ matrix.arch }}
37 | CGO_ENABLED: 1
38 | run: |
39 | CGO_ENABLED=1 CGO_LDFLAGS="-L/usr/lib" go build -o output/${{ matrix.os }}_${{ matrix.arch }}/preen --ldflags="-extldflags=-static" -tags osusergo,netgo main.go
40 | tar -C output/${{ matrix.os }}_${{ matrix.arch }} -czvf preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.tar.gz preen
41 | echo "Built for $GOOS $GOARCH"
42 |
43 | - name: Generate checksum
44 | run: |
45 | sha256sum preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.tar.gz | tee preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.sha256sum
46 |
47 | - name: Upload binary
48 | uses: svenstaro/upload-release-action@v2
49 | with:
50 | file: preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.tar.gz
51 | repo_token: ${{ secrets.GITHUB_TOKEN }}
52 | tag: ${{ github.ref }}
53 |
54 | - name: Upload checksum
55 | uses: svenstaro/upload-release-action@v2
56 | with:
57 | file: preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.sha256sum
58 | repo_token: ${{ secrets.GITHUB_TOKEN }}
59 | tag: ${{ github.ref }}
60 |
61 | build-macos:
62 | runs-on: macos-latest
63 |
64 | strategy:
65 | matrix:
66 | go-version: [1.23.0]
67 | os: [darwin]
68 | arch: [arm64, amd64]
69 |
70 | steps:
71 | - name: Checkout code
72 | uses: actions/checkout@v2
73 |
74 | - name: Set up Go
75 | uses: actions/setup-go@v2
76 | with:
77 | go-version: ${{ matrix.go-version }}
78 |
79 | - name: Install dependencies
80 | run: go mod tidy
81 |
82 | - name: Build
83 | env:
84 | GOOS: ${{ matrix.os }}
85 | GOARCH: ${{ matrix.arch }}
86 | CGO_ENABLED: 1
87 | run: |
88 | CGO_ENABLED=1 CGO_LDFLAGS="-L/usr/lib" go build -o output/${{ matrix.os }}_${{ matrix.arch }}/preen main.go
89 | tar -C output/${{ matrix.os }}_${{ matrix.arch }} -czvf preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.tar.gz preen
90 | echo "Built for $GOOS $GOARCH"
91 |
92 | - name: Generate checksum
93 | run: |
94 | shasum -a 256 preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.tar.gz | tee preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.sha256sum
95 |
96 | - name: Upload binary
97 | uses: svenstaro/upload-release-action@v2
98 | with:
99 | file: preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.tar.gz
100 | repo_token: ${{ secrets.GITHUB_TOKEN }}
101 | tag: ${{ github.ref }}
102 |
103 | - name: Upload checksum
104 | uses: svenstaro/upload-release-action@v2
105 | with:
106 | file: preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.sha256sum
107 | repo_token: ${{ secrets.GITHUB_TOKEN }}
108 | tag: ${{ github.ref }}
--------------------------------------------------------------------------------
/internal/engine/snowflake.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "context"
5 | "database/sql"
6 | "database/sql/driver"
7 | "fmt"
8 | "reflect"
9 | "time"
10 |
11 | "github.com/snowflakedb/gosnowflake"
12 | )
13 |
14 | func getSnowflakePoolFromSource(source Source) (*sql.DB, error) {
15 |
16 | config := gosnowflake.Config{
17 | Account: source.Connection.Account,
18 | User: source.Connection.Username,
19 | Password: source.Connection.Password,
20 | Database: source.Connection.Database,
21 | Schema: source.Connection.Schema,
22 | Warehouse: source.Connection.Warehouse,
23 | }
24 | connStr, err := gosnowflake.DSN(&config)
25 |
26 | if err != nil {
27 | panic(err)
28 | }
29 |
30 | db, err := sql.Open("snowflake", connStr)
31 | if err != nil {
32 | panic(err)
33 | }
34 | err = db.PingContext(context.Background())
35 | if err != nil {
36 | return nil, fmt.Errorf("error pinging Snowflake: %w", err)
37 | }
38 |
39 | return db, nil
40 | }
41 |
42 | func ingestSnowflakeModel(r *Retriever, ic chan []driver.Value) error {
43 | Debug(fmt.Sprintf("Retrieving context %s for %s", r.ModelName, r.Source.Name))
44 | clientPool, err := getSnowflakePoolFromSource(r.Source)
45 | if err != nil {
46 | return err
47 | }
48 | defer clientPool.Close()
49 | rows, err := clientPool.Query(r.Query)
50 | if err != nil {
51 | return fmt.Errorf("error querying Snowflake: %w", err)
52 | }
53 | defer rows.Close()
54 |
55 | if err = processSnowflakeRows(r, ic, rows); err != nil {
56 | return err
57 | }
58 |
59 | return nil
60 | }
61 |
62 | func processSnowflakeRows(r *Retriever, ic chan []driver.Value, rows *sql.Rows) error {
63 | valuePtrs, err := processSnowflakeColumns(rows)
64 |
65 | if err != nil {
66 | return fmt.Errorf("error processing Snowflake columns: %w", err)
67 | }
68 | for rows.Next() {
69 | if err = rows.Scan(valuePtrs...); err != nil {
70 | return fmt.Errorf("error scanning Snowflake rows: %w", err)
71 | }
72 | driverRow := make([]driver.Value, len(valuePtrs)+1)
73 | driverRow[0] = r.Source.Name
74 | for i, ptr := range valuePtrs {
75 | switch v := ptr.(type) {
76 | case *duckdbDecimal:
77 | driverRow[i+1], err = v.Value()
78 | if err != nil {
79 | return fmt.Errorf("error converting duckdbDecimal: %w", err)
80 | }
81 | default:
82 | driverRow[i+1] = dereferenceIfPtr(ptr)
83 | }
84 | }
85 | ic <- driverRow
86 | }
87 |
88 | return nil
89 | }
90 |
// dereferenceIfPtr returns the value v points to when v holds a pointer, and
// v itself otherwise.
//
// It is meant for T = any (or another interface type), where the pointee can
// be returned as T. Two cases that would otherwise panic are handled:
//   - a nil pointer yields the zero value of T (nil for interface types);
//   - when the pointee is not assignable to T, for example when T is itself
//     a concrete pointer type, v is returned unchanged.
func dereferenceIfPtr[T any](v T) T {
	rv := reflect.ValueOf(v)
	if rv.Kind() != reflect.Ptr {
		return v
	}
	if rv.IsNil() {
		var zero T
		return zero
	}
	if pointee, ok := rv.Elem().Interface().(T); ok {
		return pointee
	}
	return v
}
98 |
99 | func processSnowflakeColumns(rows *sql.Rows) ([]any, error) {
100 | columnTypes, err := rows.ColumnTypes()
101 | if err != nil {
102 | return nil, err
103 | }
104 | valuePtrs := make([]any, len(columnTypes))
105 |
106 | for i, columnType := range columnTypes {
107 |
108 | switch columnType.DatabaseTypeName() {
109 | case "DECIMAL", "NUMBER", "FLOAT", "DOUBLE", "REAL", "FIXED":
110 | valuePtrs[i] = new(duckdbDecimal)
111 | case "BIGINT":
112 | valuePtrs[i] = new(int64)
113 | case "BOOLEAN":
114 | valuePtrs[i] = new(bool)
115 | case "INT", "MEDIUMINT":
116 | valuePtrs[i] = new(int32)
117 | case "SMALLINT", "YEAR":
118 | valuePtrs[i] = new(int16)
119 | case "TINYINT":
120 | valuePtrs[i] = new(int8)
121 | case "BINARY", "VARBINARY", "VARIANT", "OBJECT", "ARRAY":
122 | valuePtrs[i] = new([]byte)
123 | case "DATE", "DATETIME", "TIMESTAMP_TZ", "TIMESTAMP_LTZ", "TIMESTAMP_NTZ":
124 | valuePtrs[i] = new(time.Time)
125 | case "CHAR", "CHARACTER", "NCHAR", "VARCHAR", "TEXT", "STRING", "NVARCHAR", "NVARCHAR2", "CHAR VARYING", "NCHAR VARYING", "ENUM", "SET", "JSON", "TIME":
126 | Debug(fmt.Sprintf("Column type is a string: %s", columnType.DatabaseTypeName()))
127 | valuePtrs[i] = new(string)
128 | default:
129 | return nil, fmt.Errorf("unsupported column type: %s", columnType.DatabaseTypeName())
130 | }
131 | }
132 |
133 | return valuePtrs, nil
134 | }
135 |
--------------------------------------------------------------------------------
/internal/engine/mysql.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "database/sql"
5 | "database/sql/driver"
6 | "fmt"
7 | "log/slog"
8 | "net/url"
9 | "reflect"
10 | "time"
11 |
12 | _ "github.com/go-sql-driver/mysql"
13 | )
14 |
15 | func GetMysqlPoolFromSource(source Source) (*sql.DB, error) {
16 | // Example url := "root:thisisnotarealpassword@tcp(127.0.0.1:33061)/mysql_db_1"
17 | url := fmt.Sprintf(
18 | "%s:%s@tcp(%s:%d)/%s?parseTime=true",
19 | source.Connection.Username,
20 | url.QueryEscape(source.Connection.Password),
21 | url.QueryEscape(source.Connection.Host),
22 | source.Connection.Port,
23 | source.Connection.Database,
24 | )
25 | dbpool, err := getMysqlPool(url)
26 |
27 | if err != nil {
28 | return nil, err
29 | }
30 |
31 | return dbpool, nil
32 | }
33 |
// getMysqlPool opens a *sql.DB for the "mysql" driver using url as the data
// source name. A failure is logged and returned.
func getMysqlPool(url string) (*sql.DB, error) {
	pool, openErr := sql.Open("mysql", url)
	if openErr == nil {
		return pool, nil
	}

	slog.Error(
		fmt.Sprintf("Unable to connect to database: %v\n", openErr),
	)
	return nil, openErr
}
46 |
47 | // Retrieve retrieves data from a MySQL source and sends it to the insert channel.
48 | func ingestMysqlModel(r *Retriever, ic chan []driver.Value) error {
49 | Debug(fmt.Sprintf("Retrieving context %s for %s", r.ModelName, r.Source.Name))
50 | clientPool, err := GetMysqlPoolFromSource(r.Source)
51 | if err != nil {
52 | return err
53 | }
54 | defer clientPool.Close()
55 | rows, err := clientPool.Query(r.Query)
56 | if err != nil {
57 | return err
58 | }
59 | defer rows.Close()
60 |
61 | if err = processMysqlRows(r, ic, rows); err != nil {
62 | return err
63 | }
64 |
65 | return nil
66 | }
67 |
68 | // processMysqlRows processes rows from a MySQL source and sends them to the insert channel.
69 | func processMysqlRows(r *Retriever, ic chan []driver.Value, rows *sql.Rows) error {
70 | // Get the column types from the rows and create a slice of pointers to scan into.
71 | valuePtrs, err := processMysqlColumns(rows)
72 | if err != nil {
73 | return err
74 | }
75 | for rows.Next() {
76 | if err = rows.Scan(valuePtrs...); err != nil {
77 | return err
78 | }
79 | driverRow := make([]driver.Value, len(valuePtrs)+1)
80 | driverRow[0] = r.Source.Name
81 | for i, ptr := range valuePtrs {
82 | if ptr == nil {
83 | driverRow[i+1] = nil
84 | continue
85 | }
86 | switch reflect.TypeOf(ptr).String() {
87 | case "*engine.duckdbDecimal":
88 | value := reflect.ValueOf(ptr).Elem().Interface()
89 | driverRow[i+1], err = value.(duckdbDecimal).Value()
90 | if err != nil {
91 | return err
92 | }
93 | default:
94 | // If the value is not a custom type, we can just use the value as is.
95 | driverRow[i+1] = reflect.ValueOf(ptr).Elem().Interface()
96 | }
97 | }
98 | ic <- driverRow
99 | }
100 |
101 | return nil
102 | }
103 |
104 | func processMysqlColumns(rows *sql.Rows) ([]any, error) {
105 | columnTypes, err := rows.ColumnTypes()
106 | if err != nil {
107 | return nil, err
108 | }
109 | valuePtrs := make([]any, len(columnTypes))
110 |
111 | for i, columnType := range columnTypes {
112 | switch columnType.DatabaseTypeName() {
113 | case "DECIMAL", "NUMERIC", "FLOAT", "DOUBLE", "REAL":
114 | valuePtrs[i] = new(duckdbDecimal)
115 | case "BIGINT":
116 | valuePtrs[i] = new(int64)
117 | case "INT", "MEDIUMINT":
118 | valuePtrs[i] = new(int32)
119 | case "SMALLINT", "YEAR":
120 | valuePtrs[i] = new(int16)
121 | case "TINYINT":
122 | valuePtrs[i] = new(int8)
123 | case "BIT", "BINARY", "VARBINARY", "TINYBLOB", "MEDIUMBLOB", "LONGBLOB", "BLOB":
124 | valuePtrs[i] = new([]byte)
125 | case "DATE", "DATETIME", "TIMESTAMP":
126 | valuePtrs[i] = new(time.Time)
127 | case "CHAR", "VARCHAR", "TEXT", "TINYTEXT", "MEDIUMTEXT", "LONGTEXT", "ENUM", "SET", "JSON", "TIME":
128 | valuePtrs[i] = new(string)
129 | default:
130 | return nil, fmt.Errorf("unsupported column type: %s", columnType.DatabaseTypeName())
131 | }
132 | }
133 | return valuePtrs, nil
134 | }
135 |
--------------------------------------------------------------------------------
/internal/engine/s3.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "fmt"
5 | "path/filepath"
6 | "reflect"
7 | )
8 |
9 | func ingestS3Model(r *Retriever) error {
10 | switch r.Format {
11 | case "csv":
12 | optionsString, err := getCSVOptions(r.Options)
13 | if err != nil {
14 | return fmt.Errorf("failed to get csv options: %v", err)
15 | }
16 | query := fmt.Sprintf(
17 | `create or replace table main.%s as select * from read_csv(%s,%s)
18 | `, r.TableName, formatFilePatterns(r), *optionsString,
19 | )
20 | Debug(fmt.Sprintf("running query: %s", query))
21 | if err := ddbExec(query); err != nil {
22 | Debug(fmt.Sprintf("running query: %s", query))
23 | return fmt.Errorf("failed to create file model table %s: %v", r.ModelName, err)
24 | }
25 | default:
26 | return fmt.Errorf("unsupported model file format %s", r.Format)
27 | }
28 | return nil
29 | }
30 |
31 | func formatFilePatterns(r *Retriever) string {
32 | queryString := "["
33 | for i, v := range *r.FilePatterns {
34 | if i == len(*r.FilePatterns)-1 {
35 | queryString += fmt.Sprintf("'%s://%s'", r.Source.Engine, filepath.Join(r.Source.Connection.BucketName, v)) + "]"
36 | break
37 | }
38 | queryString += fmt.Sprintf("'%s://%s', ", r.Source.Engine, filepath.Join(r.Source.Connection.BucketName, v))
39 | }
40 | return queryString
41 | }
42 |
43 | func getCSVOptions(o Options) (*string, error) {
44 | options := reflect.VisibleFields(reflect.TypeOf(o))
45 | queryString := new(string)
46 | for _, option := range options {
47 | if _, ok := option.Tag.Lookup("default"); !ok {
48 | return nil, fmt.Errorf("missing default value for option %s", option.Name)
49 | }
50 | if _, ok := option.Tag.Lookup("yaml"); !ok {
51 | return nil, fmt.Errorf("missing yaml tag for option %s", option.Name)
52 | }
53 |
54 | defaultVal := option.Tag.Get("default")
55 | optionName := option.Tag.Get("yaml")
56 | optionValue := getDefaultValue(reflect.ValueOf(o).FieldByName(option.Name).Interface(), defaultVal)
57 | if optionValue == "" {
58 | continue
59 | }
60 | optionString := fmt.Sprintf("%s = %v", optionName, optionValue)
61 | if *queryString == "" {
62 | *queryString += optionString
63 | } else {
64 | *queryString = fmt.Sprintf("%s, %s", *queryString, optionString)
65 | }
66 | }
67 | return queryString, nil
68 | }
69 |
70 | func getDefaultValue(key any, defaultVal any) any {
71 | switch key := key.(type) {
72 | case *[]string:
73 | // If the key is not set and there is not default value, return an empty string
74 | if key == nil && defaultVal == "-" {
75 | return ""
76 | }
77 | // If the key is not set and there is a default value, return the default value
78 | if key == nil && defaultVal != "-" {
79 | return defaultVal
80 | }
81 | // Convert the []string from YAML to a string array for the query
82 | queryString := "["
83 | for i, v := range *key {
84 | if i == len(*key)-1 {
85 | queryString += v + "]"
86 | break
87 | }
88 | queryString += v + ", "
89 | }
90 | return queryString
91 | case *bool:
92 | if key == nil {
93 | return defaultVal
94 | }
95 | return *key
96 | case *string:
97 | // If the key is not set and there is not default value, return an empty string
98 | if key == nil && defaultVal == "-" {
99 | return ""
100 | }
101 | // If the key is not set and there is a default value, return the default value
102 | if key == nil && defaultVal != "-" {
103 | return fmt.Sprintf("'%s'", defaultVal)
104 | }
105 | return fmt.Sprintf("'%s'", *key)
106 | case *int64:
107 | if key == nil {
108 | return defaultVal
109 | }
110 | return *key
111 | case *[]Type:
112 | // If the key is not set and there is not default value, return an empty string
113 | if key == nil && defaultVal == "-" {
114 | return ""
115 | }
116 | // If the key is not set and there is a default value, return the default value
117 | if key == nil && defaultVal != "-" {
118 | return defaultVal
119 | }
120 | // Convert the []Type from YAML to a string object for the query
121 | queryString := "{"
122 | for i, v := range *key {
123 | if i == len(*key)-1 {
124 | queryString += fmt.Sprintf("'%s': '%s'", v.Name, v.Type) + "}"
125 | break
126 | }
127 | queryString += fmt.Sprintf("'%s': '%s',", v.Name, v.Type)
128 | }
129 | return queryString
130 | }
131 | return key
132 | }
133 |
--------------------------------------------------------------------------------
/internal/engine/postgres.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "context"
5 | "database/sql/driver"
6 | "fmt"
7 | "net/url"
8 | "reflect"
9 |
10 | "github.com/jackc/pgx/v5"
11 | "github.com/jackc/pgx/v5/pgxpool"
12 | "github.com/marcboeker/go-duckdb"
13 | )
14 |
// QueryResult holds the outcome of a query as a list of row maps plus a list
// of column names.
// NOTE(review): no producer or consumer of this type is visible in this part
// of the file; the field notes below are inferred from names and types.
type QueryResult struct {
	// Rows has one map per result row — presumably keyed by column name; confirm.
	Rows []map[string]any
	// Columns lists the column names — presumably in result order; confirm.
	Columns []string
}
19 |
20 | func getPostgresPool(url string) (*pgxpool.Pool, error) {
21 | // urlExample := "postgres://username:password@localhost:5432/database_name"
22 | dbpool, err := pgxpool.New(context.Background(), url)
23 |
24 | if err != nil {
25 | Error(
26 | fmt.Sprintf("Unable to connect to database: %v\n", err),
27 | )
28 | return nil, err
29 | }
30 | return dbpool, nil
31 | }
32 |
33 | func getPostgresPoolFromSource(source Source) (*pgxpool.Pool, error) {
34 |
35 | url := fmt.Sprintf(
36 | "postgres://%s:%s@%s:%d/%s",
37 | source.Connection.Username,
38 | url.QueryEscape(source.Connection.Password),
39 | url.QueryEscape(source.Connection.Host),
40 | source.Connection.Port,
41 | source.Connection.Database,
42 | )
43 | dbpool, err := getPostgresPool(url)
44 |
45 | if err != nil {
46 | return nil, err
47 | }
48 |
49 | return dbpool, nil
50 | }
51 |
52 | func ingestPostgresModel(r *Retriever, ic chan []driver.Value) error {
53 | Debug(fmt.Sprintf("Retrieving context %s for %s", r.ModelName, r.Source.Name))
54 | clientPool, err := getPostgresPoolFromSource(r.Source)
55 | if err != nil {
56 | return err
57 | }
58 | defer clientPool.Close()
59 | rows, err := clientPool.Query(context.Background(), r.Query)
60 | if err != nil {
61 | return err
62 | }
63 | defer rows.Close()
64 |
65 | if err = processPostgresRows(r, ic, rows); err != nil {
66 | return err
67 | }
68 |
69 | return nil
70 | }
71 |
72 | func processPostgresRows(r *Retriever, ic chan []driver.Value, rows pgx.Rows) error {
73 | var rowCounter int64
74 | for rows.Next() {
75 | values, err := rows.Values()
76 | if err != nil {
77 | return err
78 | }
79 | rowCounter++
80 | driverRow := make([]driver.Value, len(values)+1)
81 | driverRow[0] = r.Source.Name
82 | for i, value := range values {
83 | if value == nil {
84 | driverRow[i+1] = nil
85 | continue
86 | }
87 | switch reflect.TypeOf(value).String() {
88 | case "pgtype.Numeric":
89 | decimal := duckdbDecimal(0)
90 | if err = decimal.Scan(value); err != nil {
91 | return err
92 | }
93 | driverRow[i+1], err = decimal.Value()
94 | if err != nil {
95 | return err
96 | }
97 | case "pgtype.Time":
98 | timeVal := duckdbTime("")
99 | if err = timeVal.Scan(value); err != nil {
100 | return err
101 | }
102 | driverRow[i+1], err = timeVal.Value()
103 | if err != nil {
104 | return err
105 | }
106 | case "pgtype.Interval":
107 | duration := duckdbDuration("")
108 | if err = duration.Scan(value); err != nil {
109 | return err
110 | }
111 | driverRow[i+1], err = duration.Value()
112 | if err != nil {
113 | return err
114 | }
115 | case "netip.Prefix":
116 | prefix := duckdbNetIpPrefix("")
117 | if err = prefix.Scan(value); err != nil {
118 | return err
119 | }
120 | driverRow[i+1], err = prefix.Value()
121 | if err != nil {
122 | return err
123 | }
124 | case "net.HardwareAddr":
125 | hwAddr := duckdbHardwareAddr("")
126 | if err = hwAddr.Scan(value); err != nil {
127 | return err
128 | }
129 | driverRow[i+1], err = hwAddr.Value()
130 | if err != nil {
131 | return err
132 | }
133 | case "map[string]interface {}", "[]interface {}":
134 | jsonVal := duckdbJSON("")
135 | if err = jsonVal.Scan(value); err != nil {
136 | return err
137 | }
138 | driverRow[i+1], err = jsonVal.Value()
139 | if err != nil {
140 | return err
141 | }
142 | // These are UUIDs
143 | case "[16]uint8":
144 | uuid := duckdbUUID(duckdb.UUID{})
145 | if err = uuid.Scan(value); err != nil {
146 | return err
147 | }
148 | driverRow[i+1], err = uuid.Value()
149 | if err != nil {
150 | return err
151 | }
152 | default:
153 | driverRow[i+1] = value
154 | }
155 | }
156 | ic <- driverRow
157 | }
158 | Debug(fmt.Sprintf("Retrieved %d rows for %s - %s\n", rowCounter, r.Source.Name, r.ModelName))
159 | if err := rows.Err(); err != nil {
160 | return err
161 | }
162 | return nil
163 | }
164 |
--------------------------------------------------------------------------------
/internal/cli/app.go:
--------------------------------------------------------------------------------
1 | package cli
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/preendata/preen/internal/engine"
7 | "github.com/urfave/cli/v2"
8 | )
9 |
10 | func NewApp() *cli.App {
11 | app := &cli.App{
12 | Name: "preen",
13 | Usage: "A command-line application for preen",
14 | Flags: []cli.Flag{
15 | &cli.StringFlag{
16 | Name: "log-level",
17 | Aliases: []string{"l"},
18 | Usage: "Set the log level (DEBUG, INFO, WARN, ERROR, FATAL, PANIC)",
19 | },
20 | &cli.BoolFlag{
21 | Name: "verbose",
22 | Aliases: []string{"v"},
23 | Usage: "Set the log level to DEBUG",
24 | },
25 | },
26 | Commands: []*cli.Command{
27 | {
28 | Name: "repl",
29 | Aliases: []string{"r"},
30 | Usage: "Initiate interactive query session",
31 | Action: Repl,
32 | Flags: []cli.Flag{
33 | &cli.StringFlag{
34 | Name: "output-format",
35 | Aliases: []string{"o"},
36 | Usage: "Set output format. Options are 'table', 'csv', 'markdown'",
37 | DefaultText: "table",
38 | Action: func(c *cli.Context, v string) error {
39 | format := c.String("output-format")
40 | if format != "table" && format != "csv" && format != "markdown" {
41 | return fmt.Errorf("invalid format: %s. Allowed values are 'table', 'csv', 'markdown'", format)
42 | }
43 | return nil
44 | },
45 | },
46 | },
47 | },
48 | {
49 | Name: "query",
50 | Aliases: []string{"q"},
51 | Usage: "Execute a query",
52 | Action: Query,
53 | Flags: []cli.Flag{
54 | &cli.StringFlag{
55 | Name: "format",
56 | Aliases: []string{"f"},
57 | Usage: "Set output format. Options are 'table' or 'json'",
58 | DefaultText: "table",
59 | Action: func(c *cli.Context, v string) error {
60 | format := c.String("format")
61 | if format != "table" && format != "json" {
62 | return fmt.Errorf("invalid format: %s. Allowed values are 'table' or 'json'", format)
63 | }
64 | return nil
65 | },
66 | },
67 | },
68 | },
69 | {
70 | Name: "model",
71 | Aliases: []string{"m"},
72 | Usage: "Commands to manage models",
73 | Subcommands: []*cli.Command{
74 | {
75 | Name: "build",
76 | Action: BuildModel,
77 | Aliases: []string{"b"},
78 | Usage: "Build model",
79 | Flags: []cli.Flag{
80 | &cli.StringFlag{
81 | Name: "target",
82 | Aliases: []string{"t"},
83 | Usage: "Target a specific model(s). The default is all models. This is relative to the PREEN_MODELS_PATH.",
84 | },
85 | &cli.BoolFlag{
86 | Name: "source-name",
87 | Aliases: []string{"sn"},
88 | Usage: "Target a specific source",
89 | },
90 | },
91 | },
92 | },
93 | },
94 | {
95 | Name: "source",
96 | Aliases: []string{"s"},
97 | Usage: "Commands to manage sources",
98 | Subcommands: []*cli.Command{
99 | {
100 | Name: "list",
101 | Aliases: []string{"l"},
102 | Usage: "Print stored sources.",
103 | Action: ListSources,
104 | },
105 | {
106 | Name: "validate",
107 | Aliases: []string{"v"},
108 | Usage: "Validate config file and retrieve source data types",
109 | Action: Validate,
110 | },
111 | {
112 | Name: "metadata",
113 | Aliases: []string{"i"},
114 | Usage: "Build source metadata",
115 | Action: BuildMetadata,
116 | },
117 | },
118 | },
119 | {
120 | Name: "version",
121 | Usage: "Print the version of the application",
122 | Action: func(c *cli.Context) error {
123 | fmt.Println("Preen version:", engine.Version)
124 | return nil
125 | },
126 | },
127 | },
128 | Before: func(c *cli.Context) error {
129 | logLevel := ""
130 |
131 | // Check if log-level flag is set
132 | if c.IsSet("log-level") {
133 | logLevel = c.String("log-level")
134 | }
135 |
136 | // Check if verbose flag is set
137 | if c.Bool("verbose") {
138 | logLevel = "DEBUG"
139 | }
140 |
141 | err := engine.IsValidLogLevel(logLevel)
142 | if logLevel != "" && err != nil {
143 | return fmt.Errorf("invalid log level: %s. Allowed values are: DEBUG, INFO, WARN, ERROR, FATAL, PANIC", logLevel)
144 | }
145 |
146 | // Initialize logger, passes empty string if no flag set which is handled by variadic Intialize function
147 | if err := engine.Initialize(logLevel); err != nil {
148 | return err
149 | }
150 |
151 | return nil
152 | },
153 | }
154 | return app
155 | }
156 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/preendata/preen
2 |
3 | go 1.23.1
4 |
5 | require (
6 | github.com/aws/aws-sdk-go-v2/config v1.29.2
7 | github.com/aws/aws-sdk-go-v2/service/s3 v1.75.0
8 | github.com/chzyer/readline v1.5.1
9 | github.com/go-sql-driver/mysql v1.8.1
10 | github.com/jackc/pgx/v5 v5.7.2
11 | github.com/jedib0t/go-pretty/v6 v6.6.5
12 | github.com/joho/godotenv v1.5.1
13 | github.com/marcboeker/go-duckdb v1.8.3
14 | github.com/preendata/sqlparser v0.0.1
15 | github.com/sirupsen/logrus v1.9.3
16 | github.com/snowflakedb/gosnowflake v1.13.0
17 | github.com/urfave/cli/v2 v2.27.5
18 | go.mongodb.org/mongo-driver v1.17.2
19 | golang.org/x/sync v0.10.0
20 | gopkg.in/yaml.v3 v3.0.1
21 | )
22 |
23 | require (
24 | filippo.io/edwards25519 v1.1.0 // indirect
25 | github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect
26 | github.com/99designs/keyring v1.2.2 // indirect
27 | github.com/Azure/azure-sdk-for-go/sdk/azcore v1.17.0 // indirect
28 | github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect
29 | github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.0 // indirect
30 | github.com/BurntSushi/toml v1.4.0 // indirect
31 | github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect
32 | github.com/apache/arrow-go/v18 v18.1.0 // indirect
33 | github.com/apache/arrow/go/v16 v16.1.0 // indirect
34 | github.com/aws/aws-sdk-go-v2 v1.34.0 // indirect
35 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.8 // indirect
36 | github.com/aws/aws-sdk-go-v2/credentials v1.17.55 // indirect
37 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.25 // indirect
38 | github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.55 // indirect
39 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.29 // indirect
40 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.29 // indirect
41 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2 // indirect
42 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.29 // indirect
43 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2 // indirect
44 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.3 // indirect
45 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.10 // indirect
46 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.10 // indirect
47 | github.com/aws/aws-sdk-go-v2/service/sso v1.24.12 // indirect
48 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.11 // indirect
49 | github.com/aws/aws-sdk-go-v2/service/sts v1.33.10 // indirect
50 | github.com/aws/smithy-go v1.22.2 // indirect
51 | github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
52 | github.com/danieljoos/wincred v1.2.2 // indirect
53 | github.com/dvsekhvalnov/jose2go v1.8.0 // indirect
54 | github.com/gabriel-vasile/mimetype v1.4.8 // indirect
55 | github.com/goccy/go-json v0.10.5 // indirect
56 | github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect
57 | github.com/golang-jwt/jwt/v5 v5.2.1 // indirect
58 | github.com/golang/snappy v0.0.4 // indirect
59 | github.com/google/flatbuffers v25.1.24+incompatible // indirect
60 | github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c // indirect
61 | github.com/jackc/pgpassfile v1.0.0 // indirect
62 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
63 | github.com/jackc/puddle/v2 v2.2.2 // indirect
64 | github.com/klauspost/compress v1.17.11 // indirect
65 | github.com/klauspost/cpuid/v2 v2.2.9 // indirect
66 | github.com/mattn/go-runewidth v0.0.16 // indirect
67 | github.com/mitchellh/mapstructure v1.5.0 // indirect
68 | github.com/montanaflynn/stats v0.7.1 // indirect
69 | github.com/mtibben/percent v0.2.1 // indirect
70 | github.com/pierrec/lz4/v4 v4.1.22 // indirect
71 | github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
72 | github.com/rivo/uniseg v0.4.7 // indirect
73 | github.com/russross/blackfriday/v2 v2.1.0 // indirect
74 | github.com/xdg-go/pbkdf2 v1.0.0 // indirect
75 | github.com/xdg-go/scram v1.1.2 // indirect
76 | github.com/xdg-go/stringprep v1.0.4 // indirect
77 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
78 | github.com/xwb1989/sqlparser v0.0.0-20180606152119-120387863bf2 // indirect
79 | github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect
80 | github.com/zeebo/xxh3 v1.0.2 // indirect
81 | golang.org/x/crypto v0.32.0 // indirect
82 | golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c // indirect
83 | golang.org/x/mod v0.22.0 // indirect
84 | golang.org/x/net v0.34.0 // indirect
85 | golang.org/x/sys v0.29.0 // indirect
86 | golang.org/x/term v0.28.0 // indirect
87 | golang.org/x/text v0.21.0 // indirect
88 | golang.org/x/tools v0.29.0 // indirect
89 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
90 | )
91 |
--------------------------------------------------------------------------------
/docs/documentation/integrations/file-formats/csv-format.md:
--------------------------------------------------------------------------------
1 | ---
2 | description: how to configure preen to read CSV files.
3 | ---
4 |
5 | # CSV Format
6 |
7 | Preen supports the following options for CSV format. This is largely a wrapper on the [DuckDB CSV scan options](https://duckdb.org/docs/data/csv/overview.html#parameters).
8 |
9 | | Option | Description | Default Value |
10 | | -------------------- | ---------------------------------------------------- | ------------- |
11 | | all\_varchar | Interpret all columns as varchar | false |
12 | | allow\_quoted\_nulls | Allow NULL values in quotes | true |
13 | | auto\_detect | Automatically detect CSV dialect | true |
14 | | columns | Specify column names and types | - |
15 | | compression | Compression type (auto, none, gzip, zstd) | auto |
16 | | dateformat | Specifies the date format to use | - |
17 | | decimal\_separator | Specifies the decimal separator | . |
18 | | delim | Specifies the delimiter character | , |
19 | | escape | Specifies the escape character | " |
20 | | filename | Include filename in the result | false |
21 | | force\_not\_null | Do not convert blank values to NULL | \[] |
22 | | header | Whether or not the CSV file has a header | false |
23 | | ignore\_errors | Ignore parsing errors | false |
24 | | max\_line\_size | Maximum line size in bytes | 2097152 |
25 | | names | Specify column names | - |
26 | | new\_line | Specifies the newline character | - |
27 | | normalize\_names | Normalize column names | false |
28 | | null\_padding | Pad columns with null values if row is too short | false |
29 | | nullstr | Specifies the string that represents NULL values | - |
30 | | parallel | Use multi-threading for reading CSV files | true |
31 | | quote | Specifies the quote character | " |
32 | | sample\_size | Number of sample rows for dialect and type detection | 20480 |
33 | | skip | Number of rows to skip | 0 |
34 | | timestampformat | Specifies the timestamp format | - |
35 | | types | Specify column types | - |
36 | | union\_by\_name | Union by name when reading multiple files | false |
37 |
38 | ## Examples
39 |
40 | ### Basic Auto-Detection
41 |
42 | This is the most common case. Preen will auto-detect the CSV format and use the default options.
43 |
44 | ```yaml
45 | # FILENAME: ~/.preen/models/users.yaml
46 | name: users
47 | type: file
48 | file_patterns:
49 |   - "users/v1/**.csv" # This will match all csv files under the users/v1 prefix
50 | format: csv
51 | options:
52 |   auto_detect: true
53 |   header: true
54 |   delim: ","
55 |   quote: "\""
56 |   escape: "\""
57 |   union_by_name: true
58 | ```
59 |
60 | ### Fully Specifying Options without auto-detection
61 |
62 | This is useful if you want to override the auto-detection and specify the options manually. This will save time and avoid the memory overhead of auto-detection.
63 |
64 | ```yaml
65 | # FILENAME: ~/.preen/models/users.yaml
66 | name: users
67 | type: file
68 | file_patterns:
69 |   - "users/v1/**.csv"
70 | format: csv
71 | options:
72 |   auto_detect: false
73 |   header: true
74 |   delim: ","
75 |   quote: "\""
76 |   escape: "\""
77 |   columns: # List of all columns in the CSV file along with their DuckDB types
78 |     - name: id
79 |       type: bigint
80 |     - name: name
81 |       type: varchar
82 |     - name: email
83 |       type: varchar
84 |     - name: birthday
85 |       type: date
87 |
88 | ### Partially Specifying Options to override auto-detection
89 |
90 | ```yaml
91 | # FILENAME: ~/.preen/models/users.yaml
92 | name: users
93 | type: file
94 | file_patterns:
95 |   - "users/v1/**.csv"
96 | format: csv
97 | options:
98 |   auto_detect: true
99 |   header: true
100 |   delim: ","
101 |   quote: "\""
102 |   escape: "\""
103 |   types: # This overrides the DuckDB auto-detection for the specified columns
104 |     - name: birthday
105 |       type: date
107 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 | # Preen
4 |
5 | Preen is a powerful command-line application for querying from multiple sources locally from your laptop. Under the hood, Preen uses [DuckDB](https://duckdb.org/) to build an ephemeral, in-memory data warehouse and then uses DuckDB's SQL engine to query the data. Think of Preen as a mix of Fivetran and DBT for your DuckDB use cases. You describe the data you want to query using a declarative language and Preen takes care of the rest.
6 |
7 | Preen is currently in the alpha stage and not all features are available. We are working on adding more features and improving the user experience. If you have any questions or feedback, please feel free to open an issue on GitHub.
8 |
9 | ## Documentation
10 | See our [Gitbook](https://preen.gitbook.io/preen-docs) page for rich documentation on Preen.
11 |
12 | ## Features
13 |
14 | - Query data from multiple sources using a single interface
15 | - Support for MongoDB, PostgreSQL, MySQL, and AWS S3
16 | - Model-based data retrieval and collation
17 | - Identify and resolve data type discrepancies between sources
18 | - Interactive REPL for querying data
19 | - Configurable output formats (table, CSV, markdown, JSON)
20 | - Extensible architecture for adding new data sources
21 |
22 | ## Installation
23 |
24 | ### Homebrew
25 |
26 | Download the executable via our Homebrew cask.
27 |
28 | ```bash
29 | brew tap preendata/preen
30 | brew install preen
31 | ```
32 |
33 | ### Download pre-built binary
34 |
35 | You can download a pre-built binary for your operating system and architecture from the [GitHub Releases](https://github.com/preendata/preen/releases) page.
36 |
37 | ```bash
38 | # Using curl
39 | sh -c "$(curl -fsSL https://raw.githubusercontent.com/preendata/preen/main/build/install.sh)"
40 |
41 | # Using wget
42 | sh -c "$(wget https://raw.githubusercontent.com/preendata/preen/main/build/install.sh -O -)"
43 | ```
44 |
45 | ### Build from source
46 |
47 | To build Preen from source, you need to have Go 1.23.1 or later installed on your system. Then, you can build the application using the following commands:
48 |
49 | ```bash
50 | git clone https://github.com/preendata/preen.git
51 | cd preen
52 | make build
53 | ```
54 |
55 | This will create a `preen` binary in the `bin` directory. You can add this to your `PATH` if you want to use the `preen` command from anywhere.
56 |
57 | ## Configuration
58 |
59 | [Example repository](https://github.com/preendata/preen-template)
60 |
61 | Preen uses two configuration files: `sources.yaml` and `models.yaml`. The `sources.yaml` file is used to configure the data sources that Preen will query. The `models.yaml` file is used to define the models that Preen will build. The directory where Preen looks for source and model configurations is configurable via the `PREEN_CONFIG_PATH` environment variable. You can see an example of the environment configuration in the [.env.example](.env.example) file. The `models.yaml` file is optional. If it is not present, Preen will look for `.yaml` files in the `models` directory.
62 |
63 | Here is an example `sources.yaml` file:
64 |
65 | ```yaml
66 | sources:
67 |   - name: mongo-db-us-west-1 # This has to be unique
68 |     engine: mongodb
69 |     connection:
70 |       host: localhost
71 |       port: 27117
72 |       database: preen
73 |       username: root
74 |       password: ${MONGO_PASSWORD} # You can also use environment variables.
75 |       auth_source: admin
76 |     models:
77 |       - users
78 |       - orders
79 |       - products
80 | ```
81 |
82 | Here is an example `models.yaml` file:
83 |
84 | ```yaml
85 | models:
86 |   - name: preen-users-model
87 |     type: database
88 |     query: |
89 |       SELECT users.user_id, users.name, users.email FROM users
90 | ```
91 |
92 | You can validate your configuration by running:
93 |
94 | ```bash
95 | preen source validate
96 | ```
97 |
98 | ## Usage
99 |
100 | ### Building Models
101 |
102 | Building a model will fetch the data from the source and save it to the DuckDB database. To build your models, run:
103 |
104 | ```bash
105 | preen model build
106 | ```
107 |
108 | ### Querying Data
109 |
110 | You can query data using the interactive REPL. You can also specify the output format of the data (table, csv, markdown).
111 |
112 | ```bash
113 | preen repl
114 |
115 | # Specify output format
116 | preen repl --output-format csv
117 | ```
118 |
119 | For one-off queries, use the `query` command:
120 |
121 | ```bash
122 | preen query --format json "select * from your_model limit 10"
123 | ```
124 |
125 | ## Development
126 |
127 | To set up the development environment:
128 |
129 | 1. Clone the repository
130 | 2. Copy `.env.example` to `.env` and adjust the values as needed
131 | 3. Install dependencies: `go mod tidy`
132 | 4. Run Unit tests: `make test`
133 | 5. Run Integration tests: `make integration-test`
134 | 6. Run linter: `make lint`
135 |
136 | ## License
137 |
138 | This project is licensed under the LGPL-3.0 License. See the [LICENSE](LICENSE) file for details.
139 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/internal/engine/types.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "database/sql/driver"
5 | "encoding/json"
6 | "fmt"
7 | "math"
8 | "net"
9 | "net/netip"
10 | "strconv"
11 | "time"
12 |
13 | "github.com/jackc/pgx/v5/pgtype"
14 | "github.com/marcboeker/go-duckdb"
15 | )
16 |
17 | // Implements the Scanner and Valuer interfaces for custom data types.
18 | // https://pkg.go.dev/database/sql#Scanner
19 |
20 | // duckdbDecimal is a custom type for scanning and valuing float64 values.
21 | // The MySQL driver returns numeric types as strings, so we need to convert them to float64.
22 | // The PG driver returns numeric types as a custom type, so we need to convert them to float64.
23 | type duckdbDecimal float64
24 |
25 | func (d *duckdbDecimal) Scan(s any) error {
26 | switch v := s.(type) {
27 | // The string is from the Snowflake driver.
28 | case string:
29 | Debug(fmt.Sprintf("Scanning duckdbDecimal: %s", v))
30 | if float, err := strconv.ParseFloat(v, 64); err == nil {
31 | *d = duckdbDecimal(float)
32 | } else {
33 | Debug(fmt.Sprintf("Error scanning duckdbDecimal: %s", err))
34 | return fmt.Errorf("error scanning duckdbDecimal: %w", err)
35 | }
36 | // The byte array is from the MySQL driver.
37 | case []byte:
38 | if float, err := strconv.ParseFloat(string(v), 64); err == nil {
39 | *d = duckdbDecimal(float)
40 | } else {
41 | return fmt.Errorf("error scanning duckdbDecimal: %w", err)
42 | }
43 | // The float32 type is from the MySQL driver.
44 | case float32:
45 | *d = duckdbDecimal(v)
46 | // The float64 type is from the MySQL driver.
47 | case float64:
48 | *d = duckdbDecimal(v)
49 | // The numeric type is from the PG driver.
50 | case pgtype.Numeric:
51 | numericType := s.(pgtype.Numeric)
52 | decimal := duckdb.Decimal{Value: numericType.Int, Scale: uint8(math.Abs(float64(numericType.Exp)))}
53 | *d = duckdbDecimal(decimal.Float64())
54 | case nil:
55 | *d = duckdbDecimal(0)
56 | default:
57 | fmt.Printf("type: %T\n", s)
58 | return fmt.Errorf("cannot sql.Scan() duckdbDecimal from: %#v", s)
59 | }
60 | return nil
61 | }
62 |
63 | func (d duckdbDecimal) Value() (driver.Value, error) {
64 | return float64(d), nil
65 | }
66 |
67 | // duckdbTime is a custom type for scanning and valuing time.Time values.
68 | // The PG driver returns time types as a custom type, so we need to convert them to string.
69 | // The database/sql driver doesn't respect time data types.
70 | type duckdbTime string
71 |
72 | func (t *duckdbTime) Scan(s any) error {
73 | switch v := s.(type) {
74 | case pgtype.Time:
75 | timeType := v
76 | // Create a Time object for midnight of the current day
77 | midnight := time.Now().Truncate(24 * time.Hour)
78 | resultTime := midnight.Add(time.Duration(timeType.Microseconds) * time.Microsecond)
79 | *t = duckdbTime(resultTime.String())
80 | case nil:
81 | *t = duckdbTime("")
82 | default:
83 | return fmt.Errorf("cannot sql.Scan() duckdbTime from: %#v", s)
84 | }
85 | return nil
86 | }
87 |
88 | func (t duckdbTime) Value() (driver.Value, error) {
89 | return fmt.Sprint(t), nil
90 | }
91 |
92 | // duckdbDuration is a custom type for scanning and valuing string values.
93 | // The PG driver returns interval types as a custom type, so we need to convert them to string.
94 | // The database/sql driver doesn't respect interval data types.
95 | type duckdbDuration string
96 |
97 | func (d *duckdbDuration) Scan(s any) error {
98 | switch v := s.(type) {
99 | case pgtype.Interval:
100 | stringVal := fmt.Sprintf("Microseconds: %d, Days: %d, Months: %d", v.Microseconds, v.Days, v.Months)
101 | *d = duckdbDuration(stringVal)
102 | case nil:
103 | *d = duckdbDuration("")
104 | default:
105 | return fmt.Errorf("cannot sql.Scan() strfmt.Duration from: %#v", v)
106 | }
107 | return nil
108 | }
109 |
110 | func (d duckdbDuration) Value() (driver.Value, error) {
111 | return string(d), nil
112 | }
113 |
// duckdbNetIpPrefix is a string-backed type for scanning netip.Prefix values.
// The PG driver returns inet types as a netip.Prefix, so we keep their string
// form.
type duckdbNetIpPrefix string

// Scan stores the string form of a netip.Prefix. A nil input is stored as the
// empty string; every other input type is rejected with an error.
func (d *duckdbNetIpPrefix) Scan(s any) error {
	if s == nil {
		*d = duckdbNetIpPrefix("")
		return nil
	}

	prefix, ok := s.(netip.Prefix)
	if !ok {
		return fmt.Errorf("cannot sql.Scan() netip.Prefix from: %#v", s)
	}

	*d = duckdbNetIpPrefix(prefix.String())
	return nil
}

// Value returns the stored prefix text as the driver value.
func (d duckdbNetIpPrefix) Value() (driver.Value, error) {
	text := string(d)
	return text, nil
}
133 |
// duckdbHardwareAddr is a string-backed type for scanning net.HardwareAddr
// values. The PG driver returns macaddr types as a net.HardwareAddr, so we
// keep their string form.
type duckdbHardwareAddr string

// Scan stores the string form of a net.HardwareAddr. A nil input is stored as
// the empty string; every other input type is rejected with an error.
func (d *duckdbHardwareAddr) Scan(s any) error {
	if s == nil {
		*d = duckdbHardwareAddr("")
		return nil
	}

	addr, ok := s.(net.HardwareAddr)
	if !ok {
		return fmt.Errorf("cannot sql.Scan() net.HardwareAddr from: %#v", s)
	}

	*d = duckdbHardwareAddr(addr.String())
	return nil
}

// Value returns the stored address text as the driver value.
func (d duckdbHardwareAddr) Value() (driver.Value, error) {
	text := string(d)
	return text, nil
}
153 |
// duckdbJSON is a string-backed type for scanning json values.
// The PG driver returns json types as decoded Go values, so we re-encode them
// to a JSON string.
type duckdbJSON string

// Scan accepts a decoded JSON object (map[string]any) or array ([]any) and
// stores its JSON encoding. A nil input is stored as the empty string; every
// other input type is rejected with an error.
func (j *duckdbJSON) Scan(s any) error {
	switch s.(type) {
	case nil:
		*j = duckdbJSON("")
		return nil
	case map[string]any, []any:
		encoded, err := json.Marshal(s)
		if err != nil {
			return fmt.Errorf("error scanning duckdbJSON: %w", err)
		}
		*j = duckdbJSON(encoded)
		return nil
	}
	return fmt.Errorf("cannot sql.Scan() duckdbJSON from: %#v", s)
}

// Value returns the stored JSON text as the driver value.
func (j duckdbJSON) Value() (driver.Value, error) {
	text := string(j)
	return text, nil
}
177 |
178 | // duckdbUUID is a custom type for scanning and valuing UUID values.
179 | // The PG driver returns UUID types as a custom type, so we need to convert them to string.
180 | type duckdbUUID duckdb.UUID
181 |
182 | func (u *duckdbUUID) Scan(s any) error {
183 | switch v := s.(type) {
184 | case [16]uint8:
185 | value := duckdb.UUID(v)
186 | *u = duckdbUUID(value)
187 | case nil:
188 | *u = duckdbUUID(duckdb.UUID([]uint8{}))
189 | default:
190 | return fmt.Errorf("cannot sql.Scan() duckdbUUID from: %#v", v)
191 | }
192 | return nil
193 | }
194 |
195 | func (u duckdbUUID) Value() (driver.Value, error) {
196 | return duckdb.UUID(u), nil
197 | }
198 |
// duckdbTypeMap maps a lower-case source data type name to the DuckDB type
// name used in its place. The keys cover spellings from several sources
// (entries marked "snowflake" are Snowflake-specific).
// NOTE(review): presumably looked up with the data_type values collected in
// preen_information_schema when building model DDL — confirm against callers.
var duckdbTypeMap = map[string]string{
	"integer":                     "integer",
	"bigint":                      "bigint",
	"smallint":                    "smallint",
	"mediumint":                   "integer",
	"int":                         "integer",
	"year":                        "smallint",
	"double precision":            "double",
	"double":                      "double",
	"number":                      "double", // snowflake
	"numeric":                     "double",
	"decimal":                     "double",
	"real":                        "real",
	"float4":                      "real",
	"float":                       "real",
	"boolean":                     "boolean",
	"date":                        "date",
	"timestamp":                   "timestamp",
	"datetime":                    "timestamp",
	"timestamp_tz":                "timestamp", // snowflake
	"timestamp_ltz":               "timestamp", // snowflake
	"timestamp_ntz":               "timestamp", // snowflake
	"timestamp without time zone": "timestamp",
	"timestamp with time zone":    "timestamp",
	"binary":                      "blob",
	"varbinary":                   "blob",
	"tinyblob":                    "blob",
	"blob":                        "blob",
	"mediumblob":                  "blob",
	"longblob":                    "blob",
	"bytea":                       "blob",
	"variant":                     "blob", // snowflake
	"object":                      "json", // snowflake
	"json":                        "json",
	"jsonb":                       "json",
	"inet":                        "varchar",
	"cidr":                        "varchar",
	"macaddr":                     "varchar",
	"array":                       "json",
	"xml":                         "varchar",
	"int4range":                   "varchar",
	"varchar":                     "varchar",
	"tinyint":                     "tinyint",
	"char":                        "varchar",
	"tinytext":                    "varchar",
	"mediumtext":                  "varchar",
	"longtext":                    "varchar",
	"character varying":           "varchar",
	"text":                        "varchar",
	"character":                   "varchar",
	"enum":                        "varchar",
	"set":                         "varchar",
	"time without time zone":      "varchar",
	"time":                        "varchar",
	"interval":                    "varchar",
	"uuid":                        "uuid",
}
256 |
--------------------------------------------------------------------------------
/internal/engine/models.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "path/filepath"
7 | "slices"
8 | "strings"
9 |
10 | "github.com/preendata/sqlparser"
11 | yaml "gopkg.in/yaml.v3"
12 | )
13 |
// ModelName is the name of a model, as read from the `name` key of its YAML
// definition.
type ModelName string
15 |
// Type is a name/type pair read from YAML. Options uses it for the entries of
// its `columns` and `types` lists.
type Type struct {
	Name string `yaml:"name"`
	Type string `yaml:"type"`
}
20 |
// Options holds the optional settings found under a model's `options` key.
// Every field is a pointer, so an option that is absent from the YAML stays nil.
// NOTE(review): the option names look like DuckDB CSV reader parameters, and
// the `default` struct tags are not consumed anywhere in the visible code —
// confirm which helper reads them.
type Options struct {
	AllVarchar         *bool     `default:"false" yaml:"all_varchar"`
	AllowQuotedNulls   *bool     `default:"true" yaml:"allow_quoted_nulls"`
	AutoDetect         *bool     `default:"true" yaml:"auto_detect"`
	AutoTypeCandidates *[]string `default:"-" yaml:"auto_type_candidates"`
	Columns            *[]Type   `default:"-" yaml:"columns"`
	Compression        *string   `default:"auto" yaml:"compression"`
	DateFormat         *string   `default:"-" yaml:"date_format"`
	DecimalSeparator   *string   `default:"." yaml:"decimal_separator"`
	Delim              *string   `default:"," yaml:"delim"`
	Escape             *string   `default:"\"" yaml:"escape"`
	FileName           *bool     `default:"false" yaml:"filename"`
	ForceNotNull       *[]string `default:"[]" yaml:"force_not_null"`
	Header             *bool     `default:"false" yaml:"header"`
	HivePartitioning   *bool     `default:"false" yaml:"hive_partitioning"`
	IgnoreErrors       *bool     `default:"false" yaml:"ignore_errors"`
	MaxLineSize        *int64    `default:"2097152" yaml:"max_line_size"`
	Names              *[]string `default:"-" yaml:"names"`
	NewLine            *string   `default:"-" yaml:"new_line"`
	NormalizeNames     *bool     `default:"false" yaml:"normalize_names"`
	NullPadding        *bool     `default:"false" yaml:"null_padding"`
	NullString         *[]string `default:"-" yaml:"null_string"`
	Parallel           *bool     `default:"true" yaml:"parallel"`
	Quote              *string   `default:"\"" yaml:"quote"`
	SampleSize         *int64    `default:"20480" yaml:"sample_size"`
	Skip               *int64    `default:"0" yaml:"skip"`
	TimestampFormat    *string   `default:"-" yaml:"timestamp_format"`
	Types              *[]Type   `default:"-" yaml:"types"`
	UnionByName        *bool     `default:"false" yaml:"union_by_name"`
}
51 |
// Model is a single model definition. The tagged fields are read from YAML;
// the untagged fields hold state derived while the model is processed.
type Model struct {
	Name    ModelName `yaml:"name"`
	Type    string    `yaml:"type"` // parseModels handles "database" and "file"
	Format  string    `yaml:"format"`
	Options Options   `yaml:"options"`
	// Query is required for "database" models (see parseModels).
	Query string `yaml:"query"`
	// FilePatterns is required for "file" models (see parseModels).
	FilePatterns *[]string `yaml:"file_patterns"`
	Collection   string    `yaml:"collection"`
	// Parsed is the AST of Query when it is a SELECT statement, nil otherwise
	// (set by parseModels).
	Parsed sqlparser.Statement
	// DDLString is used by buildDuckDBTables as the column list of the
	// model's "create or replace table" statement.
	DDLString string
	// Columns, TableMap and TableSet are filled in outside this section of
	// the file. TableSet is ranged over to list the model's source tables
	// when the information schema is queried.
	Columns  map[TableName]map[ColumnName]Column
	TableMap TableMap
	TableSet TableSet
}
66 |
// ModelConfig is the set of loaded models together with the environment they
// were loaded under. Env is excluded from YAML and set by GetModelConfigs.
type ModelConfig struct {
	Models []*Model `yaml:"models"`
	Env    *Env     `yaml:"-"`
}
71 |
72 | // Models can be defined in a models.yaml file in the preen config directory.
73 | // Models can also be defined in individual .yaml files in the preen models directory.
74 |
75 | func GetModelConfigs(modelTarget string) (*ModelConfig, error) {
76 | mc := ModelConfig{}
77 | env, err := EnvInit()
78 | if err != nil {
79 | return nil, fmt.Errorf("error initializing environment: %w", err)
80 | }
81 | mc.Env = env
82 |
83 | configFilePath := getYmlorYamlPath(mc.Env.PreenConfigPath, "models")
84 | modelsDir := mc.Env.PreenModelsPath
85 |
86 | // Check if a models.yaml file exists in the config directory.
87 | // If it does, parse it.
88 | if _, err = os.Stat(configFilePath); err == nil {
89 | err = parseModelsYamlFile(configFilePath, &mc)
90 | if err != nil {
91 | return nil, fmt.Errorf("error parsing models.yaml file: %w", err)
92 | }
93 | }
94 |
95 | // Process any .yaml files in the models directory
96 | err = parseModelDirectoryFiles(modelsDir, modelTarget, &mc)
97 | if err != nil {
98 | return nil, fmt.Errorf("error parsing models directory: %w", err)
99 | }
100 |
101 | // If no models are detected, return an error
102 | if len(mc.Models) == 0 {
103 | return nil, fmt.Errorf(
104 | "no models detected in %s/models.yaml file or %s directory",
105 | mc.Env.PreenConfigPath, mc.Env.PreenModelsPath,
106 | )
107 | }
108 |
109 | // Override config with environment variables
110 | fromEnv(&mc)
111 | if err = parseModels(&mc); err != nil {
112 | return nil, fmt.Errorf("error parsing models: %w", err)
113 | }
114 |
115 | if err = ParseModelTables(&mc); err != nil {
116 | return nil, fmt.Errorf("error parsing model tables: %w", err)
117 | }
118 |
119 | return &mc, nil
120 | }
121 |
122 | // This is the main entry point for building models. The CLI commands call this function.
123 | func BuildModels(sc *SourceConfig, mc *ModelConfig) error {
124 | if err := BuildMetadata(sc, mc); err != nil {
125 | return fmt.Errorf("error building information schema: %w", err)
126 | }
127 |
128 | if err := removeUnusedModels(sc, mc); err != nil {
129 | return fmt.Errorf("error removing unused models: %w", err)
130 | }
131 |
132 | columnMetadata, err := BuildColumnMetadata()
133 | if err != nil {
134 | return fmt.Errorf("error building column metadata: %w", err)
135 | }
136 |
137 | if err = ParseModelColumns(mc, columnMetadata); err != nil {
138 | return fmt.Errorf("error parsing model columns: %w", err)
139 | }
140 |
141 | if err = buildDuckDBTables(mc); err != nil {
142 | return fmt.Errorf("error building model tables: %w", err)
143 | }
144 |
145 | Info(fmt.Sprintf("Fetching data from %d configured sources", len(sc.Sources)))
146 | if err = Retrieve(sc, mc); err != nil {
147 | return fmt.Errorf("error retrieving data: %w", err)
148 | }
149 |
150 | return nil
151 | }
152 |
153 | // Parse the models.yaml file in the preen config directory. This file can contain multiple models.
154 | // It is optional, but if it exists, it will be parsed.
155 | func parseModelsYamlFile(filePath string, mc *ModelConfig) error {
156 | file, err := os.ReadFile(filePath)
157 | if err != nil {
158 | return fmt.Errorf("failed to read model file: %w", err)
159 | }
160 |
161 | if err = yaml.Unmarshal(file, &mc); err != nil {
162 | return fmt.Errorf("failed to parse model file: %w", err)
163 | }
164 |
165 | return nil
166 | }
167 |
168 | // Parse the models directory which is supplied as a possible environment value.
169 | // The modelTarget is the user input prefix of any model files that should be used.
170 | // Each .yaml file in this directory is a model.
171 | func parseModelDirectoryFiles(modelsDir string, modelTarget string, mc *ModelConfig) error {
172 | _, err := os.ReadDir(modelsDir)
173 | if err != nil {
174 | return fmt.Errorf("failed to read models directory: %w", err)
175 | }
176 |
177 | err = filepath.WalkDir(modelsDir, func(path string, d os.DirEntry, err error) error {
178 | if err != nil {
179 | return fmt.Errorf("error walking directory: %w", err)
180 | }
181 |
182 | if d.IsDir() {
183 | return nil
184 | }
185 | if (strings.HasSuffix(path, ".yaml") || strings.HasSuffix(path, ".yml")) &&
186 | (modelTarget == "" || strings.HasPrefix(path, filepath.Join(modelsDir, modelTarget))) {
187 |
188 | file, err := os.ReadFile(path)
189 | if err != nil {
190 | return fmt.Errorf("error reading model file %s: %w", path, err)
191 | }
192 | m := Model{}
193 | err = yaml.Unmarshal(file, &m)
194 | if err != nil {
195 | return fmt.Errorf("error parsing model file %s: %w", path, err)
196 | }
197 | if m.Name != "" {
198 | mc.Models = append(mc.Models, &m)
199 | } else {
200 | Warn(fmt.Sprintf("Unrecognized model file %s: no model name detected", path))
201 | }
202 | }
203 | return nil
204 | })
205 | if err != nil {
206 | return fmt.Errorf("error parsing model directory: %w", err)
207 | }
208 | return nil
209 | }
210 |
211 | // Parse the models and create a parsed version of the model's required fields.
212 | // This is where the SQL models are parsed into ASTs.
213 | // This is where the file models are validated.
214 | func parseModels(mc *ModelConfig) error {
215 | for modelName, model := range mc.Models {
216 | switch model.Type {
217 | case "database":
218 | // Database models require a query
219 | if model.Query == "" {
220 | return fmt.Errorf("error parsing database model %v: query required", modelName)
221 | }
222 | // If the query is a SELECT statement, parse it
223 | if strings.HasPrefix(strings.ToLower(model.Query), "select") {
224 | stmt, err := sqlparser.Parse(model.Query)
225 | if err != nil {
226 | return fmt.Errorf("error parsing sql model %v: %w", modelName, err)
227 | }
228 | model.Parsed = stmt
229 | mc.Models[modelName] = model
230 | // If the query is not a SELECT statement, set the parsed statement to nil
231 | } else {
232 | model.Parsed = nil
233 | mc.Models[modelName] = model
234 | }
235 | case "file":
236 | if model.FilePatterns == nil {
237 | return fmt.Errorf("error parsing file model %v: file_pattern required", modelName)
238 | }
239 | }
240 | }
241 | return nil
242 | }
243 |
244 | // Create each model's destination table in DuckDB
245 | func buildDuckDBTables(mc *ModelConfig) error {
246 | for _, model := range mc.Models {
247 | switch model.Type {
248 | case "database":
249 | Debug(fmt.Sprintf("Creating table %s", model.Name))
250 | tableName := strings.ReplaceAll(string(model.Name), "-", "_")
251 | createTableStmt := fmt.Sprintf("create or replace table main.%s (%s);", tableName, model.DDLString)
252 | if err := ddbExec(createTableStmt); err != nil {
253 | return fmt.Errorf("error creating table %s: %w", tableName, err)
254 | }
255 | case "file":
256 | Debug("Tables for file models will be created on model retrieval")
257 | }
258 | }
259 | return nil
260 | }
261 |
262 | // If a model file is referenced in a source, but no model file exists, return an error.
263 | func errorOnMissingModels(sc *SourceConfig, mc *ModelConfig) error {
264 | missingModels := make([]string, 0)
265 | for _, source := range sc.Sources {
266 | for _, modelName := range source.Models {
267 | modelFound := false
268 | for _, model := range mc.Models {
269 | if model.Name == ModelName(modelName) {
270 | modelFound = true
271 | break
272 | }
273 | }
274 | if !modelFound && !slices.Contains(missingModels, string(modelName)) {
275 | missingModels = append(missingModels, string(modelName))
276 | }
277 | }
278 | }
279 | if len(missingModels) > 0 {
280 | return fmt.Errorf("no model file detected for models: %s", strings.Join(missingModels, ", "))
281 | }
282 | return nil
283 | }
284 |
285 | // Remove unused models from ModelConfig. If a model is not referenced in any source, it is unused.
286 | func removeUnusedModels(sc *SourceConfig, mc *ModelConfig) error {
287 | usedModels := make([]string, 0)
288 | for _, source := range sc.Sources {
289 | for _, modelName := range source.Models {
290 | usedModels = append(usedModels, string(modelName))
291 | }
292 | }
293 |
294 | for i, model := range mc.Models {
295 | if !slices.Contains(usedModels, string(model.Name)) {
296 | Info(fmt.Sprintf("Removing unused model: %s", model.Name))
297 | mc.Models = append(mc.Models[:i], mc.Models[i+1:]...)
298 | }
299 | }
300 |
301 | return nil
302 | }
303 |
--------------------------------------------------------------------------------
/internal/engine/metadata.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "context"
5 | "database/sql/driver"
6 | "fmt"
7 | "slices"
8 | "strings"
9 |
10 | "github.com/aws/aws-sdk-go-v2/config"
11 | "github.com/aws/aws-sdk-go-v2/service/s3"
12 | "golang.org/x/sync/errgroup"
13 | )
14 |
15 | // BuildMetadata builds any required metadata for the sources in the sources.yaml config.
16 | // Postgres and MySQL sources require an information schema to be built.
17 | // S3 sources require duckDB secrets to be stored.
18 | func BuildMetadata(sc *SourceConfig, mc *ModelConfig) error {
19 | // Ensure info schema table exists
20 | if err := prepareDDBInformationSchema(); err != nil {
21 | return err
22 | }
23 |
24 | // Reuse the insert function to insert data to the information schema
25 | ic := make(chan []driver.Value, 10)
26 | dc := make(chan []int64)
27 |
28 | go Insert("preen_information_schema", ic, dc)
29 |
30 | // Group sources by engine to distribute across specific engine handlers
31 | preenSourcesByEngine := groupSourceByEngine(sc)
32 |
33 | sourceErrGroup := new(errgroup.Group)
34 |
35 | for engine, sources := range preenSourcesByEngine {
36 | sourceErrGroup.Go(func() error {
37 | switch engine {
38 | case "postgres":
39 | if err := buildPostgresInformationSchema(sources, ic, mc); err != nil {
40 | return fmt.Errorf("error building postgres information schema: %w", err)
41 | }
42 | case "mysql":
43 | if err := buildMySQLInformationSchema(sources, ic, mc); err != nil {
44 | return fmt.Errorf("error building mysql information schema: %w", err)
45 | }
46 | case "snowflake":
47 | if err := buildSnowflakeInformationSchema(sources, ic, mc); err != nil {
48 | return fmt.Errorf("error building snowflake information schema: %w", err)
49 | }
50 | case "mongodb":
51 | Debug("No information schema required for MongoDB")
52 | case "s3":
53 | if len(sources) > 1 {
54 | return fmt.Errorf("only one s3 source is supported")
55 | }
56 | if err := buildS3Secrets(sources[0]); err != nil {
57 | return fmt.Errorf("error configuring s3 access: %w", err)
58 | }
59 | if err := confirmS3Connection(sources[0]); err != nil {
60 | return fmt.Errorf("error confirming s3 objects: %w", err)
61 | }
62 | default:
63 | return fmt.Errorf("unsupported engine: %s", engine)
64 | }
65 |
66 | return nil
67 | })
68 | }
69 |
70 | if err := sourceErrGroup.Wait(); err != nil {
71 | return err
72 | }
73 | ic <- []driver.Value{"quit"}
74 | ConfirmInsert("preen_information_schema", dc, 0)
75 | Info("Metadata build completed successfully")
76 |
77 | return nil
78 | }
79 |
80 | // buildS3Secrets builds the secrets for all s3 sources in the config
81 | // This is required to access the S3 bucket, https://duckdb.org/docs/extensions/httpfs/s3api.html
82 | func buildS3Secrets(s Source) error {
83 | query := fmt.Sprintf(`
84 | install aws;
85 | load aws;
86 | create or replace persistent secret aws_s3 (
87 | type S3,
88 | region '%s',
89 | provider CREDENTIAL_CHAIN
90 | )
91 | `, s.Connection.Region)
92 | if err := ddbExec(query); err != nil {
93 | return err
94 | }
95 | return nil
96 | }
97 |
98 | // confirmS3Connection confirms that the S3 connection is working,
99 | // and that at least one object is present inside the bucket.
100 | func confirmS3Connection(s Source) error {
101 | ctx := context.Background()
102 | cfg, err := config.LoadDefaultConfig(
103 | ctx,
104 | config.WithRegion(s.Connection.Region),
105 | )
106 | if err != nil {
107 | return fmt.Errorf("error loading default config: %w", err)
108 | }
109 |
110 | s3Client := s3.NewFromConfig(cfg)
111 | input := &s3.ListObjectsV2Input{
112 | Bucket: &s.Connection.BucketName,
113 | }
114 |
115 | result, err := s3Client.ListObjectsV2(ctx, input)
116 | if err != nil {
117 | return fmt.Errorf("unable to list items in bucket %q: %w", s.Connection.BucketName, err)
118 | }
119 | if len(result.Contents) == 0 {
120 | return fmt.Errorf("no objects found in bucket %q", s.Connection.BucketName)
121 | } else {
122 | Debug(fmt.Sprintf("Found %d objects in bucket %q", len(result.Contents), s.Connection.BucketName))
123 | }
124 | return nil
125 | }
126 |
127 | // buildMySQLInformationSchema builds the information schema for all mysql sources in the config
128 | func buildMySQLInformationSchema(sources []Source, ic chan<- []driver.Value, mc *ModelConfig) error {
129 | schemaErrGroup := new(errgroup.Group)
130 |
131 | for _, source := range sources {
132 | err := func(source Source) error {
133 | schemaErrGroup.Go(func() error {
134 | // Open new pool for every source
135 | pool, err := GetMysqlPoolFromSource(source)
136 | if err != nil {
137 | return err
138 | }
139 |
140 | defer pool.Close()
141 |
142 | // Iterate over all models and get the tables for each model
143 | for _, model := range mc.Models {
144 | if model.Type == "database" && model.Parsed != nil && slices.Contains(source.Models, string(model.Name)) {
145 | tablesQueryString := ""
146 | for _, tableName := range model.TableSet {
147 | if tablesQueryString != "" {
148 | tablesQueryString += fmt.Sprintf(",'%s'", tableName)
149 | } else {
150 | tablesQueryString += fmt.Sprintf("'%s'", tableName)
151 | }
152 | }
153 |
154 | // MySQL does not have schemas, so we use the database name
155 | schema := source.Connection.Database
156 |
157 | query := fmt.Sprintf(`
158 | select table_name, column_name, data_type from information_schema.columns
159 | where table_schema = '%s' and table_name in (%s);
160 | `, schema, tablesQueryString)
161 |
162 | rows, err := pool.Query(query)
163 | if err != nil {
164 | return err
165 | }
166 |
167 | defer rows.Close()
168 |
169 | for rows.Next() {
170 | var table_name string
171 | var column_name string
172 | var data_type string
173 | err = rows.Scan(&table_name, &column_name, &data_type)
174 |
175 | if err != nil {
176 | return err
177 | }
178 | ic <- []driver.Value{source.Name, string(model.Name), table_name, column_name, data_type}
179 | }
180 | }
181 | }
182 | return nil
183 | })
184 | return nil
185 | }(source)
186 | if err != nil {
187 | return err
188 | }
189 | }
190 | if err := schemaErrGroup.Wait(); err != nil {
191 | return err
192 | }
193 |
194 | return nil
195 | }
196 |
197 | // buildSnowflakeInformationSchema builds the information schema for all snowflake sources in the config
198 | func buildSnowflakeInformationSchema(sources []Source, ic chan<- []driver.Value, mc *ModelConfig) error {
199 | schemaErrGroup := new(errgroup.Group)
200 |
201 | for _, source := range sources {
202 | schemaErrGroup.Go(func() error {
203 | pool, err := getSnowflakePoolFromSource(source)
204 | if err != nil {
205 | return err
206 | }
207 | defer pool.Close()
208 | schema := "'PUBLIC'"
209 |
210 | for _, model := range mc.Models {
211 | if model.Type == "database" && model.Parsed != nil && slices.Contains(source.Models, string(model.Name)) {
212 | tablesQueryString := ""
213 | for _, tableName := range model.TableSet {
214 | if tablesQueryString != "" {
215 | tablesQueryString += fmt.Sprintf(",'%s'", tableName)
216 | } else {
217 | tablesQueryString += fmt.Sprintf("'%s'", tableName)
218 | }
219 | }
220 |
221 | query := fmt.Sprintf(`
222 | select table_name, column_name, data_type from %s.information_schema.columns
223 | where TABLE_SCHEMA = upper(%s) and table_name = upper(%s);
224 | `, source.Connection.Database, schema, tablesQueryString)
225 | rows, err := pool.Query(query)
226 | if err != nil {
227 | return err
228 | }
229 |
230 | defer rows.Close()
231 |
232 | for rows.Next() {
233 | var table_name string
234 | var column_name string
235 | var data_type string
236 | err = rows.Scan(&table_name, &column_name, &data_type)
237 |
238 | if err != nil {
239 | return err
240 | }
241 | ic <- []driver.Value{source.Name, string(model.Name), table_name, column_name, data_type}
242 | }
243 | }
244 | }
245 | return nil
246 | })
247 | }
248 | if err := schemaErrGroup.Wait(); err != nil {
249 | return err
250 | }
251 |
252 | return nil
253 | }
254 |
255 | // buildPostgresInformationSchema builds the information schema for all postgres sources in the config
256 | func buildPostgresInformationSchema(sources []Source, ic chan<- []driver.Value, mc *ModelConfig) error {
257 | schemaErrGroup := new(errgroup.Group)
258 |
259 | for _, source := range sources {
260 | err := func(source Source) error {
261 | schemaErrGroup.Go(func() error {
262 | // Open new pool for every source
263 | pool, err := getPostgresPoolFromSource(source)
264 | if err != nil {
265 | return err
266 | }
267 |
268 | defer pool.Close()
269 | schema := "public"
270 |
271 | // Iterate over all models and get the tables for each model
272 | for _, model := range mc.Models {
273 | if model.Type == "database" && model.Parsed != nil && slices.Contains(source.Models, string(model.Name)) {
274 | tablesQueryString := ""
275 | for _, tableName := range model.TableSet {
276 | if tablesQueryString != "" {
277 | tablesQueryString += fmt.Sprintf(",'%s'", tableName)
278 | } else {
279 | tablesQueryString += fmt.Sprintf("'%s'", tableName)
280 | }
281 | }
282 |
283 | query := fmt.Sprintf(`
284 | select table_name, column_name, data_type from information_schema.columns
285 | where table_schema = '%s' and table_name in (%s);
286 | `, schema, tablesQueryString)
287 |
288 | rows, err := pool.Query(context.Background(), query)
289 | if err != nil {
290 | return fmt.Errorf("error querying postgres information schema: %w", err)
291 | }
292 |
293 | defer rows.Close()
294 |
295 | for rows.Next() {
296 | values, err := rows.Values()
297 | if err != nil {
298 | return err
299 | }
300 | ic <- []driver.Value{source.Name, string(model.Name), values[0], values[1], values[2]}
301 | }
302 | }
303 | }
304 | return nil
305 | })
306 | return nil
307 | }(source)
308 | if err != nil {
309 | return err
310 | }
311 | }
312 | if err := schemaErrGroup.Wait(); err != nil {
313 | return err
314 | }
315 |
316 | return nil
317 | }
318 |
319 | // groupSourceByEngine reduces the raw config.Sources into a map of engine -> sources
320 | func groupSourceByEngine(sc *SourceConfig) map[string][]Source {
321 | engines := make(map[string][]Source)
322 | for _, source := range sc.Sources {
323 | engines[source.Engine] = append(engines[source.Engine], source)
324 | }
325 |
326 | return engines
327 | }
328 |
329 | // prepareDDBInformationSchema creates the table for the information schema in duckDB
330 | func prepareDDBInformationSchema() error {
331 | informationSchemaColumnNames := []string{"source_name varchar", "model_name varchar", "table_name varchar", "column_name varchar", "data_type varchar"}
332 | informationSchemaTableName := "main.preen_information_schema"
333 | Debug(fmt.Sprintf("Creating table %s", informationSchemaTableName))
334 | err := ddbExec(fmt.Sprintf("create or replace table %s (%s)", informationSchemaTableName, strings.Join(informationSchemaColumnNames, ", ")))
335 | if err != nil {
336 | return err
337 | }
338 |
339 | return nil
340 | }
341 |
--------------------------------------------------------------------------------
/internal/engine/columns.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "strings"
7 |
8 | "github.com/preendata/sqlparser"
9 | )
10 |
// FuncName is the type of Column.FuncName.
// NOTE(review): presumably the name of a SQL function applied to the column —
// confirm against the column parser.
type FuncName string
12 |
// Column describes one column of a model. Model.Columns stores these keyed by
// table name and column name.
// NOTE(review): the fields are populated by the column parser, which is
// outside this section; the per-field notes below are inferred from the names.
type Column struct {
	TableName *TableName
	ModelName ModelName
	FuncName  FuncName
	IsJoin    bool   // presumably set for columns that come from a join — confirm
	Position  int    // presumably the column's position in the select list — confirm
	Alias     string // presumably the column's alias in the query — confirm
}
21 |
// columnParser carries the working state used while parsing model columns.
// ParseModelColumns creates it with an empty columns map and the column
// metadata, then passes it to the per-model parsers.
type columnParser struct {
	columns        map[TableName]map[ColumnName]Column
	ddlString      string
	tableName      TableName
	modelName      ModelName
	selectIdx      int
	columnMetadata ColumnMetadata
}
30 |
// TableName is the name of a source table.
type TableName string

// ColumnName is the name of a column within a table.
type ColumnName string

// MajorityType is the data type chosen for a column by identifyMajorityType.
type MajorityType string
// ColumnType holds the data types recorded for one column and the type
// selected from them.
type ColumnType struct {
	// Types is a slice of every data type found for a column from its sources
	Types []string `json:"types"`
	// MajorityType is the type chosen from Types; BuildColumnMetadata fills it in.
	MajorityType MajorityType `json:"majority_type"`
}
39 |
// ColumnMetadata maps a table name to its columns, and each column name to
// the data types recorded for it.
type ColumnMetadata map[TableName]map[ColumnName]ColumnType
41 |
42 | // BuildColumnMetadata does 2 things:
43 | // 1) Acts as the interface between information schema data stored in DuckDB and the parts of the application that will
44 | // need to consume that data, in particular the model builder
45 | // 2) Performs type validation against each column pulled from the source databases, via the Boyer-Moore majority voting
46 | // algorithm. This majority type is then packaged into the ColumnMetadata and return to the caller. This is important
47 | // for typing the model tables created in DuckDB
48 | func BuildColumnMetadata() (ColumnMetadata, error) {
49 | // query data from preen_information_schema
50 | results, err := Execute("SELECT column_name, data_type, table_name FROM preen_information_schema")
51 | if err != nil {
52 | return nil, err
53 | }
54 |
55 | columnMetadata := buildColumnMetadataDataStructure(&results.Rows)
56 | // For each column in each table as sourced from InformationSchema, determine the majority type
57 | for tableName, tableStruct := range columnMetadata {
58 | for columnName, columnStruct := range tableStruct {
59 | majorityType, err := identifyMajorityType(columnName, columnStruct.Types)
60 | if err != nil {
61 | return nil, err
62 | }
63 | columnMetadata[tableName][columnName] = ColumnType{
64 | Types: columnStruct.Types,
65 | MajorityType: majorityType,
66 | }
67 | }
68 | }
69 |
70 | return columnMetadata, nil
71 | }
72 |
73 | // Rearranges the result set from the information schema to make it easier to process for the majority type calculator
74 | func buildColumnMetadataDataStructure(rows *[]map[string]any) ColumnMetadata {
75 | columnMetadata := make(ColumnMetadata)
76 |
77 | for _, row := range *rows {
78 |
79 | // Runtime panic waiting to happen. This depends on the information schema being built correctly and only with
80 | // type string
81 | tableName := TableName(row["table_name"].(string))
82 | columnName := ColumnName(row["column_name"].(string))
83 | dataType := (row["data_type"].(string))
84 | // Create table map if not exists
85 | _, exists := columnMetadata[tableName]
86 | if !exists {
87 | columnMetadata[tableName] = make(map[ColumnName]ColumnType)
88 | }
89 |
90 | // Create column map if not exists
91 | _, exists = columnMetadata[tableName][columnName]
92 | if !exists {
93 | columnMetadata[tableName][columnName] = ColumnType{
94 | Types: make([]string, 0),
95 | }
96 | }
97 |
98 | // Append data type to column map
99 | localTypesCopy := append(columnMetadata[tableName][columnName].Types, dataType)
100 | columnMetadata[tableName][columnName] = ColumnType{
101 | Types: localTypesCopy,
102 | }
103 |
104 | }
105 |
106 | return columnMetadata
107 | }
108 |
109 | // Select majority type of input column via Boyer-Moore majority vote algorithm
110 | func identifyMajorityType(columnName ColumnName, types []string) (MajorityType, error) {
111 | // Implement Boyer-Moore majority vote algorithm
112 | var majority MajorityType
113 | votes := 0
114 |
115 | for _, candidate := range types {
116 | mtCandidate := MajorityType(candidate)
117 | if votes == 0 {
118 | majority = mtCandidate
119 | }
120 | if mtCandidate == majority {
121 | votes++
122 | } else {
123 | votes--
124 | }
125 | }
126 |
127 | count := 0
128 |
129 | // Checking if majority candidate occurs more than n/2 times
130 | for _, candidate := range types {
131 | if MajorityType(candidate) == majority {
132 | count += 1
133 | }
134 | }
135 | if majority == "" {
136 | Warn(
137 | fmt.Sprintf("Column: '%s' is missing from majority of tables!", columnName),
138 | )
139 | } else if count > len(types)/2 && count == len(types) {
140 | Debug(
141 | fmt.Sprintf("Data type for column '%s' is: %s", columnName, majority),
142 | )
143 | return majority, nil
144 |
145 | } else if count > len(types)/2 && count != len(types) {
146 | Warn(
147 | fmt.Sprintf("Discrepancy in data types for column '%s'! Using majority data type of %s", columnName, majority),
148 | )
149 | return majority, nil
150 | }
151 |
152 | Warn(
153 | fmt.Sprintf("No majority data type found for column '%s'!", columnName),
154 | )
155 | // This needs to be made unreachable
156 | return "unknown", fmt.Errorf("no majority data type found for column '%s'", columnName)
157 | }
158 |
159 | func ParseModelColumns(mc *ModelConfig, columnMetadata ColumnMetadata) error {
160 | cp := columnParser{
161 | columns: make(map[TableName]map[ColumnName]Column),
162 | columnMetadata: columnMetadata,
163 | }
164 | for _, model := range mc.Models {
165 | switch model.Type {
166 | case "database":
167 | if model.Parsed == nil {
168 | if err := parseNoSQLDatabaseModelColumns(model, &cp); err != nil {
169 | return fmt.Errorf("error parsing noSQL database model columns: %w", err)
170 | }
171 | } else {
172 | if err := parseSQLDatabaseModelColumns(model, &cp); err != nil {
173 | return fmt.Errorf("error parsing SQL database model columns: %w", err)
174 | }
175 | }
176 | case "file":
177 | Debug("no columns to parse for file model")
178 | default:
179 | return fmt.Errorf("model type %s not supported", model.Type)
180 | }
181 | model.Columns = cp.columns
182 | model.DDLString = cp.ddlString
183 | }
184 |
185 | return nil
186 | }
187 |
188 | func parseSQLDatabaseModelColumns(model *Model, cp *columnParser) error {
189 | cp.ddlString = "preen_source_name varchar"
190 | selectStmt := model.Parsed.(*sqlparser.Select)
191 | for selectIdx := range selectStmt.SelectExprs {
192 | cp.selectIdx = selectIdx
193 | switch expr := selectStmt.SelectExprs[selectIdx].(type) {
194 | case *sqlparser.AliasedExpr:
195 | switch expr.Expr.(type) {
196 | // Process normal column.
197 | case *sqlparser.ColName:
198 | tableAlias := expr.Expr.(*sqlparser.ColName).Qualifier.Name.String()
199 | cp.tableName = model.TableMap[TableAlias(tableAlias)]
200 | if err := processModelColumn(expr, cp); err != nil {
201 | return err
202 | }
203 | // Process function expression column.
204 | case *sqlparser.FuncExpr:
205 | cp.tableName = "model_generated"
206 | if err := processFunction(expr, cp); err != nil {
207 | return err
208 | }
209 | // Process case expression column
210 | case *sqlparser.CaseExpr:
211 | cp.tableName = "model_generated"
212 | if err := processCase(expr, cp); err != nil {
213 | return err
214 | }
215 | // Process cast expression column
216 | case *sqlparser.ConvertExpr:
217 | tableAlias := expr.Expr.(*sqlparser.ConvertExpr).Expr.(*sqlparser.ColName).Qualifier.Name.String()
218 | cp.tableName = model.TableMap[TableAlias(tableAlias)]
219 | if err := processConvertColumn(expr, cp); err != nil {
220 | return err
221 | }
222 | }
223 | case *sqlparser.StarExpr:
224 | return errors.New("star expressions are not supported. please specify columns explicitly")
225 | }
226 | }
227 | return nil
228 | }
229 |
230 | func parseNoSQLDatabaseModelColumns(model *Model, cp *columnParser) error {
231 | cp.modelName = ModelName(model.Name)
232 | cp.tableName = TableName(model.Name)
233 | cp.ddlString = "preen_source_name varchar, document json"
234 | cp.columns[cp.tableName] = make(map[ColumnName]Column)
235 | sourceColumn := Column{
236 | ModelName: model.Name,
237 | TableName: &cp.tableName,
238 | IsJoin: false,
239 | Position: 0,
240 | Alias: "preen_source_name",
241 | }
242 | sourceColumnHashKey := ColumnName(fmt.Sprintf("%s.preen_source_name", model.Name))
243 | cp.columns[cp.tableName][sourceColumnHashKey] = sourceColumn
244 | documentColumn := Column{
245 | ModelName: model.Name,
246 | TableName: &cp.tableName,
247 | IsJoin: false,
248 | Position: 1,
249 | Alias: "document",
250 | }
251 | documentColumnHashKey := ColumnName(fmt.Sprintf("%s.document", model.Name))
252 | cp.columns[cp.tableName][documentColumnHashKey] = documentColumn
253 |
254 | return nil
255 | }
256 |
257 | func processModelColumn(expr *sqlparser.AliasedExpr, cp *columnParser) error {
258 | // We require fully qualified column names, i.e. table.column, users.user_id.
259 | if expr.Expr.(*sqlparser.ColName).Qualifier.Name.String() == "" {
260 | return errors.New("column names must be fully qualified, e.g. table.column")
261 | }
262 | if _, ok := cp.columns[cp.tableName]; !ok {
263 | cp.columns[cp.tableName] = make(map[ColumnName]Column)
264 | }
265 |
266 | col := Column{
267 | TableName: &cp.tableName,
268 | Position: cp.selectIdx,
269 | }
270 | if expr.As.String() != "" {
271 | col.Alias = expr.As.String()
272 | } else {
273 | col.Alias = expr.Expr.(*sqlparser.ColName).Name.String()
274 | }
275 | colName := expr.Expr.(*sqlparser.ColName).Name.String()
276 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, colName)
277 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col
278 |
279 | // Check to see if the table and column exists in the columnMetadata structure
280 | // If it does not exist, then we return an error since we are unable to determine
281 | // the appropriate data type.
282 | if _, ok := cp.columnMetadata[TableName(cp.tableName)][ColumnName(colName)]; !ok {
283 | return fmt.Errorf("column not found in table: %s.%s. check that your model query is valid", cp.tableName, colName)
284 | }
285 |
286 | // Look up the data type and append it to the table creation DDL string.
287 | // ToLower is necessary because Snowflake is an upper case-aholic
288 | colType := duckdbTypeMap[strings.ToLower(string(cp.columnMetadata[TableName(cp.tableName)][ColumnName(colName)].MajorityType))]
289 | if colType == "" {
290 | return fmt.Errorf("data type not found for column: %s.%s", cp.tableName, colName)
291 | }
292 | cp.ddlString = fmt.Sprintf("%s, %s %s", cp.ddlString, col.Alias, colType)
293 |
294 | return nil
295 | }
296 |
297 | func processFunction(expr *sqlparser.AliasedExpr, cp *columnParser) error {
298 | funcExpr := expr.Expr.(*sqlparser.FuncExpr)
299 | if _, ok := cp.columns[cp.tableName]; !ok {
300 | cp.columns[cp.tableName] = make(map[ColumnName]Column)
301 | }
302 | col := Column{
303 | TableName: &cp.tableName,
304 | FuncName: FuncName(funcExpr.Name.String()),
305 | Position: cp.selectIdx,
306 | }
307 | if expr.As.String() != "" {
308 | col.Alias = expr.As.String()
309 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, col.Alias)
310 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col
311 | } else {
312 | col.Alias = fmt.Sprintf("\"%s\"", sqlparser.String(expr))
313 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, col.Alias)
314 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col
315 | }
316 |
317 | switch col.FuncName {
318 | // Count always returns an integer type
319 | case "count":
320 | cp.ddlString = fmt.Sprintf("%s, %s bigint", cp.ddlString, col.Alias)
321 | // Average always returns a double
322 | case "avg":
323 | cp.ddlString = fmt.Sprintf("%s, %s double", cp.ddlString, col.Alias)
324 | // Look up the data type of the column inside the function and use that data type
325 | default:
326 | selectExpr := funcExpr.Exprs[0].(*sqlparser.AliasedExpr).Expr
327 | colName := selectExpr.(*sqlparser.ColName).Name.String()
328 | tableName := TableName(selectExpr.(*sqlparser.ColName).Qualifier.Name.String())
329 | if _, ok := cp.columnMetadata[tableName][ColumnName(colName)]; !ok {
330 | return fmt.Errorf("column not found in table: %s.%s. check that your model query is valid", cp.tableName, colName)
331 | }
332 | colType := duckdbTypeMap[string(cp.columnMetadata[tableName][ColumnName(colName)].MajorityType)]
333 | cp.ddlString = fmt.Sprintf("%s, %s %s", cp.ddlString, col.Alias, colType)
334 | }
335 |
336 | return nil
337 | }
338 |
339 | func processCase(expr *sqlparser.AliasedExpr, cp *columnParser) error {
340 | if _, ok := cp.columns[cp.tableName]; !ok {
341 | cp.columns[cp.tableName] = make(map[ColumnName]Column)
342 | }
343 | col := Column{
344 | TableName: &cp.tableName,
345 | Position: cp.selectIdx,
346 | }
347 |
348 | if expr.As.String() != "" {
349 | col.Alias = expr.As.String()
350 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, col.Alias)
351 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col
352 | } else {
353 | col.Alias = fmt.Sprintf("\"%s\"", sqlparser.String(expr))
354 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, col.Alias)
355 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col
356 | }
357 |
358 | colType := new(string)
359 | whens := expr.Expr.(*sqlparser.CaseExpr).Whens
360 | // Check the first when clause to determine the data type of the column
361 | // If any of the when clauses have a different data type, then the SQL engine
362 | // will throw an error for us.
363 | switch expr := whens[0].Val.(type) {
364 | case sqlparser.BoolVal:
365 | *colType = "boolean"
366 | case *sqlparser.SQLVal:
367 | switch expr.Type {
368 | case sqlparser.IntVal:
369 | *colType = "bigint"
370 | case sqlparser.StrVal:
371 | *colType = "varchar"
372 | case sqlparser.FloatVal:
373 | *colType = "double"
374 | default:
375 | return errors.New("unsupported data type in case expression")
376 | }
377 | default:
378 | return errors.New("unsupported data type in case expression")
379 | }
380 | cp.ddlString = fmt.Sprintf("%s, %s %s", cp.ddlString, col.Alias, *colType)
381 |
382 | return nil
383 | }
384 |
385 | func processConvertColumn(expr *sqlparser.AliasedExpr, cp *columnParser) error {
386 | convertExpr := expr.Expr.(*sqlparser.ConvertExpr)
387 | if _, ok := cp.columns[cp.tableName]; !ok {
388 | cp.columns[cp.tableName] = make(map[ColumnName]Column)
389 | }
390 | col := Column{
391 | TableName: &cp.tableName,
392 | Position: cp.selectIdx,
393 | }
394 | if expr.As.String() != "" {
395 | col.Alias = expr.As.String()
396 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, col.Alias)
397 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col
398 | } else {
399 | col.Alias = fmt.Sprintf("\"%s\"", sqlparser.String(expr))
400 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, col.Alias)
401 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col
402 | }
403 | colType := convertExpr.Type.Type
404 | cp.ddlString = fmt.Sprintf("%s, %s %s", cp.ddlString, col.Alias, colType)
405 |
406 | return nil
407 | }
408 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
2 | filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
3 | github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 h1:/vQbFIOMbk2FiG/kXiLl8BRyzTWDw7gX/Hz7Dd5eDMs=
4 | github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4/go.mod h1:hN7oaIRCjzsZ2dE+yG5k+rsdt3qcwykqK6HVGcKwsw4=
5 | github.com/99designs/keyring v1.2.2 h1:pZd3neh/EmUzWONb35LxQfvuY7kiSXAq3HQd97+XBn0=
6 | github.com/99designs/keyring v1.2.2/go.mod h1:wes/FrByc8j7lFOAGLGSNEg8f/PaI3cgTBqhFkHUrPk=
7 | github.com/Azure/azure-sdk-for-go/sdk/azcore v1.17.0 h1:g0EZJwz7xkXQiZAI5xi9f3WWFYBlX1CPTrR+NDToRkQ=
8 | github.com/Azure/azure-sdk-for-go/sdk/azcore v1.17.0/go.mod h1:XCW7KnZet0Opnr7HccfUw1PLc4CjHqpcaxW8DHklNkQ=
9 | github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.0 h1:B/dfvscEQtew9dVuoxqxrUKKv8Ih2f55PydknDamU+g=
10 | github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.0/go.mod h1:fiPSssYvltE08HJchL04dOy+RD4hgrjph0cwGGMntdI=
11 | github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 h1:ywEEhmNahHBihViHepv3xPBn1663uRv2t2q/ESv9seY=
12 | github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0/go.mod h1:iZDifYGJTIgIIkYRNWPENUnqx6bJ2xnSDFI2tjwZNuY=
13 | github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.6.0 h1:PiSrjRPpkQNjrM8H0WwKMnZUdu1RGMtd/LdGKUrOo+c=
14 | github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.6.0/go.mod h1:oDrbWx4ewMylP7xHivfgixbfGBT6APAwsSoHRKotnIc=
15 | github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.0 h1:UXT0o77lXQrikd1kgwIPQOUect7EoR/+sbP4wQKdzxM=
16 | github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.0/go.mod h1:cTvi54pg19DoT07ekoeMgE/taAwNtCShVeZqA+Iv2xI=
17 | github.com/AzureAD/microsoft-authentication-library-for-go v1.3.2 h1:kYRSnvJju5gYVyhkij+RTJ/VR6QIUaCfWeaFm2ycsjQ=
18 | github.com/AzureAD/microsoft-authentication-library-for-go v1.3.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI=
19 | github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0=
20 | github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
21 | github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU=
22 | github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk=
23 | github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
24 | github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
25 | github.com/apache/arrow-go/v18 v18.1.0 h1:agLwJUiVuwXZdwPYVrlITfx7bndULJ/dggbnLFgDp/Y=
26 | github.com/apache/arrow-go/v18 v18.1.0/go.mod h1:tigU/sIgKNXaesf5d7Y95jBBKS5KsxTqYBKXFsvKzo0=
27 | github.com/apache/arrow/go/v16 v16.1.0 h1:dwgfOya6s03CzH9JrjCBx6bkVb4yPD4ma3haj9p7FXI=
28 | github.com/apache/arrow/go/v16 v16.1.0/go.mod h1:9wnc9mn6vEDTRIm4+27pEjQpRKuTvBaessPoEXQzxWA=
29 | github.com/apache/thrift v0.21.0 h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE=
30 | github.com/apache/thrift v0.21.0/go.mod h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw=
31 | github.com/aws/aws-sdk-go-v2 v1.34.0 h1:9iyL+cjifckRGEVpRKZP3eIxVlL06Qk1Tk13vreaVQU=
32 | github.com/aws/aws-sdk-go-v2 v1.34.0/go.mod h1:JgstGg0JjWU1KpVJjD5H0y0yyAIpSdKEq556EI6yOOM=
33 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.8 h1:zAxi9p3wsZMIaVCdoiQp2uZ9k1LsZvmAnoTBeZPXom0=
34 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.8/go.mod h1:3XkePX5dSaxveLAYY7nsbsZZrKxCyEuE5pM4ziFxyGg=
35 | github.com/aws/aws-sdk-go-v2/config v1.29.2 h1:JuIxOEPcSKpMB0J+khMjznG9LIhIBdmqNiEcPclnwqc=
36 | github.com/aws/aws-sdk-go-v2/config v1.29.2/go.mod h1:HktTHregOZwNSM/e7WTfVSu9RCX+3eOv+6ij27PtaYs=
37 | github.com/aws/aws-sdk-go-v2/credentials v1.17.55 h1:CDhKnDEaGkLA5ZszV/qw5uwN5M8rbv9Cl0JRN+PRsaM=
38 | github.com/aws/aws-sdk-go-v2/credentials v1.17.55/go.mod h1:kPD/vj+RB5MREDUky376+zdnjZpR+WgdBBvwrmnlmKE=
39 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.25 h1:kU7tmXNaJ07LsyN3BUgGqAmVmQtq0w6duVIHAKfp0/w=
40 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.25/go.mod h1:OiC8+OiqrURb1wrwmr/UbOVLFSWEGxjinj5C299VQdo=
41 | github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.55 h1:yfz56qEKO2MqTV0m81KtZS7swlP335FMrmoC1GBgU5k=
42 | github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.55/go.mod h1:O/fEJxrOLSCbhA8jL1xHwo8gzbN/iNcT+y7aq7c6KHE=
43 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.29 h1:Ej0Rf3GMv50Qh4G4852j2djtoDb7AzQ7MuQeFHa3D70=
44 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.29/go.mod h1:oeNTC7PwJNoM5AznVr23wxhLnuJv0ZDe5v7w0wqIs9M=
45 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.29 h1:6e8a71X+9GfghragVevC5bZqvATtc3mAMgxpSNbgzF0=
46 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.29/go.mod h1:c4jkZiQ+BWpNqq7VtrxjwISrLrt/VvPq3XiopkUIolI=
47 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2 h1:Pg9URiobXy85kgFev3og2CuOZ8JZUBENF+dcgWBaYNk=
48 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2/go.mod h1:FbtygfRFze9usAadmnGJNc8KsP346kEe+y2/oyhGAGc=
49 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.29 h1:g9OUETuxA8i/Www5Cby0R3WSTe7ppFTZXHVLNskNS4w=
50 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.29/go.mod h1:CQk+koLR1QeY1+vm7lqNfFii07DEderKq6T3F1L2pyc=
51 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2 h1:D4oz8/CzT9bAEYtVhSBmFj2dNOtaHOtMKc2vHBwYizA=
52 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2/go.mod h1:Za3IHqTQ+yNcRHxu1OFucBh0ACZT4j4VQFF0BqpZcLY=
53 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.3 h1:EP1ITDgYVPM2dL1bBBntJ7AW5yTjuWGz9XO+CZwpALU=
54 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.3/go.mod h1:5lWNWeAgWenJ/BZ/CP9k9DjLbC0pjnM045WjXRPPi14=
55 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.10 h1:hN4yJBGswmFTOVYqmbz1GBs9ZMtQe8SrYxPwrkrlRv8=
56 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.10/go.mod h1:TsxON4fEZXyrKY+D+3d2gSTyJkGORexIYab9PTf56DA=
57 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.10 h1:fXoWC2gi7tdJYNTPnnlSGzEVwewUchOi8xVq/dkg8Qs=
58 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.10/go.mod h1:cvzBApD5dVazHU8C2rbBQzzzsKc8m5+wNJ9mCRZLKPc=
59 | github.com/aws/aws-sdk-go-v2/service/s3 v1.75.0 h1:UPQJDyqUXICUt60X4PwbiEf+2QQ4VfXUhDk8OEiGtik=
60 | github.com/aws/aws-sdk-go-v2/service/s3 v1.75.0/go.mod h1:hHnELVnIHltd8EOF3YzahVX6F6y2C6dNqpRj1IMkS5I=
61 | github.com/aws/aws-sdk-go-v2/service/sso v1.24.12 h1:kznaW4f81mNMlREkU9w3jUuJvU5g/KsqDV43ab7Rp6s=
62 | github.com/aws/aws-sdk-go-v2/service/sso v1.24.12/go.mod h1:bZy9r8e0/s0P7BSDHgMLXK2KvdyRRBIQ2blKlvLt0IU=
63 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.11 h1:mUwIpAvILeKFnRx4h1dEgGEFGuV8KJ3pEScZWVFYuZA=
64 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.11/go.mod h1:JDJtD+b8HNVv71axz8+S5492KM8wTzHRFpMKQbPlYxw=
65 | github.com/aws/aws-sdk-go-v2/service/sts v1.33.10 h1:g9d+TOsu3ac7SgmY2dUf1qMgu/uJVTlQ4VCbH6hRxSw=
66 | github.com/aws/aws-sdk-go-v2/service/sts v1.33.10/go.mod h1:WZfNmntu92HO44MVZAubQaz3qCuIdeOdog2sADfU6hU=
67 | github.com/aws/smithy-go v1.22.2 h1:6D9hW43xKFrRx/tXXfAlIZc4JI+yQe6snnWcQyxSyLQ=
68 | github.com/aws/smithy-go v1.22.2/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg=
69 | github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM=
70 | github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ=
71 | github.com/chzyer/readline v1.5.1 h1:upd/6fQk4src78LMRzh5vItIt361/o4uq553V8B5sGI=
72 | github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk=
73 | github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04=
74 | github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8=
75 | github.com/cpuguy83/go-md2man/v2 v2.0.6 h1:XJtiaUW6dEEqVuZiMTn1ldk455QWwEIsMIJlo5vtkx0=
76 | github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
77 | github.com/danieljoos/wincred v1.2.2 h1:774zMFJrqaeYCK2W57BgAem/MLi6mtSE47MB6BOJ0i0=
78 | github.com/danieljoos/wincred v1.2.2/go.mod h1:w7w4Utbrz8lqeMbDAK0lkNJUv5sAOkFi7nd/ogr0Uh8=
79 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
80 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
81 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
82 | github.com/dvsekhvalnov/jose2go v1.8.0 h1:LqkkVKAlHFfH9LOEl5fe4p/zL02OhWE7pCufMBG2jLA=
83 | github.com/dvsekhvalnov/jose2go v1.8.0/go.mod h1:QsHjhyTlD/lAVqn/NSbVZmSCGeDehTB/mPZadG+mhXU=
84 | github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
85 | github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
86 | github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
87 | github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
88 | github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
89 | github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
90 | github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 h1:ZpnhV/YsD2/4cESfV5+Hoeu/iUR3ruzNvZ+yQfO03a0=
91 | github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2/go.mod h1:bBOAhwG1umN6/6ZUMtDFBMQR8jRg9O75tm9K00oMsK4=
92 | github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk=
93 | github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
94 | github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
95 | github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
96 | github.com/google/flatbuffers v25.1.24+incompatible h1:4wPqL3K7GzBd1CwyhSd3usxLKOaJN/AC6puCca6Jm7o=
97 | github.com/google/flatbuffers v25.1.24+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
98 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
99 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
100 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
101 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
102 | github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c h1:6rhixN/i8ZofjG1Y75iExal34USq5p+wiN1tpie8IrU=
103 | github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c/go.mod h1:NMPJylDgVpX0MLRlPy15sqSwOFv/U1GZ2m21JhFfek0=
104 | github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
105 | github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
106 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
107 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
108 | github.com/jackc/pgx/v5 v5.7.2 h1:mLoDLV6sonKlvjIEsV56SkWNCnuNv531l94GaIzO+XI=
109 | github.com/jackc/pgx/v5 v5.7.2/go.mod h1:ncY89UGWxg82EykZUwSpUKEfccBGGYq1xjrOpsbsfGQ=
110 | github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
111 | github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
112 | github.com/jedib0t/go-pretty/v6 v6.6.5 h1:9PgMJOVBedpgYLI56jQRJYqngxYAAzfEUua+3NgSqAo=
113 | github.com/jedib0t/go-pretty/v6 v6.6.5/go.mod h1:Uq/HrbhuFty5WSVNfjpQQe47x16RwVGXIveNGEyGtHs=
114 | github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
115 | github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
116 | github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4=
117 | github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE=
118 | github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
119 | github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
120 | github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY=
121 | github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8=
122 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
123 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
124 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
125 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
126 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
127 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
128 | github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
129 | github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
130 | github.com/marcboeker/go-duckdb v1.8.3 h1:ZkYwiIZhbYsT6MmJsZ3UPTHrTZccDdM4ztoqSlEMXiQ=
131 | github.com/marcboeker/go-duckdb v1.8.3/go.mod h1:C9bYRE1dPYb1hhfu/SSomm78B0FXmNgRvv6YBW/Hooc=
132 | github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
133 | github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
134 | github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
135 | github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY=
136 | github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI=
137 | github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE=
138 | github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
139 | github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
140 | github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE=
141 | github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
142 | github.com/mtibben/percent v0.2.1 h1:5gssi8Nqo8QU/r2pynCm+hBQHpkB/uNK7BJCFogWdzs=
143 | github.com/mtibben/percent v0.2.1/go.mod h1:KG9uO+SZkUp+VkRHsCdYQV3XSZrrSpR3O9ibNBTZrns=
144 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
145 | github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU=
146 | github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
147 | github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
148 | github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
149 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
150 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
151 | github.com/preendata/sqlparser v0.0.1 h1:b6rQhOPudlKhTjfWiW51mPFNa9S6en0cnOiKPharJzs=
152 | github.com/preendata/sqlparser v0.0.1/go.mod h1:34zYH6Q7NIW6XT4buKfY4S3QalQwgeeehwHW7DD9Se8=
153 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
154 | github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
155 | github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
156 | github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
157 | github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
158 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
159 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
160 | github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
161 | github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
162 | github.com/snowflakedb/gosnowflake v1.13.0 h1:NQoy4mnHUmBuruJhzAGVRO9YLpFxayYTCLf+dxvG7bk=
163 | github.com/snowflakedb/gosnowflake v1.13.0/go.mod h1:nwiPNHaS3EGxnW1rr10ascVYFLA4EKrqMX2TxPt0+N4=
164 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
165 | github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
166 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
167 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
168 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
169 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
170 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
171 | github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w=
172 | github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ=
173 | github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
174 | github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
175 | github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
176 | github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
177 | github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8=
178 | github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
179 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
180 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
181 | github.com/xwb1989/sqlparser v0.0.0-20180606152119-120387863bf2 h1:zzrxE1FKn5ryBNl9eKOeqQ58Y/Qpo3Q9QNxKHX5uzzQ=
182 | github.com/xwb1989/sqlparser v0.0.0-20180606152119-120387863bf2/go.mod h1:hzfGeIUDq/j97IG+FhNqkowIyEcD88LrW6fyU3K3WqY=
183 | github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 h1:ilQV1hzziu+LLM3zUTJ0trRztfwgjqKnBWNtSRkbmwM=
184 | github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfSfmXjznFBSZNN13rSJjlIOI1fUNAtF7rmI=
185 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
186 | github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
187 | github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
188 | github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
189 | github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
190 | go.mongodb.org/mongo-driver v1.17.2 h1:gvZyk8352qSfzyZ2UMWcpDpMSGEr1eqE4T793SqyhzM=
191 | go.mongodb.org/mongo-driver v1.17.2/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ=
192 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
193 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
194 | golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc=
195 | golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc=
196 | golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c h1:KL/ZBHXgKGVmuZBZ01Lt57yE5ws8ZPSkkihmEyq7FXc=
197 | golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU=
198 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
199 | golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4=
200 | golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
201 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
202 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
203 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
204 | golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
205 | golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
206 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
207 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
208 | golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
209 | golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
210 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
211 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
212 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
213 | golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
214 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
215 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
216 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
217 | golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
218 | golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
219 | golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
220 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
221 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
222 | golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg=
223 | golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek=
224 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
225 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
226 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
227 | golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
228 | golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
229 | golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
230 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
231 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
232 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
233 | golang.org/x/tools v0.29.0 h1:Xx0h3TtM9rzQpQuR4dKLrdglAmCEN5Oi+P74JdhdzXE=
234 | golang.org/x/tools v0.29.0/go.mod h1:KMQVMRsVxU6nHCFXrBPhDB8XncLNLM0lIy/F14RP588=
235 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
236 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=
237 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
238 | gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
239 | gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
240 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
241 | gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
242 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
243 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
244 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
245 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
246 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
247 |
--------------------------------------------------------------------------------