├── docs ├── .gitbook │ └── assets │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ └── overview.png ├── documentation │ ├── integrations │ │ ├── file-formats │ │ │ ├── README.md │ │ │ └── csv-format.md │ │ ├── cloud-blob-storage │ │ │ ├── README.md │ │ │ └── amazon-s3.md │ │ ├── README.md │ │ └── databases │ │ │ ├── README.md │ │ │ ├── mysql.md │ │ │ ├── mongodb.md │ │ │ └── postgres.md │ └── config │ │ ├── README.md │ │ ├── models.md │ │ └── sources.md ├── concepts │ ├── overview.md │ ├── sources.md │ ├── validation.md │ └── models.md ├── getting-started │ ├── hello-world │ │ ├── README.md │ │ ├── configuring-sources.md │ │ └── creating-models.md │ └── installation.md ├── SUMMARY.md └── README.md ├── .env.example ├── Makefile ├── internal ├── engine │ ├── fs.go │ ├── query.go │ ├── env_test.go │ ├── config.go │ ├── insert.go │ ├── printer.go │ ├── sources.go │ ├── logging.go │ ├── tables.go │ ├── env.go │ ├── duckdb.go │ ├── mongo.go │ ├── types_test.go │ ├── retrieve.go │ ├── snowflake.go │ ├── mysql.go │ ├── s3.go │ ├── postgres.go │ ├── types.go │ ├── models.go │ ├── metadata.go │ └── columns.go └── cli │ ├── repl.go │ ├── commands.go │ └── app.go ├── main.go ├── .gitignore ├── .github └── workflows │ ├── ci.yaml │ └── release.yaml ├── go.mod ├── README.md ├── LICENSE └── go.sum /docs/.gitbook/assets/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scalecraft-dev/preen/HEAD/docs/.gitbook/assets/1.png -------------------------------------------------------------------------------- /docs/.gitbook/assets/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scalecraft-dev/preen/HEAD/docs/.gitbook/assets/2.png -------------------------------------------------------------------------------- /docs/.gitbook/assets/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scalecraft-dev/preen/HEAD/docs/.gitbook/assets/3.png -------------------------------------------------------------------------------- /docs/.gitbook/assets/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scalecraft-dev/preen/HEAD/docs/.gitbook/assets/4.png -------------------------------------------------------------------------------- /docs/.gitbook/assets/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scalecraft-dev/preen/HEAD/docs/.gitbook/assets/overview.png -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # DEBUG | INFO | WARN | ERROR 2 | PREEN_LOG_LEVEL=INFO 3 | # Config path for Preen 4 | PREEN_CONFIG_PATH=./.preen/ 5 | # Model path for Preen 6 | PREEN_MODELS_PATH=./models/ -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: integration-test 2 | integration-test: build 3 | build/ci/integration-test.sh 4 | 5 | .PHONY: build 6 | build: 7 | go build -o bin/preen main.go 8 | 9 | .PHONY: lint 10 | lint: 11 | golangci-lint run 12 | 13 | .PHONY: install-depenencies 14 | install-depenencies: 15 | brew install golangci-lint 16 | 17 | .PHONY: test 18 | test: 19 | go test -v ./... 
-------------------------------------------------------------------------------- /docs/documentation/integrations/file-formats/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how to configure preen to connect to file systems. 3 | --- 4 | 5 | # File Formats 6 | 7 | The file formats are configured as a YAML file and contain configurations specific to the underlying file storage system. A full reference of all options can be found here. 8 | 9 | ## CSV 10 | 11 | Preen supports the following file formats for file-based sources: 12 | 13 | - [CSV](csv-format.md) 14 | -------------------------------------------------------------------------------- /docs/concepts/overview.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: Show the overall system architecture. 3 | --- 4 | 5 | # Overview 6 | 7 | The core concepts of Preen are: 8 | 9 | - [Sources](sources.md) 10 | - [Models](models.md) 11 | 12 | These concepts are used to define the data you want to retrieve, the shape of the data, and the source of the data. The overall system architecture is as follows: 13 | 14 |
15 | -------------------------------------------------------------------------------- /internal/engine/fs.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | ) 7 | 8 | // getYmlorYamlPath returns the path to the sources.yml or sources.yaml file. 9 | func getYmlorYamlPath(path string, fileName string) string { 10 | ymlFile := filepath.Join(path, fileName+".yml") 11 | yamlFile := filepath.Join(path, fileName+".yaml") 12 | 13 | if _, err := os.Stat(ymlFile); err == nil { 14 | return ymlFile 15 | } 16 | 17 | // Default return yaml, up to handlers to create if not exists 18 | return yamlFile 19 | 20 | } 21 | -------------------------------------------------------------------------------- /docs/documentation/integrations/cloud-blob-storage/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how to configure preen to connect to cloud blob storage services like Amazon S3. 3 | --- 4 | 5 | # Cloud Blob Storage 6 | 7 | Preen can connect to cloud blob storage services like Amazon S3. This is useful for accessing data that is already in a data lake. 8 | 9 | ## Supported Integrations 10 | 11 | Preen currently supports the following cloud blob storage services: 12 | 13 | - [Amazon S3](amazon-s3.md) 14 | 15 | ## Code References 16 | 17 | - [s3.go](https://github.com/preendata/preen/blob/main/internal/engine/s3.go) 18 | -------------------------------------------------------------------------------- /docs/getting-started/hello-world/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: Getting your first project running. 3 | --- 4 | 5 | # Hello World 6 | 7 | Getting start with Preen is as simple as connecting to the data sources you want to query, defining your model, and querying the result. 8 | 9 | The following pages provide a quick, low detail setup guide for those looking to get up and running on their own data ASAP. 10 | 11 | You can see how to configure Preen in the [Example repository](https://github.com/preendata/preen-template). You can also use this repository as a template for creating your first Preen project. 
12 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | 7 | "github.com/joho/godotenv" 8 | "github.com/preendata/preen/internal/cli" 9 | "github.com/preendata/preen/internal/engine" 10 | ) 11 | 12 | func main() { 13 | err := godotenv.Load() 14 | if err != nil { 15 | if os.Getenv("PREEN_DEBUG") == "true" { 16 | log.Print("warn: error loading .env file", err) 17 | } 18 | } 19 | 20 | err = engine.Initialize() 21 | if err != nil { 22 | log.Print("error initializing logging", err) 23 | } 24 | 25 | app := cli.NewApp() 26 | if err := app.Run(os.Args); err != nil { 27 | engine.Fatal(err) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /internal/engine/query.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | type QueryResults struct { 4 | Rows []map[string]any 5 | Columns []string 6 | ResultsChan chan map[string]any 7 | } 8 | 9 | var err error 10 | 11 | func Execute(statement string) (*QueryResults, error) { 12 | Debug("Executing query: " + statement) 13 | qr := QueryResults{ 14 | ResultsChan: make(chan map[string]any), 15 | } 16 | 17 | go qr.collectResults(qr.ResultsChan) 18 | 19 | qr.Columns, err = ddbQuery(statement, qr.ResultsChan) 20 | if err != nil { 21 | return nil, err 22 | } 23 | 24 | return &qr, nil 25 | } 26 | 27 | func (qr *QueryResults) collectResults(c chan map[string]any) { 28 | for row := range c { 29 | qr.Rows = append(qr.Rows, row) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /docs/documentation/integrations/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: details of the integrations preen supports. 3 | --- 4 | 5 | # Integrations 6 | 7 | Preen supports a wide range of integrations, including databases, and file systems. We are currently adding support for more systems. 8 | 9 | ## Databases 10 | 11 | Preen supports the following SQL databases: 12 | 13 | - [Postgres](./databases/postgres.md) 14 | - [MySQL](./databases/mysql.md) 15 | - [MongoDB](./databases/mongodb.md) 16 | 17 | ## Cloud Blob Storage 18 | 19 | Preen supports the following cloud blob storage systems: 20 | 21 | - [Amazon S3](./cloud-blob-storage/amazon-s3.md) 22 | 23 | ## File Formats 24 | 25 | Preen supports the following file formats for file-based sources: 26 | 27 | - [CSV](./file-formats/csv-format.md) 28 | -------------------------------------------------------------------------------- /docs/documentation/integrations/databases/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how to configure preen to connect to databases. 3 | --- 4 | 5 | # Databases 6 | 7 | Preen can connect to SQL and NoSQL databases. Our current implementation uses the Go [sql](https://pkg.go.dev/database/sql) and [pgx](https://github.com/jackc/pgx) libraries to connect to databases. 
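For orientation, the sketch below shows what a pgx-based connection looks like in Go. It is illustrative only: the DSN, credentials, and the pgx major version (v5 here) are assumptions, and Preen's actual connection handling lives in postgres.go and mysql.go.

```go
package main

import (
	"context"
	"fmt"

	"github.com/jackc/pgx/v5"
)

func main() {
	ctx := context.Background()

	// Hypothetical DSN; Preen assembles its own connection string from sources.yaml.
	conn, err := pgx.Connect(ctx, "postgres://user:password@localhost:5432/postgres")
	if err != nil {
		panic(err)
	}
	defer conn.Close(ctx)

	// Simple round-trip query to confirm the connection works.
	var version string
	if err := conn.QueryRow(ctx, "select version()").Scan(&version); err != nil {
		panic(err)
	}
	fmt.Println(version)
}
```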
8 | 9 | ## Supported Integrations 10 | 11 | Preen currently supports the following SQL databases: 12 | 13 | - [Postgres](postgres.md) 14 | - [MySQL](mysql.md) 15 | - [MongoDB](mongodb.md) 16 | 17 | ## Code References 18 | 19 | - [mysql.go](https://github.com/preendata/preen/blob/main/internal/engine/mysql.go) 20 | - [postgres.go](https://github.com/preendata/preen/blob/main/internal/engine/postgres.go) 21 | - [mongo.go](https://github.com/preendata/preen/blob/main/internal/engine/mongo.go) -------------------------------------------------------------------------------- /internal/engine/env_test.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func TestGetEnv(t *testing.T) { 9 | // Set up the environment variable 10 | os.Setenv("TEST_KEY", "test_value") 11 | defer os.Unsetenv("TEST_KEY") 12 | 13 | // Test cases 14 | tests := []struct { 15 | key string 16 | defaultValue string 17 | expected string 18 | required bool 19 | }{ 20 | {"TEST_KEY", "default_value", "test_value", true}, 21 | {"NON_EXISTENT_KEY", "default_value", "default_value", false}, 22 | } 23 | 24 | for _, tt := range tests { 25 | t.Run(tt.key, func(t *testing.T) { 26 | result := getEnv(tt.key, tt.defaultValue, tt.required) 27 | if result != tt.expected { 28 | t.Errorf("GetEnv(%s, %s) = %s; want %s", tt.key, tt.defaultValue, result, tt.expected) 29 | } 30 | }) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /internal/engine/config.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import "fmt" 4 | 5 | const Version = "v0.2.4" 6 | 7 | func GetConfig(modelTarget string) (*SourceConfig, *ModelConfig, error) { 8 | sc, err := GetSourceConfig() 9 | if err != nil { 10 | return nil, nil, err 11 | } 12 | 13 | mc, err := GetModelConfigs(modelTarget) 14 | if err != nil { 15 | return nil, nil, err 16 | } 17 | 18 | return sc, mc, nil 19 | } 20 | 21 | func ValidateConfigs(sc *SourceConfig, mc *ModelConfig) error { 22 | if err := errorOnMissingModels(sc, mc); err != nil { 23 | return fmt.Errorf("error on missing models: %w", err) 24 | } 25 | 26 | if err := removeUnusedModels(sc, mc); err != nil { 27 | return fmt.Errorf("error removing unused models: %w", err) 28 | } 29 | 30 | if err := parseModels(mc); err != nil { 31 | return fmt.Errorf("error parsing models: %w", err) 32 | } 33 | 34 | return nil 35 | } 36 | -------------------------------------------------------------------------------- /docs/documentation/config/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how to configure preen. 3 | --- 4 | 5 | # Config 6 | 7 | Preen is configured using a YAML file. The config file is used to define the sources, models, and other configurations. You can customize the location of the config file by setting the `PREEN_CONFIG_PATH` environment variable. If no environment variable is set, Preen will look for a file called `~/.preen/sources.yaml`. You can also configure a custom path where Preen will look for model files by setting the `PREEN_MODELS_PATH` environment variable. If no environment variable is set, Preen will look for models configured in `~/.preen/models.yaml`. 
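As a rough illustration of how those defaults resolve, the sketch below mirrors the behaviour described above. The exact logic and default paths are defined in env.go, so treat this as an approximation.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// resolvePath returns the environment variable if it is set, otherwise a
// default path under the current user's home directory.
func resolvePath(envKey, defaultRelPath string) string {
	if v := os.Getenv(envKey); v != "" {
		return v
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return defaultRelPath
	}
	return filepath.Join(home, defaultRelPath)
}

func main() {
	fmt.Println(resolvePath("PREEN_CONFIG_PATH", ".preen"))
	fmt.Println(resolvePath("PREEN_MODELS_PATH", filepath.Join(".preen", "models")))
}
```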
8 | 9 | ## Config File Reference 10 | 11 | - [Sources](sources.md) 12 | - [Models](models.md) 13 | 14 | ## Code References 15 | 16 | - [env.go](https://github.com/preendata/preen/blob/main/internal/engine/env.go) 17 | - [config.go](https://github.com/preendata/preen/blob/main/internal/engine/config.go) 18 | - [sources.go](https://github.com/preendata/preen/blob/main/internal/engine/sources.go) 19 | - [models.go](https://github.com/preendata/preen/blob/main/internal/engine/models.go) 20 | -------------------------------------------------------------------------------- /docs/getting-started/hello-world/configuring-sources.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: Define the data sources your Preen session can connect to 3 | --- 4 | 5 | # Configuring Sources 6 | 7 | Preen maintains a configuration file in `$HOME/.preen/sources.yml` by default. This is can be overridden via the `PREEN_CONFIG_PATH` environment variable. 8 | 9 | A config file might look like this: 10 | 11 | ```yaml 12 | sources: 13 | - name: s3-model 14 | engine: s3 15 | connection: 16 | bucket_name: users 17 | region: us-east-1 18 | - name: postgres-model 19 | engine: postgres 20 | connection: 21 | host: localhost 22 | port: 33061 23 | database: postgres 24 | username: root 25 | password: myp@assword 26 | - name: mongo-model 27 | engine: mongodb 28 | connection: 29 | host: ${MONGO_HOST} 30 | port: ${MONGO_PORT} 31 | database: mongo 32 | ``` 33 | 34 | In a nutshell, your configuration is primarily a list of data sources, credentials, and their engine classification (see [config](../../documentation/config/ "mention")for list of supported engines). **Be sure to add this file to your `.gitignore` if you are keeping it somewhere version controlled.** 35 | -------------------------------------------------------------------------------- /docs/concepts/sources.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: what is a source? 3 | --- 4 | 5 | # Sources 6 | 7 | ## Overview 8 | 9 | A Preen **Source** is any data storage system that is listed under the [integrations](../documentation/integrations/ "mention") section, such as a relational database (e.g. Postgres, MySQL etc.), NoSQL database (MongoDB) or file store (Amazon S3). 10 | 11 | ## Definition 12 | 13 | A Source is a storage system-dependent configuration that specifies: 14 | 15 | 1. The name of the source 16 | 2. The type of the source 17 | 3. The connection details for the source 18 | 19 | ## Examples 20 | 21 | ### Databases 22 | 23 | ```yaml 24 | sources: 25 | - name: users-db-us-east-1 26 | engine: mysql 27 | connection: 28 | host: localhost 29 | port: 5432 30 | database: mydatabase 31 | user: ${DB_USER} 32 | password: ${DB_PASSWORD} 33 | models: 34 | - users 35 | ``` 36 | 37 | ### Amazon S3 38 | 39 | ```yaml 40 | sources: 41 | - name: users-s3-us-east-1 42 | engine: s3 43 | connection: 44 | bucket_name: users-bucket 45 | region: us-east-1 46 | models: 47 | - users 48 | ``` 49 | 50 | For detailed configuration reference see [sources.md](../documentation/config/sources.md "mention") -------------------------------------------------------------------------------- /docs/concepts/validation.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how is data validated? 
3 | --- 4 | 5 | # Validation 6 | 7 | ## Overview 8 | 9 | When collating data from multiple sources, it is possible that the data types of the columns do not match. For example, a column may be defined as a `string` in one source and as an `int` in another. Preen will attempt to coerce the data types of the columns to the most common data type across all sources. We do this by implementing a [majority voting algorithm](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_majority_vote_algorithm). If we are unable to determine the data type of a column, we will error out and require manual intervention. 10 | 11 | **Note:** There will be cases where you need to manually cast the data types of the columns in your model. 12 | 13 | We store the results of the validation step in a DuckDB table called `preen_information_schema`. You can use this table to inspect the results of the validation step and to cast the data types of the columns in your model. 14 | 15 | ## CLI Commmands 16 | 17 | ```bash 18 | preen source validate 19 | ``` 20 | 21 | ## Code References 22 | 23 | - [metadata.go](https://github.com/preendata/preen/blob/main/internal/engine/metadata.go) 24 | - [columns.go](https://github.com/preendata/preen/blob/main/internal/engine/columns.go) 25 | -------------------------------------------------------------------------------- /docs/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Table of contents 2 | 3 | * [Preen](README.md) 4 | 5 | ## Getting Started 6 | 7 | * [Installation](getting-started/installation.md) 8 | * [Hello World](getting-started/hello-world/README.md) 9 | * [Configuring Sources](getting-started/hello-world/configuring-sources.md) 10 | * [Creating Models](getting-started/hello-world/creating-models.md) 11 | 12 | ## Concepts 13 | 14 | * [Overview](concepts/overview.md) 15 | * [Sources](concepts/sources.md) 16 | * [Models](concepts/models.md) 17 | * [Validation](concepts/validation.md) 18 | 19 | ## Documentation 20 | 21 | * [Config](documentation/config/README.md) 22 | * [Sources](documentation/config/sources.md) 23 | * [Models](documentation/config/models.md) 24 | * [Integrations](documentation/integrations/README.md) 25 | * [Databases](documentation/integrations/databases/README.md) 26 | * [Postgres](documentation/integrations/databases/postgres.md) 27 | * [MySQL](documentation/integrations/databases/mysql.md) 28 | * [MongoDB](documentation/integrations/databases/mongodb.md) 29 | * [Cloud Blob Storage](documentation/integrations/cloud-blob-storage/README.md) 30 | * [Amazon S3](documentation/integrations/cloud-blob-storage/amazon-s3.md) 31 | * [File Formats](documentation/integrations/file-formats/README.md) 32 | * [CSV](documentation/integrations/file-formats/csv-format.md) 33 | -------------------------------------------------------------------------------- /docs/documentation/integrations/databases/mysql.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how to configure preen to connect to MySQL databases. 3 | --- 4 | 5 | # MySQL 6 | 7 | Preen uses the [sql](https://pkg.go.dev/database/sql) library to connect to MySQL databases. 8 | 9 | ## Example Preen Source Configuration 10 | 11 | ```yaml 12 | # FILENAME: ~/.preen/sources.yaml 13 | sources: 14 | - name: mysql-example 15 | engine: mysql 16 | connection: 17 | host: localhost 18 | port: 3306 19 | database: mysql 20 | username: ${MYSQL_USER} # You can specify environment variables in the sources.yaml file. 
21 | password: ${MYSQL_PASSWORD} 22 | ``` 23 | 24 | ## MySQL Models 25 | 26 | MySQL models are defined as a YAML file that contains a SQL query. 27 | 28 | ```yaml 29 | # FILENAME: ~/.preen/models/users.yaml 30 | name: users # This name needs to be unique 31 | type: sql 32 | query: | 33 | select 34 | users.id, 35 | users.first_name, 36 | users.last_name, 37 | users.birthday 38 | from 39 | users; 40 | ``` 41 | 42 | ## MySQL Type Mappings 43 | 44 | A comprehensive list of MySQL type mappings can be found [here](https://github.com/preendata/preen/blob/main/internal/engine/types.go#L190-L240). 45 | 46 | ## Code References 47 | 48 | - [types.go](https://github.com/preendata/preen/blob/main/internal/engine/types.go) 49 | - [postgres.go](https://github.com/preendata/preen/blob/main/internal/engine/mysql.go) -------------------------------------------------------------------------------- /docs/getting-started/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how to install preen. 3 | --- 4 | 5 | # Installation 6 | 7 | You can install Preen a few different ways. Note that the binary installation is the easiest method if you want to get started quickly. We support building from source if you want to have a local copy of the application code and make changes. 8 | 9 | ## Homebrew 10 | 11 | Download the executable via our Homebrew cask. 12 | 13 | ``` 14 | brew tap preendata/preen 15 | brew install preen 16 | ``` 17 | 18 | ## Download binary 19 | 20 | You can download a binary for your operating system and architecture from the [GitHub Releases](https://github.com/preendata/preen/releases) page. 21 | 22 | ```bash 23 | # Using curl 24 | sh -c "$(curl -fsSL https://raw.githubusercontent.com/preendata/preen/main/build/install.sh)" 25 | ``` 26 | 27 | ```bash 28 | # Using wget 29 | sh -c "$(wget https://raw.githubusercontent.com/preendata/preen/main/build/install.sh -O -)" 30 | ``` 31 | 32 | ## Build from source 33 | 34 | To build Preen from source, you need to have Go 1.23.0 or later installed on your system. Then, you can build the application using the following commands: 35 | 36 | ```bash 37 | git clone https://github.com/preendata/preen.git 38 | cd preen 39 | make build 40 | ``` 41 | 42 | This will create a `preen` binary in the `bin` directory. You can add this to your `PATH` if you want to use the `preen` command from anywhere. 43 | 44 | ### Validation 45 | 46 | Test that you've correctly installed the application by executing 47 | 48 | ```bash 49 | preen -h 50 | ``` 51 | -------------------------------------------------------------------------------- /docs/documentation/integrations/databases/mongodb.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how to configure preen to connect to MongoDB databases. 3 | --- 4 | 5 | # MongoDB 6 | 7 | Preen can connect to MongoDB databases. Our current implementation uses the Go [mongo](https://pkg.go.dev/go.mongodb.org/mongo-driver/mongo) library to connect to databases. 
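For orientation, a minimal connection-and-filter sketch with the official driver is shown below. It is illustrative only: the URI, database name, and filter are placeholders, and Preen's actual client setup lives in mongo.go.

```go
package main

import (
	"context"
	"fmt"

	"go.mongodb.org/mongo-driver/bson"
	"go.mongodb.org/mongo-driver/mongo"
	"go.mongodb.org/mongo-driver/mongo/options"
)

func main() {
	ctx := context.Background()

	// Hypothetical URI; Preen assembles its own from the source configuration.
	uri := "mongodb://user:password@localhost:27017/?authSource=admin"
	client, err := mongo.Connect(ctx, options.Client().ApplyURI(uri))
	if err != nil {
		panic(err)
	}
	defer func() { _ = client.Disconnect(ctx) }()

	// A document filter of the kind used in Preen MongoDB models.
	coll := client.Database("preendb").Collection("users")
	filter := bson.M{"login_attempts": bson.M{"$gt": 1}}

	count, err := coll.CountDocuments(ctx, filter)
	if err != nil {
		panic(err)
	}
	fmt.Println("matching documents:", count)
}
```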
8 | 9 | ## Example Preen Source Configuration 10 | 11 | ```yaml 12 | # FILENAME: ~/.preen/sources.yaml 13 | sources: 14 | - name: mongo-example 15 | engine: mongodb 16 | connection: 17 | host: localhost 18 | port: 27117 19 | database: preendb 20 | username: ${MONGODB_USERNAME} 21 | password: ${MONGODB_PASSWORD} 22 | auth_source: admin 23 | ``` 24 | 25 | ## Mongo Database Models 26 | 27 | MongoDB models are defined as a YAML file that contains a MongoDB document filter. This filter is used to match documents in the database and return the data that matches the filter. The documents are written to DuckDB as a JSON column for local querying using the native [JSON querying capabilities of DuckDB](https://duckdb.org/docs/extensions/json.html). 28 | 29 | ```yaml 30 | # FILENAME: ~/.preen/models/users.yaml 31 | name: users-mongodb 32 | type: mongodb 33 | collection: users # The name of the collection to query. 34 | query: | 35 | { 36 | "login_attempts": { 37 | "$gt": 1 38 | }, 39 | "account_status": { 40 | "$in": ["inactive", "suspended"] 41 | } 42 | } 43 | ``` 44 | 45 | ## Code References 46 | 47 | - [mongo.go](https://github.com/preendata/preen/blob/main/internal/engine/mongo.go) 48 | -------------------------------------------------------------------------------- /docs/documentation/integrations/databases/postgres.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how to configure preen to connect to Postgres databases. 3 | --- 4 | 5 | # Postgres 6 | 7 | Preen uses the [pgx](https://github.com/jackc/pgx) library to connect to Postgres databases. 8 | 9 | ## Example Preen Source Configuration 10 | 11 | ```yaml 12 | # FILENAME: ~/.preen/sources.yaml 13 | sources: 14 | - name: postgres-example 15 | engine: postgres 16 | connection: 17 | host: localhost 18 | port: 5432 19 | database: postgres 20 | username: ${PG_USER} # You can specify environment variables in the sources.yaml file. 21 | password: ${PG_PASSWORD} 22 | ``` 23 | 24 | ## Postgres Models 25 | 26 | Postgres models are defined as a YAML file that contains a SQL query. 27 | 28 | ```yaml 29 | # FILENAME: ~/.preen/models/users.yaml 30 | name: users # This name needs to be unique 31 | type: sql 32 | query: | 33 | select 34 | users.id, 35 | users.first_name, 36 | users.last_name, 37 | users.birthday 38 | from 39 | users; 40 | ``` 41 | 42 | ## Postgres Type Mappings 43 | 44 | A comprehensive list of Postgres type mappings can be found [here](https://github.com/preendata/preen/blob/main/internal/engine/types.go#L190-L240). We use the [pgtype](https://pkg.go.dev/github.com/jackc/pgtype) library to map Postgres types to Go types, with a few custom mappings for things like `float64`, `duration`, and `time` types. 
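To give a feel for what such a mapping looks like, here is a simplified, hypothetical table; the authoritative mapping is the one in types.go.

```go
package main

import "fmt"

// postgresToGo is a hypothetical, abbreviated mapping from Postgres type names
// to Go types. The real mapping in types.go is driven by pgtype and covers
// many more cases.
var postgresToGo = map[string]string{
	"int2":        "int16",
	"int4":        "int32",
	"int8":        "int64",
	"float8":      "float64",
	"numeric":     "float64",
	"bool":        "bool",
	"text":        "string",
	"timestamptz": "time.Time",
	"interval":    "time.Duration",
}

func main() {
	fmt.Println(postgresToGo["timestamptz"]) // time.Time
}
```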
45 | 46 | ## Code References 47 | 48 | - [types.go](https://github.com/preendata/preen/blob/main/internal/engine/types.go) 49 | - [postgres.go](https://github.com/preendata/preen/blob/main/internal/engine/postgres.go) 50 | -------------------------------------------------------------------------------- /internal/engine/insert.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "database/sql/driver" 5 | "fmt" 6 | ) 7 | 8 | func Insert(modelName ModelName, ic <-chan []driver.Value, dc chan<- []int64) { 9 | connector, err := ddbCreateConnector() 10 | if err != nil { 11 | panic(err) 12 | } 13 | appender, err := ddbNewAppender(connector, "main", string(modelName)) 14 | if err != nil { 15 | panic(err) 16 | } 17 | rowCounter := 0 18 | for message := range ic { 19 | if message[0] == "quit" { 20 | break 21 | } 22 | Debug(fmt.Sprintf("Inserting row: %+v", message)) 23 | 24 | if err := appender.AppendRow(message...); err != nil { 25 | Error(fmt.Sprintf("Failed to append row: %v", err)) 26 | Error(fmt.Sprintf("Row data: %+v", message)) 27 | panic(err) 28 | } 29 | rowCounter++ 30 | if rowCounter%10000000 == 0 { 31 | Debug(fmt.Sprintf( 32 | "Flushing 10M rows from appender to DuckDB for model: %s, %d", modelName, rowCounter, 33 | )) 34 | if err := appender.Flush(); err != nil { 35 | panic(err) 36 | } 37 | } 38 | } 39 | if err = appender.Close(); err != nil { 40 | panic(err) 41 | } 42 | dc <- []int64{int64(rowCounter)} 43 | } 44 | 45 | func ConfirmInsert(modelName string, dc chan []int64, rowsExpected int64) { 46 | for message := range dc { 47 | if rowsExpected == 0 { 48 | Debug(fmt.Sprintf("Inserted %d rows into model %s", message[0], modelName)) 49 | break 50 | } 51 | if message[0] == rowsExpected { 52 | Debug(fmt.Sprintf("Inserted %d rows into model %s. Expected %d rows", message[0], modelName, rowsExpected)) 53 | break 54 | } 55 | if message[0] != rowsExpected { 56 | Error(fmt.Sprintf("Inserted %d rows into model %s. Expected %d rows", message[0], modelName, rowsExpected)) 57 | break 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /docs/documentation/config/models.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how to configure preen models. 3 | --- 4 | 5 | # Models 6 | 7 | Preen models are defined as a YAML file. The model file is used to define the data sources, the query to be executed, and the type of query to be executed. 8 | 9 | ## Model Configuration Options 10 | 11 | | Option | Description | Required | Applicable Types | 12 | | --------------- | ----------------------------------------------------------------------- | ----------------------- | ----------------------------------- | 13 | | `name` | The unique name of the model | Yes | All | 14 | | `type` | The type of the model (e.g.`database`, `file`) | Yes | All | 15 | | `format` | The format of the data (e.g. 
csv) | Only for `file` type | `file` | 16 | | `query` | The query to be executed | Yes for `database` type | `database` | 17 | | `options` | Additional options for the model (e.g., file format, delimiter, header) | No | All (specific options vary by type) | 18 | | `file_patterns` | The file patterns to be used for matching files | Only for `file` type | `file` | 19 | | `collection` | The name of the collection to query | Only for `database` type | Used for MongoDB sources | 20 | 21 | ## Code References 22 | 23 | * [models.go](../../../internal/engine/models.go) 24 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: 3 | title: 4 | visible: true 5 | description: 6 | visible: false 7 | tableOfContents: 8 | visible: true 9 | outline: 10 | visible: true 11 | pagination: 12 | visible: true 13 | --- 14 | 15 | # Preen 16 | 17 | Preen is a local-first, federated analytics engine built on top of DuckDB. Think of Preen as an open-core version of Fivetran, Hightouch, or Stitch for local data processing. Preen enables accessing data across 10s, 100s or 1000s of data sources. The only thing you need is a laptop, basic SQL and access to your data sources. 18 | 19 | {% embed url="https://www.youtube.com/watch?v=O3IMRaSkEcQ" %} 20 | 21 | ## Why Preen? 22 | 23 | * Run analysis and train AI models directly from the primary copy of your company's data. 24 | * Build data applications with their own local database, think SQLite for data applications. 25 | * No datalakes. No cloud computing. No copies of data that create numerous conflicting versions of data. 26 | * Get instant access to exactly the data you need without waiting on another team. 27 | * Describe your Company's data universe in code. Build ephemeral, versioned, enterprise data warehouses from scratch directly on your laptop. 28 | 29 |
* **Quick Start**: Install and use Preen (installation.md)
* **Concepts**: Learn the basics of Preen (broken link)
* **Github Repo**: Open an Issue or view source (https://github.com/preendata/preen)
30 | -------------------------------------------------------------------------------- /internal/cli/repl.go: -------------------------------------------------------------------------------- 1 | package cli 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "strings" 7 | 8 | "github.com/chzyer/readline" 9 | "github.com/preendata/preen/internal/engine" 10 | "github.com/urfave/cli/v2" 11 | ) 12 | 13 | func Repl(c *cli.Context) error { 14 | outputFormat := c.String("output-format") 15 | fmt.Println("Output format: ", outputFormat) 16 | 17 | rl, err := readline.NewEx(&readline.Config{ 18 | Prompt: "preen> ", 19 | HistoryFile: "/tmp/preen-history.tmp", 20 | InterruptPrompt: "^C", 21 | EOFPrompt: "exit", 22 | HistorySearchFold: true, 23 | }) 24 | if err != nil { 25 | return fmt.Errorf("failed to initialize readline: %w", err) 26 | } 27 | defer rl.Close() 28 | 29 | fmt.Println("REPL started. Type 'exit' to quit.") 30 | var cmds []string 31 | for { 32 | line, err := rl.Readline() 33 | if err == readline.ErrInterrupt { 34 | if len(line) == 0 { 35 | break 36 | } else { 37 | continue 38 | } 39 | } else if err == io.EOF { 40 | break 41 | } else if err != nil { 42 | return fmt.Errorf("failed to read input: %w", err) 43 | } 44 | 45 | line = strings.TrimSpace(line) 46 | 47 | // Handle exit command 48 | if line == "exit" || line == "quit" { 49 | fmt.Println("Exiting REPL.") 50 | break 51 | } 52 | 53 | cmds = append(cmds, line) 54 | if !strings.HasSuffix(line, ";") { 55 | rl.SetPrompt(">") 56 | continue 57 | } 58 | 59 | cmd := strings.Join(cmds, " ") 60 | cmds = cmds[:0] 61 | rl.SetPrompt("preendb> ") 62 | if err := rl.SaveHistory(cmd); err != nil { 63 | fmt.Printf("failed to save repl history: %v\n", err) 64 | } 65 | 66 | // Execute the input as a query 67 | qr, err := engine.Execute(cmd) 68 | if err != nil { 69 | fmt.Printf("Error: %v\n", err) 70 | continue 71 | } 72 | 73 | if err := engine.WriteToTable(qr.Rows, qr.Columns, outputFormat); err != nil { 74 | fmt.Printf("Error: %v\n", err) 75 | continue 76 | } 77 | } 78 | 79 | return nil 80 | } 81 | -------------------------------------------------------------------------------- /docs/getting-started/hello-world/creating-models.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: How to create a model to query a source. 3 | --- 4 | 5 | # Creating Models 6 | 7 | [models.md](../../concepts/models.md "mention")are how you define the data you want to work with from a given data source. Don't think of a Model as your final result or query set, rather its all the relevant data from which you may query your final result set. 8 | 9 | Read more about the rationale behind [models.md](../../concepts/models.md "mention")on its concept page. 10 | 11 | ## Defining a Model 12 | 13 | You can define models in two ways, adding a `models.yaml` file to the `PREEN_CONFIG_PATH` or adding individual model files to the `~/.preen/models` directory. You may save a model file anywhere you'd like, so long as its parent directory is specified by `PREEN_MODELS_PATH` 14 | 15 | Here's an example `database` model. **Note that column names need to be fully qualified, i.e. 
users.id instead of id.** 16 | 17 | ```yaml 18 | # FILENAME: ~/.preen/models/users.yaml 19 | name: users # This name needs to be unique 20 | type: database 21 | query: | 22 | select 23 | users.id, 24 | users.first_name, 25 | users.last_name, 26 | users.birthday 27 | from 28 | users; 29 | ``` 30 | 31 | ## Registering a Model with a Source 32 | 33 | Consider a simplified [https://github.com/hyphasql/hypha/blob/main/docs/concepts/source.md](https://github.com/hyphasql/hypha/blob/main/docs/concepts/source.md "mention") config from the last page, pared down to one data source. You register the users model with the source as follows. 34 | 35 | ```yaml 36 | # FILENAME: ~/.preen/sources.yaml 37 | sources: 38 | - name: postgres-model 39 | engine: postgres 40 | connection: 41 | host: localhost 42 | port: 33061 43 | database: postgres 44 | username: root 45 | password: myp@assword 46 | models: 47 | - users 48 | ``` 49 | 50 | You can now validate and build your models in Preen. 51 | 52 | ```bash 53 | preen source validate 54 | preen model build 55 | ``` 56 | -------------------------------------------------------------------------------- /docs/documentation/config/sources.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how to configure preen sources. 3 | --- 4 | 5 | # Sources 6 | 7 | Preen sources are defined as a YAML file. The source file is used to define the data sources, the query to be executed, and the type of query to be executed. 8 | 9 | ## Source Configuration Options 10 | 11 | | Option | Description | Required | Applicable Types | 12 | | --------------- | ----------------------------------------------------------------------- | ----------------------- | ----------------------------------- | 13 | | `name` | The unique name of the source | Yes | All | 14 | | `engine` | The type of the source (e.g.`database`, `file`) | Yes | All | 15 | | `connection` | The connection details for the source (e.g. 
database connection details) | Yes | All | 16 | | `models` | The models to be used for the source | Yes | All | 17 | 18 | ## Source Connection Details 19 | 20 | | Option | Description | 21 | |---------------|--------------------------------------------| 22 | | `host` | The host of the source | 23 | | `port` | The port of the source | 24 | | `database` | The database of the source | 25 | | `username` | The username of the source | 26 | | `password` | The password of the source | 27 | | `auth_source` | The authentication source for MongoDB | 28 | | `bucket_name` | The bucket name for AWS S3 models | 29 | | `region` | The AWS region for S3 models | 30 | 31 | ## Code References 32 | 33 | - [sources.go](../../../internal/engine/sources.go) 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | preendb 2 | bin/ 3 | tmp.txt 4 | .config/ 5 | .venv/ 6 | target/ 7 | dbt_modules/ 8 | /logs/ 9 | .vscode/ 10 | .DS_Store 11 | conf/.user.yml 12 | .idea/ 13 | 14 | # Byte-compiled / optimized / DLL files 15 | __pycache__/ 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # Environments 94 | .env 95 | .venv 96 | env/ 97 | venv/ 98 | ENV/ 99 | env.bak/ 100 | venv.bak/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # mkdocs documentation 110 | /site 111 | 112 | # mypy 113 | .mypy_cache/ 114 | 115 | # OSX Stuff 116 | .DS_Store 117 | 118 | # Terraform 119 | *.tfvars 120 | **plugins 121 | *.tfstate* 122 | 123 | # Preen 124 | *preenContext.db* 125 | .docker/db/data/** 126 | .preendb/config.yaml 127 | preen 128 | .preen/ 129 | -------------------------------------------------------------------------------- /internal/engine/printer.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "os" 7 | 8 | "github.com/jedib0t/go-pretty/v6/table" 9 | ) 10 | 11 | // PrettyPrintJSON pretty prints a slice of maps containing JSON objects. 
12 | func prettifyString(data []map[string]interface{}) (string, error) { 13 | prettyJSON, err := json.MarshalIndent(data, "", " ") 14 | if err != nil { 15 | return "", err 16 | } 17 | return string(prettyJSON), nil 18 | } 19 | 20 | // PrintPrettyJSON prints the pretty JSON to the console. 21 | func PrintPrettyJSON(data []map[string]interface{}) error { 22 | prettyJSON, err := prettifyString(data) 23 | if err != nil { 24 | return err 25 | } 26 | fmt.Println(prettyJSON) 27 | return nil 28 | } 29 | 30 | func prettifyStruct(v interface{}) (string, error) { 31 | // Marshal the struct with indentation 32 | prettyJSON, err := json.MarshalIndent(v, "", " ") 33 | if err != nil { 34 | return "", fmt.Errorf("failed to marshal struct: %w", err) 35 | } 36 | return string(prettyJSON), nil 37 | } 38 | 39 | func PrintPrettyStruct(v interface{}) error { 40 | prettyJSON, err := prettifyStruct(v) 41 | if err != nil { 42 | return fmt.Errorf("failed to pretty print struct: %w", err) 43 | } 44 | fmt.Println(prettyJSON) 45 | return nil 46 | } 47 | 48 | func WriteToTable(rows []map[string]any, columns []string, outputFormat string) error { 49 | // Set up 50 | t := table.NewWriter() 51 | t.SetOutputMirror(os.Stdout) 52 | t.SetStyle(table.StyleLight) 53 | 54 | // Set table headers. This is fucked, non-deterministic order of fields. 55 | headers := table.Row{} 56 | for _, header := range columns { 57 | headers = append(headers, header) 58 | } 59 | t.AppendHeader(headers) 60 | 61 | // Populate table with data 62 | for _, row := range rows { 63 | values := table.Row{} 64 | for _, header := range headers { 65 | values = append(values, row[header.(string)]) 66 | } 67 | t.AppendRow(values) 68 | } 69 | 70 | switch outputFormat { 71 | case "csv": 72 | t.RenderCSV() 73 | case "markdown": 74 | t.RenderMarkdown() 75 | default: 76 | t.Render() 77 | } 78 | 79 | return nil 80 | } 81 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | tests: 7 | env: 8 | PREEN_CONFIG_PATH: ./build/ci 9 | PREEN_MODELS_PATH: ./build/ci/models 10 | PG_USER: ${{ secrets.PG_USER }} 11 | PG_PASSWORD: ${{ secrets.PG_PASSWORD }} 12 | MYSQL_USER: ${{ secrets.MYSQL_USER }} 13 | MYSQL_PASSWORD: ${{ secrets.MYSQL_PASSWORD }} 14 | MONGO_USER: ${{ secrets.MONGO_USER }} 15 | MONGO_PASSWORD: ${{ secrets.MONGO_PASSWORD }} 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - name: Checkout code 20 | uses: actions/checkout@v3 21 | - name: Set up Go 22 | uses: actions/setup-go@v5 23 | with: 24 | go-version: '1.23.0' 25 | 26 | - name: Build Docker services (PG, MySQL, etc.) 27 | run: 28 | docker compose -f build/ci/docker-compose.yaml up -d 29 | 30 | - name: Install dependencies 31 | run: | 32 | go mod tidy 33 | 34 | - name: golangci-lint 35 | uses: golangci/golangci-lint-action@v6 36 | with: 37 | version: v1.60 38 | args: --timeout=5m 39 | 40 | - name: Build Preen binary 41 | run: make build 42 | 43 | - name: Unit Tests 44 | run: make test 45 | 46 | - name: Integration tests 47 | run: | 48 | sleep 5 49 | 50 | bin/preen model build 51 | 52 | # Test that the MySQL model was built and can be queried. Query should return 1 row. 
53 | MYSQL_RESULTS_LENGTH=$(bin/preen query -f json "select * from mysql_data_types_test;" | jq length) 54 | if [[ $MYSQL_RESULTS_LENGTH -ne 1 ]]; then 55 | echo "Expected 1 row in mysql_data_types_test, got $MYSQL_RESULTS_LENGTH" 56 | exit 1 57 | fi 58 | 59 | # Test that the PostgreSQL model was built and can be queried. Query should return 1 row. 60 | PG_RESULTS_LENGTH=$(bin/preen query -f json "select * from pg_data_types_test;" | jq length) 61 | if [[ $PG_RESULTS_LENGTH -ne 1 ]]; then 62 | echo "Expected 1 row in pg_data_types_test, got $PG_RESULTS_LENGTH" 63 | exit 1 64 | fi 65 | 66 | - name: Shut down services 67 | if: always() 68 | run: docker compose -f build/ci/docker-compose.yaml down -------------------------------------------------------------------------------- /docs/documentation/integrations/cloud-blob-storage/amazon-s3.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how to configure preen to connect to Amazon S3. 3 | --- 4 | 5 | # Amazon S3 6 | 7 | ## Credentials 8 | 9 | Preen's Amazon S3 integration uses the AWS SDK's credential chain to authenticate requests. This means you don't need to explicitly provide access keys in your application code or environment variables. Instead, the SDK will automatically look for credentials in the following order: 10 | 11 | 1. Environment variables 12 | 2. Shared credential file (\~/.aws/credentials) 13 | 3. AWS IAM role for Amazon EC2 or ECS tasks 14 | 15 | ### Setting Up Credentials 16 | 17 | To set up your credentials, you have several options: 18 | 19 | 1. **AWS CLI Configuration**: If you have the AWS CLI installed, you can run `aws configure` to set up your credentials. This will create a shared credential file. 20 | 2. **Shared Credentials File**: Manually create or edit the file `~/.aws/credentials` (on Linux/Mac) or `%UserProfile%\.aws\credentials` (on Windows) with the following content: 21 | 22 | ```conf 23 | [default] 24 | aws_access_key_id = YOUR_ACCESS_KEY 25 | aws_secret_access_key = YOUR_SECRET_KEY 26 | ``` 27 | 28 | 3. **Environment Variables**: Set the following environment variables: 29 | 30 | ```bash 31 | export AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY 32 | export AWS_SECRET_ACCESS_KEY=YOUR_SECRET_KEY 33 | ``` 34 | 35 | 4. **IAM Roles**: If your application is running on an AWS EC2 instance or ECS task, you can assign an IAM role with the necessary permissions to access S3. 36 | 37 | ### Region and Bucket Configuration 38 | 39 | Region and bucket name are specified in your Preen source configuration. 
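The sketch below shows how the credential chain and region typically come together when building an S3 client in Go. It is a hedged example that assumes the AWS SDK for Go v2 and a hypothetical bucket named `users`; Preen's real client setup is in s3.go.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/service/s3"
)

func main() {
	ctx := context.Background()

	// LoadDefaultConfig walks the standard credential chain:
	// environment variables, ~/.aws/credentials, then EC2/ECS IAM roles.
	cfg, err := config.LoadDefaultConfig(ctx, config.WithRegion("us-east-1"))
	if err != nil {
		log.Fatal(err)
	}

	client := s3.NewFromConfig(cfg)

	// List objects from the hypothetical "users" bucket.
	out, err := client.ListObjectsV2(ctx, &s3.ListObjectsV2Input{
		Bucket: aws.String("users"),
	})
	if err != nil {
		log.Fatal(err)
	}
	for _, obj := range out.Contents {
		fmt.Println(aws.ToString(obj.Key))
	}
}
```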
40 | 41 | ### Preen Source and Model Configuration for Amazon S3 42 | 43 | ```yaml 44 | # FILENAME: ~/.preen/models/users.yaml 45 | name: users 46 | type: file 47 | file_patterns: 48 | - "users/v1/**.csv" # This will match all csv files under the users/v1 prefix 49 | format: csv 50 | options: 51 | auto_detect: true 52 | header: true 53 | delim: "," 54 | quote: "\"" 55 | escape: "\"" 56 | ``` 57 | 58 | ```yaml 59 | # FILENAME: ~/.preen/sources.yaml 60 | sources: 61 | - name: users-s3-us-east-1 62 | engine: s3 63 | connection: 64 | bucket_name: users 65 | region: us-east-1 66 | models: 67 | - users 68 | ``` 69 | -------------------------------------------------------------------------------- /internal/engine/sources.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | yaml "gopkg.in/yaml.v3" 8 | ) 9 | 10 | type Connection struct { 11 | Host string `yaml:"host"` 12 | Port int `yaml:"port"` 13 | Database string `yaml:"database"` 14 | Username string `yaml:"username"` 15 | Password string `yaml:"password"` 16 | AuthSource string `yaml:"auth_source"` 17 | BucketName string `yaml:"bucket_name"` 18 | Region string `yaml:"region"` 19 | Schema string `yaml:"schema"` 20 | Warehouse string `yaml:"warehouse"` 21 | Role string `yaml:"role"` 22 | Account string `yaml:"account"` 23 | } 24 | 25 | type Source struct { 26 | Name string `yaml:"name"` 27 | Engine string `yaml:"engine"` 28 | Connection Connection `yaml:"connection"` 29 | Models []string `yaml:"models"` 30 | } 31 | 32 | type SourceConfig struct { 33 | Sources []Source `yaml:"sources"` 34 | Env *Env `yaml:"-"` // not in yaml 35 | } 36 | 37 | func GetSourceConfig() (*SourceConfig, error) { 38 | sc := SourceConfig{} 39 | env, err := EnvInit() 40 | if err != nil { 41 | return nil, fmt.Errorf("error initializing environment: %w", err) 42 | } 43 | sc.Env = env 44 | 45 | // Create directory if not exists 46 | _, err = os.Stat(sc.Env.PreenConfigPath) 47 | 48 | if os.IsNotExist(err) { 49 | err = os.Mkdir(sc.Env.PreenConfigPath, os.ModePerm) 50 | 51 | if err != nil { 52 | return nil, fmt.Errorf("failed to create directory at %s with error %s", sc.Env.PreenConfigPath, err) 53 | } 54 | } else if err != nil { 55 | return nil, fmt.Errorf("failed to access %s with error %s", sc.Env.PreenConfigPath, err) 56 | } 57 | 58 | configFilePath := getYmlorYamlPath(sc.Env.PreenConfigPath, "sources") 59 | 60 | // Create file if not exists 61 | file, err := os.ReadFile(configFilePath) 62 | 63 | if os.IsNotExist(err) { 64 | _, err = os.Create(configFilePath) 65 | 66 | if err != nil { 67 | return nil, fmt.Errorf("failed to create file at %s with error %s", configFilePath, err) 68 | } 69 | 70 | file, err = os.ReadFile(configFilePath) 71 | } 72 | 73 | if err != nil { 74 | return nil, fmt.Errorf("failed to read source config file: %s", err) 75 | } 76 | 77 | // Pull yaml out of config file 78 | if err = yaml.Unmarshal(file, &sc); err != nil { 79 | return nil, fmt.Errorf("failed to parse source file: %w", err) 80 | } 81 | 82 | // Override config with environment variables 83 | fromEnv(&sc) 84 | 85 | return &sc, nil 86 | } 87 | -------------------------------------------------------------------------------- /docs/concepts/models.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: what is a model? 
3 | --- 4 | 5 | # Models 6 | 7 | ## Overview 8 | 9 | A Preen **Model** is a fundamental concept that defines how data is accessed and structured for local querying. It acts as a bridge between your raw data sources and the Preen system, allowing for targeted data retrieval. 10 | 11 | ## Definition 12 | 13 | A Model is a storage system-dependent configuration that specifies: 14 | 15 | 1. The source of the data 16 | 2. The structure or schema of the data 17 | 3. Any filtering or transformation to be applied 18 | 19 | Models narrow down the set of data to be used for local querying, ensuring that only relevant information is processed. 20 | 21 | ## Examples 22 | 23 | Models can be configured for various types of storage systems. Here are some examples: 24 | 25 | ### SQL Databases 26 | 27 | These models are defined as a YAML file that contains a SQL query. 28 | 29 | ```yaml 30 | # FILENAME: ~/.preen/models/users.yaml 31 | name: users # This name needs to be unique 32 | type: database 33 | query: | 34 | select 35 | users.id, 36 | users.first_name, 37 | users.last_name, 38 | users.birthday 39 | from 40 | users; 41 | ``` 42 | 43 | ### File Systems 44 | 45 | These models are configured as a YAML file and contain configurations specific to the underlying file storage system. Here is an example of a model using Amazon S3 and a csv file. The full list of options can be found here. 46 | 47 | ```yaml 48 | # FILENAME: ~/.preen/models/users.yaml 49 | name: users # This name needs to be unique 50 | type: file 51 | file_patterns: 52 | - "users/v1/**.csv" # This will match all csv files under the users/v1 prefix 53 | format: csv 54 | options: 55 | auto_detect: true 56 | header: true 57 | delim: "," 58 | quote: "\"" 59 | escape: "\"" 60 | new_line: "\\r\\n" 61 | filename: true 62 | union_by_name: true 63 | ``` 64 | 65 | ## Benefits of Using Models 66 | 67 | 1. **Data Isolation**: Models allow you to work with specific subsets of your data, improving performance and reducing noise. 68 | 2. **Abstraction**: They provide a layer of abstraction between your raw data sources and your Preen queries. 69 | 3. **Flexibility**: Models can be easily adjusted to accommodate changes in data structure or source without affecting the rest of your Preen setup. 70 | 4. **Reusability**: Once defined, Models can be shared and reused by different users and teams within your organization. 
71 | 72 | ## CLI Commands 73 | 74 | ```bash 75 | preen model build # Builds all models 76 | preen model build --target users # Target a specific model 77 | ``` 78 | 79 | For detailed configuration reference see [models.md](../documentation/config/models.md "mention") 80 | -------------------------------------------------------------------------------- /internal/engine/logging.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "runtime" 7 | 8 | "github.com/sirupsen/logrus" 9 | ) 10 | 11 | var logger *logrus.Logger 12 | 13 | type Fields = logrus.Fields 14 | 15 | func Initialize(logLevels ...string) error { 16 | logger = logrus.New() 17 | logger.Out = os.Stdout 18 | 19 | logger.SetFormatter(&logrus.TextFormatter{ 20 | FullTimestamp: true, 21 | }) 22 | 23 | // Default log level to info 24 | logLevel := "INFO" 25 | 26 | // If log level in environment, use it 27 | if l := os.Getenv("PREENDB_LOG_LEVEL"); l != "" { 28 | logLevel = l 29 | } 30 | 31 | // If log level passed in flag, prefer it 32 | if len(logLevels) > 0 && logLevels[0] != "" { 33 | logLevel = logLevels[0] 34 | } 35 | 36 | // Set loglevel 37 | level, err := logrus.ParseLevel(logLevel) 38 | if err != nil { 39 | return fmt.Errorf("invalid log level: %v", err) 40 | } 41 | logger.SetLevel(level) 42 | 43 | Debugf("Log level set to %s", level) 44 | 45 | return nil 46 | } 47 | 48 | func getCaller() (string, int) { 49 | _, file, line, ok := runtime.Caller(2) 50 | if !ok { 51 | return "unknown", 0 52 | } 53 | 54 | return file, line 55 | } 56 | 57 | func IsValidLogLevel(logLevel string) error { 58 | _, err := logrus.ParseLevel(logLevel) 59 | 60 | if err != nil { 61 | return fmt.Errorf("invalid log level: %s. Allowed values are: (DEBUG, INFO, WARN, ERROR, FATAL, PANIC)", logLevel) 62 | } 63 | 64 | return nil 65 | } 66 | 67 | func Debug(args ...interface{}) { 68 | file, line := getCaller() 69 | entry := logger.WithFields(Fields{ 70 | "caller": fmt.Sprintf("%s:%d", file, line), 71 | }) 72 | entry.Debug(args...) 73 | } 74 | 75 | func Debugf(format string, args ...interface{}) { 76 | file, line := getCaller() 77 | entry := logger.WithFields(Fields{ 78 | "caller": fmt.Sprintf("%s:%d", file, line), 79 | }) 80 | entry.Debugf(format, args...) 81 | } 82 | 83 | func Warn(args ...interface{}) { 84 | logger.Warn(args...) 85 | } 86 | 87 | func Warnf(format string, args ...interface{}) { 88 | logger.Warnf(format, args...) 89 | } 90 | func Info(args ...interface{}) { 91 | logger.Info(args...) 92 | } 93 | 94 | func Infof(format string, args ...interface{}) { 95 | logger.Infof(format, args...) 96 | } 97 | 98 | func Error(args ...interface{}) { 99 | logger.Error(args...) 100 | } 101 | 102 | func Errorf(format string, args ...interface{}) { 103 | logger.Errorf(format, args...) 104 | } 105 | 106 | func Fatal(args ...interface{}) { 107 | logger.Fatal(args...) 108 | } 109 | 110 | func Fatalf(format string, args ...interface{}) { 111 | logger.Fatalf(format, args...) 
112 | } 113 | 114 | func WithFields(fields logrus.Fields) *logrus.Entry { 115 | return logger.WithFields(fields) 116 | } 117 | 118 | func WithError(err error) *logrus.Entry { 119 | return logger.WithError(err) 120 | } 121 | -------------------------------------------------------------------------------- /internal/cli/commands.go: -------------------------------------------------------------------------------- 1 | package cli 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | 7 | "github.com/preendata/preen/internal/engine" 8 | "github.com/urfave/cli/v2" 9 | ) 10 | 11 | func Query(c *cli.Context) error { 12 | engine.Debug("Executing cli.query") 13 | format := c.String("format") 14 | stmt := c.Args().First() 15 | engine.Debug("Query: ", stmt) 16 | 17 | qr, err := engine.Execute(stmt) 18 | 19 | if err != nil { 20 | engine.Debug("error executing query", err) 21 | return fmt.Errorf("error executing query %w", err) 22 | } 23 | if format == "json" { 24 | if err := engine.PrintPrettyJSON(qr.Rows); err != nil { 25 | return fmt.Errorf("error pretty printing JSON: %w", err) 26 | } 27 | } else { 28 | if err := engine.WriteToTable(qr.Rows, qr.Columns, "table"); err != nil { 29 | return fmt.Errorf("error writing to table: %w", err) 30 | } 31 | } 32 | 33 | return nil 34 | } 35 | 36 | func BuildModel(c *cli.Context) error { 37 | engine.Debug("Executing cli.buildmodel") 38 | modelTarget := c.String("target") 39 | sc, mc, err := engine.GetConfig(modelTarget) 40 | if err != nil { 41 | return fmt.Errorf("error getting config %w", err) 42 | } 43 | 44 | err = engine.BuildModels(sc, mc) 45 | if err != nil { 46 | return fmt.Errorf("error building model %w", err) 47 | } 48 | 49 | return nil 50 | } 51 | 52 | func BuildMetadata(c *cli.Context) error { 53 | engine.Debug("Executing cli.buildInformationSchema") 54 | modelTarget := "" 55 | sc, mc, err := engine.GetConfig(modelTarget) 56 | if err != nil { 57 | return fmt.Errorf("error getting config %w", err) 58 | } 59 | 60 | err = engine.BuildMetadata(sc, mc) 61 | if err != nil { 62 | return fmt.Errorf("error building metadata %w", err) 63 | } 64 | 65 | return nil 66 | } 67 | 68 | func Validate(c *cli.Context) error { 69 | engine.Debug("Executing cli.validate") 70 | modelTarget := "" 71 | sc, mc, err := engine.GetConfig(modelTarget) 72 | if err != nil { 73 | return fmt.Errorf("error getting config %w", err) 74 | } 75 | 76 | if err := engine.ValidateConfigs(sc, mc); err != nil { 77 | return fmt.Errorf("error parsing models %w", err) 78 | } 79 | 80 | if err = engine.BuildMetadata(sc, mc); err != nil { 81 | return fmt.Errorf("error building metadata %w", err) 82 | } 83 | 84 | _, err = engine.BuildColumnMetadata() 85 | if err != nil { 86 | return fmt.Errorf("error building column metadata %w", err) 87 | } 88 | 89 | return nil 90 | } 91 | 92 | func ListSources(c *cli.Context) error { 93 | engine.Debug("Executing cli.listSources") 94 | modelTarget := "" 95 | sc, _, err := engine.GetConfig(modelTarget) 96 | if err != nil { 97 | return fmt.Errorf("error getting config %w", err) 98 | } 99 | 100 | for _, conn := range sc.Sources { 101 | _, err := json.MarshalIndent(conn, "", " ") 102 | 103 | if err != nil { 104 | return fmt.Errorf("error unmarshalling config %w", err) 105 | } 106 | } 107 | return nil 108 | } 109 | -------------------------------------------------------------------------------- /internal/engine/tables.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "slices" 6 | 7 | 
"github.com/preendata/sqlparser" 8 | ) 9 | 10 | type TableAlias string 11 | type TableMap map[TableAlias]TableName 12 | type TableSet []TableName 13 | 14 | func ParseModelTables(mc *ModelConfig) error { 15 | for _, model := range mc.Models { 16 | if model.Type == "database" && model.Parsed != nil { 17 | switch stmt := model.Parsed.(type) { 18 | case *sqlparser.Select: 19 | model.TableMap, model.TableSet = getModelTableAliases(stmt) 20 | default: 21 | return fmt.Errorf("model %s failed. non-select queries not supported", model.Name) 22 | } 23 | } 24 | } 25 | return nil 26 | } 27 | 28 | func getModelTableAliases(stmt *sqlparser.Select) (TableMap, TableSet) { 29 | tableMap := make(TableMap) 30 | tableSet := make(TableSet, 0) 31 | table := stmt.From[0] 32 | switch t := table.(type) { 33 | case *sqlparser.AliasedTableExpr: 34 | if t.As.IsEmpty() { 35 | tableName := TableName(t.Expr.(sqlparser.TableName).Name.String()) 36 | tableMap[TableAlias(t.Expr.(sqlparser.TableName).Name.String())] = tableName 37 | if !slices.Contains(tableSet, tableName) { 38 | tableSet = append(tableSet, tableName) 39 | } 40 | } else { 41 | tableName := TableName(t.Expr.(sqlparser.TableName).Name.String()) 42 | tableMap[TableAlias(t.As.String())] = tableName 43 | if !slices.Contains(tableSet, tableName) { 44 | tableSet = append(tableSet, tableName) 45 | } 46 | } 47 | case *sqlparser.JoinTableExpr: 48 | _, joinTables := parseJoinTables(t, tableMap, tableSet) 49 | tableSet = append(tableSet, joinTables...) 50 | } 51 | 52 | return tableMap, tableSet 53 | } 54 | 55 | func parseJoinTables(j *sqlparser.JoinTableExpr, tableMap TableMap, tableSet TableSet) (*sqlparser.JoinTableExpr, TableSet) { 56 | rightAlias := j.RightExpr.(*sqlparser.AliasedTableExpr).As.String() 57 | rightTable := j.RightExpr.(*sqlparser.AliasedTableExpr).Expr.(sqlparser.TableName).Name.String() 58 | if rightAlias != "" { 59 | tableMap[TableAlias(rightAlias)] = TableName(rightTable) 60 | if !slices.Contains(tableSet, TableName(rightTable)) { 61 | tableSet = append(tableSet, TableName(rightTable)) 62 | } 63 | } else { 64 | tableMap[TableAlias(rightTable)] = TableName(rightTable) 65 | if !slices.Contains(tableSet, TableName(rightTable)) { 66 | tableSet = append(tableSet, TableName(rightTable)) 67 | } 68 | } 69 | 70 | switch left := j.LeftExpr.(type) { 71 | case *sqlparser.JoinTableExpr: 72 | _, tableSet = parseJoinTables(left, tableMap, tableSet) 73 | case *sqlparser.AliasedTableExpr: 74 | leftAlias := left.As.String() 75 | leftTable := left.Expr.(sqlparser.TableName).Name.String() 76 | if leftAlias != "" { 77 | tableMap[TableAlias(leftAlias)] = TableName(leftTable) 78 | if !slices.Contains(tableSet, TableName(leftTable)) { 79 | tableSet = append(tableSet, TableName(leftTable)) 80 | } 81 | } else { 82 | tableMap[TableAlias(leftTable)] = TableName(leftTable) 83 | if !slices.Contains(tableSet, TableName(leftTable)) { 84 | tableSet = append(tableSet, TableName(leftTable)) 85 | } 86 | } 87 | } 88 | return j, tableSet 89 | } 90 | -------------------------------------------------------------------------------- /internal/engine/env.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "log/slog" 7 | "os" 8 | "os/user" 9 | "path/filepath" 10 | "reflect" 11 | "regexp" 12 | "strconv" 13 | "time" 14 | ) 15 | 16 | type Env struct { 17 | PreenConfigPath string 18 | PreenModelsPath string 19 | LicenseKey string 20 | } 21 | 22 | func EnvInit() (*Env, error) { 23 | usr, err := 
user.Current() 24 | if err != nil { 25 | return nil, fmt.Errorf("failed to get current user: %w", err) 26 | } 27 | 28 | return &Env{ 29 | PreenConfigPath: getEnv("PREEN_CONFIG_PATH", filepath.Join(usr.HomeDir, ".preen"), false), 30 | PreenModelsPath: getEnv("PREEN_MODELS_PATH", filepath.Join(usr.HomeDir, ".preen/models"), false), 31 | LicenseKey: getEnv("PREEN_LICENSE_KEY", "", false), 32 | }, nil 33 | } 34 | 35 | var envRegex = regexp.MustCompile(`\${(\w+)}`) 36 | 37 | func fromEnv(v interface{}) { 38 | _fromEnv(reflect.ValueOf(v).Elem()) // assumes pointer to struct 39 | } 40 | 41 | // recursive 42 | func _fromEnv(rv reflect.Value) { 43 | for i := 0; i < rv.NumField(); i++ { 44 | fv := rv.Field(i) 45 | if fv.Kind() == reflect.Ptr { 46 | fv = fv.Elem() 47 | } 48 | if fv.Kind() == reflect.Struct { 49 | _fromEnv(fv) 50 | continue 51 | } 52 | if fv.Kind() == reflect.Slice { 53 | for j := 0; j < fv.Len(); j++ { 54 | if fv.Index(j).Kind() == reflect.String { 55 | match := envRegex.FindStringSubmatch(fv.Index(j).String()) 56 | if len(match) > 1 { 57 | slog.Debug( 58 | fmt.Sprintf("Setting env var: '%s'", match[1]), 59 | ) 60 | fv.SetString(os.Getenv(match[1])) 61 | } 62 | } 63 | if fv.Index(j).Kind() == reflect.Struct { 64 | _fromEnv(fv.Index(j)) 65 | continue 66 | } 67 | } 68 | } 69 | if fv.Kind() == reflect.String { 70 | match := envRegex.FindStringSubmatch(fv.String()) 71 | if len(match) > 1 { 72 | slog.Debug( 73 | fmt.Sprintf("Setting env var: '%s'", match[1]), 74 | ) 75 | fv.SetString(os.Getenv(match[1])) 76 | } 77 | } 78 | } 79 | } 80 | 81 | func getEnv[T float64 | string | int | bool | time.Duration](key string, defaultVal T, required bool) T { 82 | val, ok := os.LookupEnv(key) 83 | if !ok { 84 | if !required { 85 | return defaultVal 86 | } else { 87 | log.Fatalf("missing required environment variable %s", key) 88 | } 89 | } 90 | 91 | var out T 92 | switch ptr := any(&out).(type) { 93 | case *string: 94 | { 95 | *ptr = val 96 | } 97 | case *int: 98 | { 99 | v, err := strconv.Atoi(val) 100 | if err != nil { 101 | return defaultVal 102 | } 103 | *ptr = v 104 | } 105 | case *bool: 106 | { 107 | v, err := strconv.ParseBool(val) 108 | if err != nil { 109 | return defaultVal 110 | } 111 | *ptr = v 112 | } 113 | case *time.Duration: 114 | { 115 | v, err := time.ParseDuration(val) 116 | if err != nil { 117 | return defaultVal 118 | } 119 | *ptr = v 120 | } 121 | case *float64: 122 | { 123 | v, err := strconv.ParseFloat(val, 64) 124 | if err != nil { 125 | return defaultVal 126 | } 127 | *ptr = v 128 | } 129 | default: 130 | { 131 | log.Fatalf("unsupported type %T", out) 132 | } 133 | } 134 | 135 | return out 136 | } 137 | -------------------------------------------------------------------------------- /internal/engine/duckdb.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "database/sql/driver" 7 | 8 | "github.com/marcboeker/go-duckdb" 9 | ) 10 | 11 | // Returns a DuckDB appender instance for bulk loading of data 12 | func ddbNewAppender(connector driver.Connector, schema string, table string) (*duckdb.Appender, error) { 13 | conn, err := connector.Connect(context.Background()) 14 | if err != nil { 15 | return nil, err 16 | } 17 | 18 | appender, err := duckdb.NewAppenderFromConn(conn, schema, table) 19 | if err != nil { 20 | return nil, err 21 | } 22 | 23 | return appender, nil 24 | } 25 | 26 | func ddbCreateConnector() (driver.Connector, error) { 27 | connector, err := 
duckdb.NewConnector("./preenContext.db?threads=4", func(execer driver.ExecerContext) error { 28 | bootQueries := []string{ 29 | "INSTALL 'json'", 30 | "LOAD 'json'", 31 | "INSTALL aws", 32 | "LOAD aws", 33 | "INSTALL httpfs", 34 | "LOAD httpfs", 35 | } 36 | 37 | for _, query := range bootQueries { 38 | _, err := execer.ExecContext(context.Background(), query, nil) 39 | if err != nil { 40 | return err 41 | } 42 | } 43 | return nil 44 | }) 45 | 46 | if err != nil { 47 | return nil, err 48 | } 49 | 50 | return connector, nil 51 | } 52 | 53 | func ddbOpenDatabase(connector driver.Connector) (*sql.DB, error) { 54 | db := sql.OpenDB(connector) 55 | return db, nil 56 | } 57 | 58 | func ddbExec(queryString string) error { 59 | connector, err := ddbCreateConnector() 60 | if err != nil { 61 | return err 62 | } 63 | 64 | db, err := ddbOpenDatabase(connector) 65 | if err != nil { 66 | return err 67 | } 68 | 69 | defer db.Close() 70 | Debug("querying duckdb database with query: ", queryString) 71 | _, err = db.Exec(queryString) 72 | if err != nil { 73 | return err 74 | } 75 | return err 76 | } 77 | 78 | func ddbQuery(queryString string, c chan map[string]any) ([]string, error) { 79 | connector, err := ddbCreateConnector() 80 | if err != nil { 81 | return nil, err 82 | } 83 | 84 | db, err := ddbOpenDatabase(connector) 85 | if err != nil { 86 | return nil, err 87 | } 88 | 89 | defer db.Close() 90 | Debug("querying duckdb database with query: ", queryString) 91 | rows, err := db.Query(queryString) 92 | if err != nil { 93 | return nil, err 94 | } 95 | columns, err := rows.Columns() 96 | if err != nil { 97 | return nil, err 98 | } 99 | 100 | err = ReadRows(rows, c) 101 | 102 | if err != nil { 103 | return nil, err 104 | } 105 | return columns, err 106 | } 107 | 108 | func ReadRows(rows *sql.Rows, c chan map[string]any) error { 109 | defer rows.Close() 110 | 111 | columns, err := rows.Columns() 112 | if err != nil { 113 | return err 114 | } 115 | numColumns := len(columns) 116 | 117 | values := make([]any, numColumns) 118 | for i := range values { 119 | values[i] = new(interface{}) 120 | } 121 | 122 | for rows.Next() { 123 | if err := rows.Scan(values...); err != nil { 124 | return err 125 | } 126 | 127 | dest := make(map[string]interface{}, numColumns) 128 | for i, column := range columns { 129 | dest[column] = *(values[i].(*interface{})) 130 | } 131 | c <- dest 132 | } 133 | 134 | if err := rows.Err(); err != nil { 135 | return err 136 | } 137 | return nil 138 | } 139 | -------------------------------------------------------------------------------- /internal/engine/mongo.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "context" 5 | "database/sql/driver" 6 | "encoding/json" 7 | "fmt" 8 | "net/url" 9 | "time" 10 | 11 | "go.mongodb.org/mongo-driver/bson" 12 | "go.mongodb.org/mongo-driver/mongo" 13 | "go.mongodb.org/mongo-driver/mongo/options" 14 | "go.mongodb.org/mongo-driver/mongo/readpref" 15 | ) 16 | 17 | func mongoConnFromSource(source Source, ctx context.Context) (*mongo.Client, error) { 18 | 19 | url := fmt.Sprintf( 20 | "mongodb://%s:%s@%s:%d/?authSource=%s", 21 | source.Connection.Username, 22 | url.QueryEscape(source.Connection.Password), 23 | url.QueryEscape(source.Connection.Host), 24 | source.Connection.Port, 25 | source.Connection.AuthSource, 26 | ) 27 | 28 | client, err := mongo.Connect(ctx, options.Client().ApplyURI(url)) 29 | if err != nil { 30 | return nil, err 31 | } 32 | if err = client.Ping(ctx, 
readpref.Primary()); err != nil { 33 | return nil, err 34 | } 35 | return client, nil 36 | } 37 | 38 | func ingestMongoModel(r *Retriever, ic chan []driver.Value) error { 39 | Debug(fmt.Sprintf("Retrieving context %s for %s", r.ModelName, r.Source.Name)) 40 | ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 41 | defer cancel() 42 | mongoClient, err := mongoConnFromSource(r.Source, ctx) 43 | if err != nil { 44 | return err 45 | } 46 | 47 | // If this function returns an error, then it failed to disconnect from the mongo client 48 | defer func() { 49 | if err = mongoClient.Disconnect(context.Background()); err != nil { 50 | Errorf("Error disconnecting from mongo: %s", err) 51 | } 52 | }() 53 | 54 | defer cancel() 55 | 56 | if err = processMongoDocuments(r, mongoClient, ic); err != nil { 57 | return err 58 | } 59 | 60 | return nil 61 | } 62 | 63 | func processMongoDocuments(r *Retriever, client *mongo.Client, ic chan []driver.Value) error { 64 | collection := client.Database(r.Source.Connection.Database).Collection(r.Collection) 65 | ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 66 | defer cancel() 67 | jsonQuery := make(map[string]interface{}) 68 | if err := json.Unmarshal([]byte(r.Query), &jsonQuery); err != nil { 69 | return fmt.Errorf("Error unmarshalling json query: %s", err) 70 | } 71 | bsonQuery, err := bson.Marshal(jsonQuery) 72 | if err != nil { 73 | return fmt.Errorf("Error marshalling json query to BSON: %s", err) 74 | } 75 | cur, err := collection.Find(ctx, bsonQuery) 76 | if err != nil { 77 | return fmt.Errorf("Error executing query: %s", err) 78 | } 79 | if err := cur.Err(); err != nil { 80 | return fmt.Errorf("Error iterating cursor: %s", err) 81 | } 82 | defer cur.Close(ctx) 83 | var rowCounter int64 84 | for cur.Next(ctx) { 85 | var result bson.M 86 | if err := cur.Decode(&result); err != nil { 87 | return fmt.Errorf("Error decoding result: %s", err) 88 | } 89 | jsonBytes, err := json.Marshal(result) 90 | if err != nil { 91 | return fmt.Errorf("Error marshalling result: %s", err) 92 | } 93 | rowCounter++ 94 | driverRow := make([]driver.Value, 2) 95 | driverRow[0] = r.Source.Name 96 | driverRow[1] = string(jsonBytes) 97 | ic <- driverRow 98 | } 99 | Debug(fmt.Sprintf("Retrieved %d rows for %s - %s\n", rowCounter, r.Source.Name, r.ModelName)) 100 | return nil 101 | } 102 | -------------------------------------------------------------------------------- /internal/engine/types_test.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "net/netip" 5 | "testing" 6 | "time" 7 | 8 | "github.com/jackc/pgx/v5/pgtype" 9 | ) 10 | 11 | func TestDuckdbTimeScan(t *testing.T) { 12 | var dt duckdbTime 13 | 14 | // Test case for pgtype.Time 15 | pgTime := pgtype.Time{Microseconds: 3600000000} // 1 hour in microseconds 16 | err := dt.Scan(pgTime) 17 | if err != nil { 18 | t.Errorf("unexpected error: %v", err) 19 | } 20 | expectedTime := time.Now().Truncate(24 * time.Hour).Add(time.Hour).String() 21 | if string(dt) != expectedTime { 22 | t.Errorf("expected %s, got %s", expectedTime, dt) 23 | } 24 | 25 | // Test case for nil 26 | err = dt.Scan(nil) 27 | if err != nil { 28 | t.Errorf("unexpected error: %v", err) 29 | } 30 | if dt != "" { 31 | t.Errorf("expected empty string, got %s", dt) 32 | } 33 | 34 | // Test case for invalid type 35 | err = dt.Scan(123) 36 | if err == nil { 37 | t.Errorf("expected error, got nil") 38 | } 39 | } 40 | 41 | func TestDuckdbTimeValue(t 
*testing.T) { 42 | dt := duckdbTime("test_time") 43 | val, err := dt.Value() 44 | if err != nil { 45 | t.Errorf("unexpected error: %v", err) 46 | } 47 | if val != "test_time" { 48 | t.Errorf("expected test_time, got %v", val) 49 | } 50 | } 51 | 52 | func TestDuckdbDurationScan(t *testing.T) { 53 | var dd duckdbDuration 54 | 55 | // Test case for pgtype.Interval 56 | pgInterval := pgtype.Interval{Microseconds: 1000000, Days: 1, Months: 1} 57 | err := dd.Scan(pgInterval) 58 | if err != nil { 59 | t.Errorf("unexpected error: %v", err) 60 | } 61 | expectedDuration := "Microseconds: 1000000, Days: 1, Months: 1" 62 | if string(dd) != expectedDuration { 63 | t.Errorf("expected %s, got %s", expectedDuration, dd) 64 | } 65 | 66 | // Test case for nil 67 | err = dd.Scan(nil) 68 | if err != nil { 69 | t.Errorf("unexpected error: %v", err) 70 | } 71 | if dd != "" { 72 | t.Errorf("expected empty string, got %s", dd) 73 | } 74 | 75 | // Test case for invalid type 76 | err = dd.Scan(123) 77 | if err == nil { 78 | t.Errorf("expected error, got nil") 79 | } 80 | } 81 | 82 | func TestDuckdbDurationValue(t *testing.T) { 83 | dd := duckdbDuration("test_duration") 84 | val, err := dd.Value() 85 | if err != nil { 86 | t.Errorf("unexpected error: %v", err) 87 | } 88 | if val != "test_duration" { 89 | t.Errorf("expected test_duration, got %v", val) 90 | } 91 | } 92 | 93 | func TestDuckdbNetIpPrefixScan(t *testing.T) { 94 | var dip duckdbNetIpPrefix 95 | 96 | // Test case for netip.Prefix 97 | prefix, _ := netip.ParsePrefix("192.168.1.0/24") 98 | err := dip.Scan(prefix) 99 | if err != nil { 100 | t.Errorf("unexpected error: %v", err) 101 | } 102 | expectedPrefix := "192.168.1.0/24" 103 | if string(dip) != expectedPrefix { 104 | t.Errorf("expected %s, got %s", expectedPrefix, dip) 105 | } 106 | 107 | // Test case for nil 108 | err = dip.Scan(nil) 109 | if err != nil { 110 | t.Errorf("unexpected error: %v", err) 111 | } 112 | if dip != "" { 113 | t.Errorf("expected empty string, got %s", dip) 114 | } 115 | 116 | // Test case for invalid type 117 | err = dip.Scan(123) 118 | if err == nil { 119 | t.Errorf("expected error, got nil") 120 | } 121 | } 122 | 123 | func TestDuckdbNetIpPrefixValue(t *testing.T) { 124 | dip := duckdbNetIpPrefix("test_prefix") 125 | val, err := dip.Value() 126 | if err != nil { 127 | t.Errorf("unexpected error: %v", err) 128 | } 129 | if val != "test_prefix" { 130 | t.Errorf("expected test_prefix, got %v", val) 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /internal/engine/retrieve.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "database/sql/driver" 5 | "fmt" 6 | "slices" 7 | "strings" 8 | 9 | "golang.org/x/sync/errgroup" 10 | ) 11 | 12 | type Retriever struct { 13 | ModelName string 14 | TableName string 15 | Query string 16 | Source Source 17 | Options Options 18 | Format string 19 | FilePatterns *[]string 20 | Collection string 21 | } 22 | 23 | // Retrieve data from sources and insert into the duckDB database. 24 | // Database sources are inserted via the Insert function. 25 | // File sources are inserted via the native duckDB integrations. 
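// For each model, Retrieve opens a buffered insert channel, starts an Insert
// goroutine for database models, and fans the matching sources out through an
// errgroup capped at 200 concurrent retrievals. Once every source finishes, a
// sentinel "quit" value is sent on the channel so the inserter can drain and exit.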
26 | func Retrieve(sc *SourceConfig, mc *ModelConfig) error { 27 | for _, model := range mc.Models { 28 | ic := make(chan []driver.Value, 10000) 29 | dc := make(chan []int64) 30 | tableName := strings.ReplaceAll(string(model.Name), "-", "_") 31 | // Only insert database models into DuckDB 32 | if model.Type == "database" { 33 | go Insert(ModelName(tableName), ic, dc) 34 | } 35 | g := errgroup.Group{} 36 | g.SetLimit(200) 37 | for _, source := range sc.Sources { 38 | if !slices.Contains(source.Models, string(model.Name)) { 39 | Debug(fmt.Sprintf("Skipping %s for %s", model.Name, source.Name)) 40 | continue 41 | } 42 | r := Retriever{ 43 | Source: source, 44 | ModelName: string(model.Name), 45 | Query: model.Query, 46 | Options: model.Options, 47 | Format: model.Format, 48 | FilePatterns: model.FilePatterns, 49 | TableName: tableName, 50 | } 51 | if model.Collection != "" { 52 | r.Collection = model.Collection 53 | } else { 54 | r.Collection = string(model.Name) 55 | } 56 | switch source.Engine { 57 | case "s3": 58 | err := func(r Retriever, ic chan []driver.Value) error { 59 | g.Go(func() error { 60 | if err := ingestS3Model(&r); err != nil { 61 | return err 62 | } 63 | return nil 64 | }) 65 | 66 | return nil 67 | }(r, ic) 68 | if err != nil { 69 | return err 70 | } 71 | case "snowflake": 72 | err := func(r Retriever, ic chan []driver.Value) error { 73 | g.Go(func() error { 74 | if err := ingestSnowflakeModel(&r, ic); err != nil { 75 | return err 76 | } 77 | return nil 78 | }) 79 | return nil 80 | }(r, ic) 81 | if err != nil { 82 | return err 83 | } 84 | case "postgres": 85 | err := func(r Retriever, ic chan []driver.Value) error { 86 | g.Go(func() error { 87 | if err := ingestPostgresModel(&r, ic); err != nil { 88 | return err 89 | } 90 | return nil 91 | }) 92 | return nil 93 | }(r, ic) 94 | if err != nil { 95 | return err 96 | } 97 | case "mysql": 98 | err := func(r Retriever, ic chan []driver.Value) error { 99 | g.Go(func() error { 100 | if err := ingestMysqlModel(&r, ic); err != nil { 101 | return err 102 | } 103 | return nil 104 | }) 105 | return nil 106 | }(r, ic) 107 | if err != nil { 108 | return err 109 | } 110 | case "mongodb": 111 | err := func(r Retriever, ic chan []driver.Value) error { 112 | g.Go(func() error { 113 | if err := ingestMongoModel(&r, ic); err != nil { 114 | return err 115 | } 116 | return nil 117 | }) 118 | return nil 119 | }(r, ic) 120 | if err != nil { 121 | return err 122 | } 123 | default: 124 | Error(fmt.Sprintf("Engine %s not supported", source.Engine)) 125 | } 126 | } 127 | if err := g.Wait(); err != nil { 128 | return err 129 | } 130 | ic <- []driver.Value{"quit"} 131 | if model.Type == "database" { 132 | ConfirmInsert(string(model.Name), dc, 0) 133 | } 134 | } 135 | return nil 136 | } 137 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: [published] 7 | 8 | permissions: 9 | contents: write 10 | 11 | jobs: 12 | build-linux: 13 | runs-on: ubuntu-latest 14 | 15 | strategy: 16 | matrix: 17 | go-version: [1.23.0] 18 | os: [linux] 19 | arch: [amd64] 20 | 21 | steps: 22 | - name: Checkout code 23 | uses: actions/checkout@v2 24 | 25 | - name: Set up Go 26 | uses: actions/setup-go@v2 27 | with: 28 | go-version: ${{ matrix.go-version }} 29 | 30 | - name: Install dependencies 31 | run: go mod tidy 32 | 33 | - name: Build 34 | env: 35 | GOOS: ${{ 
matrix.os }} 36 | GOARCH: ${{ matrix.arch }} 37 | CGO_ENABLED: 1 38 | run: | 39 | CGO_ENABLED=1 CGO_LDFLAGS="-L/usr/lib" go build -o output/${{ matrix.os }}_${{ matrix.arch }}/preen --ldflags="-extldflags=-static" -tags osusergo,netgo main.go 40 | tar -C output/${{ matrix.os }}_${{ matrix.arch }} -czvf preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.tar.gz preen 41 | echo "Built for $GOOS $GOARCH" 42 | 43 | - name: Generate checksum 44 | run: | 45 | sha256sum preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.tar.gz | tee preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.sha256sum 46 | 47 | - name: Upload binary 48 | uses: svenstaro/upload-release-action@v2 49 | with: 50 | file: preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.tar.gz 51 | repo_token: ${{ secrets.GITHUB_TOKEN }} 52 | tag: ${{ github.ref }} 53 | 54 | - name: Upload checksum 55 | uses: svenstaro/upload-release-action@v2 56 | with: 57 | file: preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.sha256sum 58 | repo_token: ${{ secrets.GITHUB_TOKEN }} 59 | tag: ${{ github.ref }} 60 | 61 | build-macos: 62 | runs-on: macos-latest 63 | 64 | strategy: 65 | matrix: 66 | go-version: [1.23.0] 67 | os: [darwin] 68 | arch: [arm64, amd64] 69 | 70 | steps: 71 | - name: Checkout code 72 | uses: actions/checkout@v2 73 | 74 | - name: Set up Go 75 | uses: actions/setup-go@v2 76 | with: 77 | go-version: ${{ matrix.go-version }} 78 | 79 | - name: Install dependencies 80 | run: go mod tidy 81 | 82 | - name: Build 83 | env: 84 | GOOS: ${{ matrix.os }} 85 | GOARCH: ${{ matrix.arch }} 86 | CGO_ENABLED: 1 87 | run: | 88 | CGO_ENABLED=1 CGO_LDFLAGS="-L/usr/lib" go build -o output/${{ matrix.os }}_${{ matrix.arch }}/preen main.go 89 | tar -C output/${{ matrix.os }}_${{ matrix.arch }} -czvf preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.tar.gz preen 90 | echo "Built for $GOOS $GOARCH" 91 | 92 | - name: Generate checksum 93 | run: | 94 | shasum -a 256 preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.tar.gz | tee preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.sha256sum 95 | 96 | - name: Upload binary 97 | uses: svenstaro/upload-release-action@v2 98 | with: 99 | file: preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.tar.gz 100 | repo_token: ${{ secrets.GITHUB_TOKEN }} 101 | tag: ${{ github.ref }} 102 | 103 | - name: Upload checksum 104 | uses: svenstaro/upload-release-action@v2 105 | with: 106 | file: preen-${{ matrix.os }}_${{ matrix.arch }}-${{ github.event.release.tag_name }}.sha256sum 107 | repo_token: ${{ secrets.GITHUB_TOKEN }} 108 | tag: ${{ github.ref }} -------------------------------------------------------------------------------- /internal/engine/snowflake.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "database/sql/driver" 7 | "fmt" 8 | "reflect" 9 | "time" 10 | 11 | "github.com/snowflakedb/gosnowflake" 12 | ) 13 | 14 | func getSnowflakePoolFromSource(source Source) (*sql.DB, error) { 15 | 16 | config := gosnowflake.Config{ 17 | Account: source.Connection.Account, 18 | User: source.Connection.Username, 19 | Password: source.Connection.Password, 20 | Database: source.Connection.Database, 21 | Schema: source.Connection.Schema, 22 | Warehouse: source.Connection.Warehouse, 23 
| } 24 | connStr, err := gosnowflake.DSN(&config) 25 | 26 | if err != nil { 27 | panic(err) 28 | } 29 | 30 | db, err := sql.Open("snowflake", connStr) 31 | if err != nil { 32 | panic(err) 33 | } 34 | err = db.PingContext(context.Background()) 35 | if err != nil { 36 | return nil, fmt.Errorf("error pinging Snowflake: %w", err) 37 | } 38 | 39 | return db, nil 40 | } 41 | 42 | func ingestSnowflakeModel(r *Retriever, ic chan []driver.Value) error { 43 | Debug(fmt.Sprintf("Retrieving context %s for %s", r.ModelName, r.Source.Name)) 44 | clientPool, err := getSnowflakePoolFromSource(r.Source) 45 | if err != nil { 46 | return err 47 | } 48 | defer clientPool.Close() 49 | rows, err := clientPool.Query(r.Query) 50 | if err != nil { 51 | return fmt.Errorf("error querying Snowflake: %w", err) 52 | } 53 | defer rows.Close() 54 | 55 | if err = processSnowflakeRows(r, ic, rows); err != nil { 56 | return err 57 | } 58 | 59 | return nil 60 | } 61 | 62 | func processSnowflakeRows(r *Retriever, ic chan []driver.Value, rows *sql.Rows) error { 63 | valuePtrs, err := processSnowflakeColumns(rows) 64 | 65 | if err != nil { 66 | return fmt.Errorf("error processing Snowflake columns: %w", err) 67 | } 68 | for rows.Next() { 69 | if err = rows.Scan(valuePtrs...); err != nil { 70 | return fmt.Errorf("error scanning Snowflake rows: %w", err) 71 | } 72 | driverRow := make([]driver.Value, len(valuePtrs)+1) 73 | driverRow[0] = r.Source.Name 74 | for i, ptr := range valuePtrs { 75 | switch v := ptr.(type) { 76 | case *duckdbDecimal: 77 | driverRow[i+1], err = v.Value() 78 | if err != nil { 79 | return fmt.Errorf("error converting duckdbDecimal: %w", err) 80 | } 81 | default: 82 | driverRow[i+1] = dereferenceIfPtr(ptr) 83 | } 84 | } 85 | ic <- driverRow 86 | } 87 | 88 | return nil 89 | } 90 | 91 | func dereferenceIfPtr[T any](v T) T { 92 | rv := reflect.ValueOf(v) 93 | if rv.Kind() == reflect.Ptr { 94 | return rv.Elem().Interface().(T) 95 | } 96 | return v 97 | } 98 | 99 | func processSnowflakeColumns(rows *sql.Rows) ([]any, error) { 100 | columnTypes, err := rows.ColumnTypes() 101 | if err != nil { 102 | return nil, err 103 | } 104 | valuePtrs := make([]any, len(columnTypes)) 105 | 106 | for i, columnType := range columnTypes { 107 | 108 | switch columnType.DatabaseTypeName() { 109 | case "DECIMAL", "NUMBER", "FLOAT", "DOUBLE", "REAL", "FIXED": 110 | valuePtrs[i] = new(duckdbDecimal) 111 | case "BIGINT": 112 | valuePtrs[i] = new(int64) 113 | case "BOOLEAN": 114 | valuePtrs[i] = new(bool) 115 | case "INT", "MEDIUMINT": 116 | valuePtrs[i] = new(int32) 117 | case "SMALLINT", "YEAR": 118 | valuePtrs[i] = new(int16) 119 | case "TINYINT": 120 | valuePtrs[i] = new(int8) 121 | case "BINARY", "VARBINARY", "VARIANT", "OBJECT", "ARRAY": 122 | valuePtrs[i] = new([]byte) 123 | case "DATE", "DATETIME", "TIMESTAMP_TZ", "TIMESTAMP_LTZ", "TIMESTAMP_NTZ": 124 | valuePtrs[i] = new(time.Time) 125 | case "CHAR", "CHARACTER", "NCHAR", "VARCHAR", "TEXT", "STRING", "NVARCHAR", "NVARCHAR2", "CHAR VARYING", "NCHAR VARYING", "ENUM", "SET", "JSON", "TIME": 126 | Debug(fmt.Sprintf("Column type is a string: %s", columnType.DatabaseTypeName())) 127 | valuePtrs[i] = new(string) 128 | default: 129 | return nil, fmt.Errorf("unsupported column type: %s", columnType.DatabaseTypeName()) 130 | } 131 | } 132 | 133 | return valuePtrs, nil 134 | } 135 | -------------------------------------------------------------------------------- /internal/engine/mysql.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 
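// MySQL ingestion: a connection pool is opened from the source DSN
// (parseTime=true so DATE/DATETIME columns scan into time.Time), the model
// query is executed, and each row is scanned into the typed pointers chosen by
// processMysqlColumns before being sent on the insert channel.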
3 | import ( 4 | "database/sql" 5 | "database/sql/driver" 6 | "fmt" 7 | "log/slog" 8 | "net/url" 9 | "reflect" 10 | "time" 11 | 12 | _ "github.com/go-sql-driver/mysql" 13 | ) 14 | 15 | func GetMysqlPoolFromSource(source Source) (*sql.DB, error) { 16 | // Example url := "root:thisisnotarealpassword@tcp(127.0.0.1:33061)/mysql_db_1" 17 | url := fmt.Sprintf( 18 | "%s:%s@tcp(%s:%d)/%s?parseTime=true", 19 | source.Connection.Username, 20 | url.QueryEscape(source.Connection.Password), 21 | url.QueryEscape(source.Connection.Host), 22 | source.Connection.Port, 23 | source.Connection.Database, 24 | ) 25 | dbpool, err := getMysqlPool(url) 26 | 27 | if err != nil { 28 | return nil, err 29 | } 30 | 31 | return dbpool, nil 32 | } 33 | 34 | func getMysqlPool(url string) (*sql.DB, error) { 35 | dbPool, err := sql.Open("mysql", url) 36 | 37 | if err != nil { 38 | slog.Error( 39 | fmt.Sprintf("Unable to connect to database: %v\n", err), 40 | ) 41 | return nil, err 42 | } 43 | 44 | return dbPool, nil 45 | } 46 | 47 | // Retrieve retrieves data from a MySQL source and sends it to the insert channel. 48 | func ingestMysqlModel(r *Retriever, ic chan []driver.Value) error { 49 | Debug(fmt.Sprintf("Retrieving context %s for %s", r.ModelName, r.Source.Name)) 50 | clientPool, err := GetMysqlPoolFromSource(r.Source) 51 | if err != nil { 52 | return err 53 | } 54 | defer clientPool.Close() 55 | rows, err := clientPool.Query(r.Query) 56 | if err != nil { 57 | return err 58 | } 59 | defer rows.Close() 60 | 61 | if err = processMysqlRows(r, ic, rows); err != nil { 62 | return err 63 | } 64 | 65 | return nil 66 | } 67 | 68 | // processMysqlRows processes rows from a MySQL source and sends them to the insert channel. 69 | func processMysqlRows(r *Retriever, ic chan []driver.Value, rows *sql.Rows) error { 70 | // Get the column types from the rows and create a slice of pointers to scan into. 71 | valuePtrs, err := processMysqlColumns(rows) 72 | if err != nil { 73 | return err 74 | } 75 | for rows.Next() { 76 | if err = rows.Scan(valuePtrs...); err != nil { 77 | return err 78 | } 79 | driverRow := make([]driver.Value, len(valuePtrs)+1) 80 | driverRow[0] = r.Source.Name 81 | for i, ptr := range valuePtrs { 82 | if ptr == nil { 83 | driverRow[i+1] = nil 84 | continue 85 | } 86 | switch reflect.TypeOf(ptr).String() { 87 | case "*engine.duckdbDecimal": 88 | value := reflect.ValueOf(ptr).Elem().Interface() 89 | driverRow[i+1], err = value.(duckdbDecimal).Value() 90 | if err != nil { 91 | return err 92 | } 93 | default: 94 | // If the value is not a custom type, we can just use the value as is. 
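// The scan targets are typed pointers, so Elem() yields the concrete value
// (int64, string, time.Time, ...) that can be handed to the DuckDB inserter as-is.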
95 | driverRow[i+1] = reflect.ValueOf(ptr).Elem().Interface() 96 | } 97 | } 98 | ic <- driverRow 99 | } 100 | 101 | return nil 102 | } 103 | 104 | func processMysqlColumns(rows *sql.Rows) ([]any, error) { 105 | columnTypes, err := rows.ColumnTypes() 106 | if err != nil { 107 | return nil, err 108 | } 109 | valuePtrs := make([]any, len(columnTypes)) 110 | 111 | for i, columnType := range columnTypes { 112 | switch columnType.DatabaseTypeName() { 113 | case "DECIMAL", "NUMERIC", "FLOAT", "DOUBLE", "REAL": 114 | valuePtrs[i] = new(duckdbDecimal) 115 | case "BIGINT": 116 | valuePtrs[i] = new(int64) 117 | case "INT", "MEDIUMINT": 118 | valuePtrs[i] = new(int32) 119 | case "SMALLINT", "YEAR": 120 | valuePtrs[i] = new(int16) 121 | case "TINYINT": 122 | valuePtrs[i] = new(int8) 123 | case "BIT", "BINARY", "VARBINARY", "TINYBLOB", "MEDIUMBLOB", "LONGBLOB", "BLOB": 124 | valuePtrs[i] = new([]byte) 125 | case "DATE", "DATETIME", "TIMESTAMP": 126 | valuePtrs[i] = new(time.Time) 127 | case "CHAR", "VARCHAR", "TEXT", "TINYTEXT", "MEDIUMTEXT", "LONGTEXT", "ENUM", "SET", "JSON", "TIME": 128 | valuePtrs[i] = new(string) 129 | default: 130 | return nil, fmt.Errorf("unsupported column type: %s", columnType.DatabaseTypeName()) 131 | } 132 | } 133 | return valuePtrs, nil 134 | } 135 | -------------------------------------------------------------------------------- /internal/engine/s3.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "path/filepath" 6 | "reflect" 7 | ) 8 | 9 | func ingestS3Model(r *Retriever) error { 10 | switch r.Format { 11 | case "csv": 12 | optionsString, err := getCSVOptions(r.Options) 13 | if err != nil { 14 | return fmt.Errorf("failed to get csv options: %v", err) 15 | } 16 | query := fmt.Sprintf( 17 | `create or replace table main.%s as select * from read_csv(%s,%s) 18 | `, r.TableName, formatFilePatterns(r), *optionsString, 19 | ) 20 | Debug(fmt.Sprintf("running query: %s", query)) 21 | if err := ddbExec(query); err != nil { 22 | Debug(fmt.Sprintf("running query: %s", query)) 23 | return fmt.Errorf("failed to create file model table %s: %v", r.ModelName, err) 24 | } 25 | default: 26 | return fmt.Errorf("unsupported model file format %s", r.Format) 27 | } 28 | return nil 29 | } 30 | 31 | func formatFilePatterns(r *Retriever) string { 32 | queryString := "[" 33 | for i, v := range *r.FilePatterns { 34 | if i == len(*r.FilePatterns)-1 { 35 | queryString += fmt.Sprintf("'%s://%s'", r.Source.Engine, filepath.Join(r.Source.Connection.BucketName, v)) + "]" 36 | break 37 | } 38 | queryString += fmt.Sprintf("'%s://%s', ", r.Source.Engine, filepath.Join(r.Source.Connection.BucketName, v)) 39 | } 40 | return queryString 41 | } 42 | 43 | func getCSVOptions(o Options) (*string, error) { 44 | options := reflect.VisibleFields(reflect.TypeOf(o)) 45 | queryString := new(string) 46 | for _, option := range options { 47 | if _, ok := option.Tag.Lookup("default"); !ok { 48 | return nil, fmt.Errorf("missing default value for option %s", option.Name) 49 | } 50 | if _, ok := option.Tag.Lookup("yaml"); !ok { 51 | return nil, fmt.Errorf("missing yaml tag for option %s", option.Name) 52 | } 53 | 54 | defaultVal := option.Tag.Get("default") 55 | optionName := option.Tag.Get("yaml") 56 | optionValue := getDefaultValue(reflect.ValueOf(o).FieldByName(option.Name).Interface(), defaultVal) 57 | if optionValue == "" { 58 | continue 59 | } 60 | optionString := fmt.Sprintf("%s = %v", optionName, optionValue) 61 | if *queryString == "" { 
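// The first option starts the list; later options are appended below with a
// comma separator so the result can be spliced directly into read_csv(...).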
62 | *queryString += optionString 63 | } else { 64 | *queryString = fmt.Sprintf("%s, %s", *queryString, optionString) 65 | } 66 | } 67 | return queryString, nil 68 | } 69 | 70 | func getDefaultValue(key any, defaultVal any) any { 71 | switch key := key.(type) { 72 | case *[]string: 73 | // If the key is not set and there is not default value, return an empty string 74 | if key == nil && defaultVal == "-" { 75 | return "" 76 | } 77 | // If the key is not set and there is a default value, return the default value 78 | if key == nil && defaultVal != "-" { 79 | return defaultVal 80 | } 81 | // Convert the []string from YAML to a string array for the query 82 | queryString := "[" 83 | for i, v := range *key { 84 | if i == len(*key)-1 { 85 | queryString += v + "]" 86 | break 87 | } 88 | queryString += v + ", " 89 | } 90 | return queryString 91 | case *bool: 92 | if key == nil { 93 | return defaultVal 94 | } 95 | return *key 96 | case *string: 97 | // If the key is not set and there is not default value, return an empty string 98 | if key == nil && defaultVal == "-" { 99 | return "" 100 | } 101 | // If the key is not set and there is a default value, return the default value 102 | if key == nil && defaultVal != "-" { 103 | return fmt.Sprintf("'%s'", defaultVal) 104 | } 105 | return fmt.Sprintf("'%s'", *key) 106 | case *int64: 107 | if key == nil { 108 | return defaultVal 109 | } 110 | return *key 111 | case *[]Type: 112 | // If the key is not set and there is not default value, return an empty string 113 | if key == nil && defaultVal == "-" { 114 | return "" 115 | } 116 | // If the key is not set and there is a default value, return the default value 117 | if key == nil && defaultVal != "-" { 118 | return defaultVal 119 | } 120 | // Convert the []Type from YAML to a string object for the query 121 | queryString := "{" 122 | for i, v := range *key { 123 | if i == len(*key)-1 { 124 | queryString += fmt.Sprintf("'%s': '%s'", v.Name, v.Type) + "}" 125 | break 126 | } 127 | queryString += fmt.Sprintf("'%s': '%s',", v.Name, v.Type) 128 | } 129 | return queryString 130 | } 131 | return key 132 | } 133 | -------------------------------------------------------------------------------- /internal/engine/postgres.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "context" 5 | "database/sql/driver" 6 | "fmt" 7 | "net/url" 8 | "reflect" 9 | 10 | "github.com/jackc/pgx/v5" 11 | "github.com/jackc/pgx/v5/pgxpool" 12 | "github.com/marcboeker/go-duckdb" 13 | ) 14 | 15 | type QueryResult struct { 16 | Rows []map[string]any 17 | Columns []string 18 | } 19 | 20 | func getPostgresPool(url string) (*pgxpool.Pool, error) { 21 | // urlExample := "postgres://username:password@localhost:5432/database_name" 22 | dbpool, err := pgxpool.New(context.Background(), url) 23 | 24 | if err != nil { 25 | Error( 26 | fmt.Sprintf("Unable to connect to database: %v\n", err), 27 | ) 28 | return nil, err 29 | } 30 | return dbpool, nil 31 | } 32 | 33 | func getPostgresPoolFromSource(source Source) (*pgxpool.Pool, error) { 34 | 35 | url := fmt.Sprintf( 36 | "postgres://%s:%s@%s:%d/%s", 37 | source.Connection.Username, 38 | url.QueryEscape(source.Connection.Password), 39 | url.QueryEscape(source.Connection.Host), 40 | source.Connection.Port, 41 | source.Connection.Database, 42 | ) 43 | dbpool, err := getPostgresPool(url) 44 | 45 | if err != nil { 46 | return nil, err 47 | } 48 | 49 | return dbpool, nil 50 | } 51 | 52 | func ingestPostgresModel(r *Retriever, ic chan 
[]driver.Value) error { 53 | Debug(fmt.Sprintf("Retrieving context %s for %s", r.ModelName, r.Source.Name)) 54 | clientPool, err := getPostgresPoolFromSource(r.Source) 55 | if err != nil { 56 | return err 57 | } 58 | defer clientPool.Close() 59 | rows, err := clientPool.Query(context.Background(), r.Query) 60 | if err != nil { 61 | return err 62 | } 63 | defer rows.Close() 64 | 65 | if err = processPostgresRows(r, ic, rows); err != nil { 66 | return err 67 | } 68 | 69 | return nil 70 | } 71 | 72 | func processPostgresRows(r *Retriever, ic chan []driver.Value, rows pgx.Rows) error { 73 | var rowCounter int64 74 | for rows.Next() { 75 | values, err := rows.Values() 76 | if err != nil { 77 | return err 78 | } 79 | rowCounter++ 80 | driverRow := make([]driver.Value, len(values)+1) 81 | driverRow[0] = r.Source.Name 82 | for i, value := range values { 83 | if value == nil { 84 | driverRow[i+1] = nil 85 | continue 86 | } 87 | switch reflect.TypeOf(value).String() { 88 | case "pgtype.Numeric": 89 | decimal := duckdbDecimal(0) 90 | if err = decimal.Scan(value); err != nil { 91 | return err 92 | } 93 | driverRow[i+1], err = decimal.Value() 94 | if err != nil { 95 | return err 96 | } 97 | case "pgtype.Time": 98 | timeVal := duckdbTime("") 99 | if err = timeVal.Scan(value); err != nil { 100 | return err 101 | } 102 | driverRow[i+1], err = timeVal.Value() 103 | if err != nil { 104 | return err 105 | } 106 | case "pgtype.Interval": 107 | duration := duckdbDuration("") 108 | if err = duration.Scan(value); err != nil { 109 | return err 110 | } 111 | driverRow[i+1], err = duration.Value() 112 | if err != nil { 113 | return err 114 | } 115 | case "netip.Prefix": 116 | prefix := duckdbNetIpPrefix("") 117 | if err = prefix.Scan(value); err != nil { 118 | return err 119 | } 120 | driverRow[i+1], err = prefix.Value() 121 | if err != nil { 122 | return err 123 | } 124 | case "net.HardwareAddr": 125 | hwAddr := duckdbHardwareAddr("") 126 | if err = hwAddr.Scan(value); err != nil { 127 | return err 128 | } 129 | driverRow[i+1], err = hwAddr.Value() 130 | if err != nil { 131 | return err 132 | } 133 | case "map[string]interface {}", "[]interface {}": 134 | jsonVal := duckdbJSON("") 135 | if err = jsonVal.Scan(value); err != nil { 136 | return err 137 | } 138 | driverRow[i+1], err = jsonVal.Value() 139 | if err != nil { 140 | return err 141 | } 142 | // These are UUIDs 143 | case "[16]uint8": 144 | uuid := duckdbUUID(duckdb.UUID{}) 145 | if err = uuid.Scan(value); err != nil { 146 | return err 147 | } 148 | driverRow[i+1], err = uuid.Value() 149 | if err != nil { 150 | return err 151 | } 152 | default: 153 | driverRow[i+1] = value 154 | } 155 | } 156 | ic <- driverRow 157 | } 158 | Debug(fmt.Sprintf("Retrieved %d rows for %s - %s\n", rowCounter, r.Source.Name, r.ModelName)) 159 | if err := rows.Err(); err != nil { 160 | return err 161 | } 162 | return nil 163 | } 164 | -------------------------------------------------------------------------------- /internal/cli/app.go: -------------------------------------------------------------------------------- 1 | package cli 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/preendata/preen/internal/engine" 7 | "github.com/urfave/cli/v2" 8 | ) 9 | 10 | func NewApp() *cli.App { 11 | app := &cli.App{ 12 | Name: "preen", 13 | Usage: "A command-line application for preen", 14 | Flags: []cli.Flag{ 15 | &cli.StringFlag{ 16 | Name: "log-level", 17 | Aliases: []string{"l"}, 18 | Usage: "Set the log level (DEBUG, INFO, WARN, ERROR, FATAL, PANIC)", 19 | }, 20 | &cli.BoolFlag{ 21 | Name: 
"verbose", 22 | Aliases: []string{"v"}, 23 | Usage: "Set the log level to DEBUG", 24 | }, 25 | }, 26 | Commands: []*cli.Command{ 27 | { 28 | Name: "repl", 29 | Aliases: []string{"r"}, 30 | Usage: "Initiate interactive query session", 31 | Action: Repl, 32 | Flags: []cli.Flag{ 33 | &cli.StringFlag{ 34 | Name: "output-format", 35 | Aliases: []string{"o"}, 36 | Usage: "Set output format. Options are 'table', 'csv', 'markdown'", 37 | DefaultText: "table", 38 | Action: func(c *cli.Context, v string) error { 39 | format := c.String("output-format") 40 | if format != "table" && format != "csv" && format != "markdown" { 41 | return fmt.Errorf("invalid format: %s. Allowed values are 'table', 'csv', 'markdown'", format) 42 | } 43 | return nil 44 | }, 45 | }, 46 | }, 47 | }, 48 | { 49 | Name: "query", 50 | Aliases: []string{"q"}, 51 | Usage: "Execute a query", 52 | Action: Query, 53 | Flags: []cli.Flag{ 54 | &cli.StringFlag{ 55 | Name: "format", 56 | Aliases: []string{"f"}, 57 | Usage: "Set output format. Options are 'table' or 'json'", 58 | DefaultText: "table", 59 | Action: func(c *cli.Context, v string) error { 60 | format := c.String("format") 61 | if format != "table" && format != "json" { 62 | return fmt.Errorf("invalid format: %s. Allowed values are 'table' or 'json'", format) 63 | } 64 | return nil 65 | }, 66 | }, 67 | }, 68 | }, 69 | { 70 | Name: "model", 71 | Aliases: []string{"m"}, 72 | Usage: "Commands to manage models", 73 | Subcommands: []*cli.Command{ 74 | { 75 | Name: "build", 76 | Action: BuildModel, 77 | Aliases: []string{"b"}, 78 | Usage: "Build model", 79 | Flags: []cli.Flag{ 80 | &cli.StringFlag{ 81 | Name: "target", 82 | Aliases: []string{"t"}, 83 | Usage: "Target a specific model(s). The default is all models. This is relative to the PREEN_MODELS_PATH.", 84 | }, 85 | &cli.BoolFlag{ 86 | Name: "source-name", 87 | Aliases: []string{"sn"}, 88 | Usage: "Target a specific source", 89 | }, 90 | }, 91 | }, 92 | }, 93 | }, 94 | { 95 | Name: "source", 96 | Aliases: []string{"s"}, 97 | Usage: "Commands to manage sources", 98 | Subcommands: []*cli.Command{ 99 | { 100 | Name: "list", 101 | Aliases: []string{"l"}, 102 | Usage: "Print stored sources.", 103 | Action: ListSources, 104 | }, 105 | { 106 | Name: "validate", 107 | Aliases: []string{"v"}, 108 | Usage: "Validate config file and retrieve source data types", 109 | Action: Validate, 110 | }, 111 | { 112 | Name: "metadata", 113 | Aliases: []string{"i"}, 114 | Usage: "Build source metadata", 115 | Action: BuildMetadata, 116 | }, 117 | }, 118 | }, 119 | { 120 | Name: "version", 121 | Usage: "Print the version of the application", 122 | Action: func(c *cli.Context) error { 123 | fmt.Println("Preen version:", engine.Version) 124 | return nil 125 | }, 126 | }, 127 | }, 128 | Before: func(c *cli.Context) error { 129 | logLevel := "" 130 | 131 | // Check if log-level flag is set 132 | if c.IsSet("log-level") { 133 | logLevel = c.String("log-level") 134 | } 135 | 136 | // Check if verbose flag is set 137 | if c.Bool("verbose") { 138 | logLevel = "DEBUG" 139 | } 140 | 141 | err := engine.IsValidLogLevel(logLevel) 142 | if logLevel != "" && err != nil { 143 | return fmt.Errorf("invalid log level: %s. 
Allowed values are: DEBUG, INFO, WARN, ERROR, FATAL, PANIC", logLevel) 144 | } 145 | 146 | // Initialize logger, passes empty string if no flag set which is handled by variadic Intialize function 147 | if err := engine.Initialize(logLevel); err != nil { 148 | return err 149 | } 150 | 151 | return nil 152 | }, 153 | } 154 | return app 155 | } 156 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/preendata/preen 2 | 3 | go 1.23.1 4 | 5 | require ( 6 | github.com/aws/aws-sdk-go-v2/config v1.29.2 7 | github.com/aws/aws-sdk-go-v2/service/s3 v1.75.0 8 | github.com/chzyer/readline v1.5.1 9 | github.com/go-sql-driver/mysql v1.8.1 10 | github.com/jackc/pgx/v5 v5.7.2 11 | github.com/jedib0t/go-pretty/v6 v6.6.5 12 | github.com/joho/godotenv v1.5.1 13 | github.com/marcboeker/go-duckdb v1.8.3 14 | github.com/preendata/sqlparser v0.0.1 15 | github.com/sirupsen/logrus v1.9.3 16 | github.com/snowflakedb/gosnowflake v1.13.0 17 | github.com/urfave/cli/v2 v2.27.5 18 | go.mongodb.org/mongo-driver v1.17.2 19 | golang.org/x/sync v0.10.0 20 | gopkg.in/yaml.v3 v3.0.1 21 | ) 22 | 23 | require ( 24 | filippo.io/edwards25519 v1.1.0 // indirect 25 | github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect 26 | github.com/99designs/keyring v1.2.2 // indirect 27 | github.com/Azure/azure-sdk-for-go/sdk/azcore v1.17.0 // indirect 28 | github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect 29 | github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.0 // indirect 30 | github.com/BurntSushi/toml v1.4.0 // indirect 31 | github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect 32 | github.com/apache/arrow-go/v18 v18.1.0 // indirect 33 | github.com/apache/arrow/go/v16 v16.1.0 // indirect 34 | github.com/aws/aws-sdk-go-v2 v1.34.0 // indirect 35 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.8 // indirect 36 | github.com/aws/aws-sdk-go-v2/credentials v1.17.55 // indirect 37 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.25 // indirect 38 | github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.55 // indirect 39 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.29 // indirect 40 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.29 // indirect 41 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2 // indirect 42 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.29 // indirect 43 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2 // indirect 44 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.3 // indirect 45 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.10 // indirect 46 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.10 // indirect 47 | github.com/aws/aws-sdk-go-v2/service/sso v1.24.12 // indirect 48 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.11 // indirect 49 | github.com/aws/aws-sdk-go-v2/service/sts v1.33.10 // indirect 50 | github.com/aws/smithy-go v1.22.2 // indirect 51 | github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect 52 | github.com/danieljoos/wincred v1.2.2 // indirect 53 | github.com/dvsekhvalnov/jose2go v1.8.0 // indirect 54 | github.com/gabriel-vasile/mimetype v1.4.8 // indirect 55 | github.com/goccy/go-json v0.10.5 // indirect 56 | github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect 57 | github.com/golang-jwt/jwt/v5 v5.2.1 // indirect 58 | github.com/golang/snappy v0.0.4 // 
indirect 59 | github.com/google/flatbuffers v25.1.24+incompatible // indirect 60 | github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c // indirect 61 | github.com/jackc/pgpassfile v1.0.0 // indirect 62 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect 63 | github.com/jackc/puddle/v2 v2.2.2 // indirect 64 | github.com/klauspost/compress v1.17.11 // indirect 65 | github.com/klauspost/cpuid/v2 v2.2.9 // indirect 66 | github.com/mattn/go-runewidth v0.0.16 // indirect 67 | github.com/mitchellh/mapstructure v1.5.0 // indirect 68 | github.com/montanaflynn/stats v0.7.1 // indirect 69 | github.com/mtibben/percent v0.2.1 // indirect 70 | github.com/pierrec/lz4/v4 v4.1.22 // indirect 71 | github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect 72 | github.com/rivo/uniseg v0.4.7 // indirect 73 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 74 | github.com/xdg-go/pbkdf2 v1.0.0 // indirect 75 | github.com/xdg-go/scram v1.1.2 // indirect 76 | github.com/xdg-go/stringprep v1.0.4 // indirect 77 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect 78 | github.com/xwb1989/sqlparser v0.0.0-20180606152119-120387863bf2 // indirect 79 | github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect 80 | github.com/zeebo/xxh3 v1.0.2 // indirect 81 | golang.org/x/crypto v0.32.0 // indirect 82 | golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c // indirect 83 | golang.org/x/mod v0.22.0 // indirect 84 | golang.org/x/net v0.34.0 // indirect 85 | golang.org/x/sys v0.29.0 // indirect 86 | golang.org/x/term v0.28.0 // indirect 87 | golang.org/x/text v0.21.0 // indirect 88 | golang.org/x/tools v0.29.0 // indirect 89 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect 90 | ) 91 | -------------------------------------------------------------------------------- /docs/documentation/integrations/file-formats/csv-format.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: how to configure preen to read CSV files. 3 | --- 4 | 5 | # CSV Format 6 | 7 | Preen supports the following options for CSV format. This is largely a wrapper on the [DuckDB CSV scan options](https://duckdb.org/docs/data/csv/overview.html#parameters). 8 | 9 | | Option | Description | Default Value | 10 | | -------------------- | ---------------------------------------------------- | ------------- | 11 | | all\_varchar | Interpret all columns as varchar | false | 12 | | allow\_quoted\_nulls | Allow NULL values in quotes | true | 13 | | auto\_detect | Automatically detect CSV dialect | true | 14 | | columns | Specify column names | - | 15 | | compression | Compression type (auto, none, gzip, zstd) | auto | 16 | | dateformat | Specifies the date format to use | - | 17 | | decimal\_separator | Specifies the decimal separator | . 
| 18 | | delim | Specifies the delimiter character | , | 19 | | escape | Specifies the escape character | " | 20 | | filename | Include filename in the result | false | 21 | | force\_not\_null | Do not convert blank values to NULL | \[] | 22 | | header | Whether or not the CSV file has a header | false | 23 | | ignore\_errors | Ignore parsing errors | false | 24 | | max\_line\_size | Maximum line size in bytes | 2097152 | 25 | | names | Specify column names | - | 26 | | new\_line | Specifies the newline character | - | 27 | | normalize\_names | Normalize column names | false | 28 | | null\_padding | Pad columns with null values if row is too short | false | 29 | | nullstr | Specifies the string that represents NULL values | - | 30 | | parallel | Use multi-threading for reading CSV files | true | 31 | | quote | Specifies the quote character | " | 32 | | sample\_size | Number of sample rows for dialect and type detection | 20480 | 33 | | skip | Number of rows to skip | 0 | 34 | | timestampformat | Specifies the timestamp format | - | 35 | | types | Specify column types | - | 36 | | union\_by\_name | Union by name when reading multiple files | false | 37 | 38 | ## Examples 39 | 40 | ### Basic Auto-Detection 41 | 42 | This is the most common case. Preen will auto-detect the CSV format and use the default options. 43 | 44 | ```yaml 45 | # FILENAME: ~/.preen/models/users.yaml 46 | name: users 47 | type: file 48 | file_patterns: 49 | - "users/v1/**.csv" # This will match all csv files under the users/v1 prefix 50 | format: csv 51 | options: 52 | auto_detect: true 53 | header: true 54 | delim: "," 55 | quote: "\"" 56 | escape: "\"" 57 | union_by_name: true 58 | ``` 59 | 60 | ### Fully Specifying Options without auto-detection 61 | 62 | This is useful if you want to override the auto-detection and specify the options manually. This will save time and avoid the memory overhead of auto-detection. 63 | 64 | ```yaml 65 | # FILENAME: ~/.preen/models/users.yaml 66 | name: users 67 | type: file 68 | file_patterns: 69 | - "users/v1/**.csv" 70 | format: csv 71 | options: 72 | auto_detect: false 73 | header: true 74 | delim: "," 75 | quote: "\"" 76 | escape: "\"" 77 | columns: # List of all columns in the CSV file along with their DuckDB types 78 | - name: id 79 | type: bigint 80 | - name: name 81 | type: varchar 82 | - name: email 83 | type: varchar 84 | - name: birthday 85 | type: date 86 | ``` 87 | 88 | ### Partially Specifying Options to override auto-detection 89 | 90 | ```yaml 91 | # FILENAME: ~/.preen/models/users.yaml 92 | name: users 93 | type: file 94 | file_patterns: 95 | - "users/v1/**.csv" 96 | format: csv 97 | options: 98 | auto_detect: true 99 | header: true 100 | delim: "," 101 | quote: "\"" 102 | escape: "\"" 103 | types: # This overrides the DuckDB auto-detection for the specified columns 104 | - name: birthday 105 | type: date 106 | ``` 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![tests](https://github.com/preendata/preen/actions/workflows/ci.yaml/badge.svg) 2 | 3 | # Preen 4 | 5 | Preen is a powerful command-line application for querying from multiple sources locally from your laptop. Under the hood, Preen uses [DuckDB](https://duckdb.org/) to build an ephemeral, in-memory data warehouse and then uses DuckDB's SQL engine to query the data. Think of Preen as a mix of Fivetran and DBT for your DuckDB use cases. 
You describe the data you want to query using a declarative language and Preen takes care of the rest. 6 | 7 | Preen is currently in the alpha stage and not all features are available. We are working on adding more features and improving the user experience. If you have any questions or feedback, please feel free to open an issue on GitHub. 8 | 9 | ## Documentation 10 | See our [Gitbook](https://preen.gitbook.io/preen-docs) page for rich documentation on Preen. 11 | 12 | ## Features 13 | 14 | - Query data from multiple sources using a single interface 15 | - Support for MongoDB, PostgreSQL, MySQL, and AWS S3 16 | - Model-based data retrieval and collation 17 | - Identify and resolve data type discrepancies between sources 18 | - Interactive REPL for querying data 19 | - Configurable output formats (table, CSV, markdown, JSON) 20 | - Extensible architecture for adding new data sources 21 | 22 | ## Installation 23 | 24 | ### Homebrew 25 | 26 | Download the executable via our Homebrew cask. 27 | 28 | ```bash 29 | brew tap preendata/preen 30 | brew install preen 31 | ``` 32 | 33 | ### Download pre-built binary 34 | 35 | You can download a pre-built binary for your operating system and architecture from the [GitHub Releases](https://github.com/preendata/preen/releases) page. 36 | 37 | ```bash 38 | # Using curl 39 | sh -c "$(curl -fsSL https://raw.githubusercontent.com/preendata/preen/main/build/install.sh)" 40 | 41 | # Using wget 42 | sh -c "$(wget https://raw.githubusercontent.com/preendata/preen/main/build/install.sh -O -)" 43 | ``` 44 | 45 | ### Build from source 46 | 47 | To build Preen from source, you need to have Go 1.23.0 or later installed on your system. Then, you can build the application using the following commands: 48 | 49 | ```bash 50 | git clone https://github.com/preendata/preen.git 51 | cd preen 52 | make build 53 | ``` 54 | 55 | This will create a `preen` binary in the `bin` directory. You can add this to your `PATH` if you want to use the `preen` command from anywhere. 56 | 57 | ## Configuration 58 | 59 | [Example repository](https://github.com/preendata/preen-template) 60 | 61 | Preen uses two configuration files: `sources.yaml` and `models.yaml`. The `sources.yaml` file is used to configure the data sources that Preen will query. The `models.yaml` file is used to define the models that Preen will build. The directory in which Preen looks for source and model configurations is configurable via the `PREEN_CONFIG_PATH` environment variable. You can see an example of the environment configuration in the [.env.example](.env.example) file. The `models.yaml` file is optional. If it is not present, Preen will look for `.yaml` files in the `models` directory. 62 | 63 | Here is an example `sources.yaml` file: 64 | 65 | ```yaml 66 | sources: 67 | - name: mongo-db-us-west-1 # This has to be unique 68 | engine: mongodb 69 | connection: 70 | host: localhost 71 | port: 27117 72 | database: preen 73 | username: root 74 | password: ${MONGO_PASSWORD} # You can also use environment variables.
75 | auth_source: admin 76 | models: 77 | - users 78 | - orders 79 | - products 80 | ``` 81 | 82 | Here is an example `models.yaml` file: 83 | 84 | ```yaml 85 | models: 86 | - name: preen-users-model 87 | type: database 88 | query: | 89 | SELECT users.user_id, users.name, users.email FROM users 90 | ``` 91 | 92 | You can validate your configuration by running: 93 | 94 | ```bash 95 | preen source validate 96 | ``` 97 | 98 | ## Usage 99 | 100 | ### Building Models 101 | 102 | Building a model will fetch the data from the source and save it to the DuckDB database. To build your models, run: 103 | 104 | ```bash 105 | preen model build 106 | ``` 107 | 108 | ### Querying Data 109 | 110 | You can query data using the interactive REPL. You can also specify the output format of the data (table, csv, markdown, json). 111 | 112 | ```bash 113 | preen repl 114 | 115 | # Specify output format 116 | preen repl --output-format csv 117 | ``` 118 | 119 | For one-off queries, use the `query` command: 120 | 121 | ```bash 122 | preen query "select * from your_model limit 10" --output-format csv 123 | ``` 124 | 125 | ## Development 126 | 127 | To set up the development environment: 128 | 129 | 1. Clone the repository 130 | 2. Copy `.env.example` to `.env` and adjust the values as needed 131 | 3. Install dependencies: `go mod tidy` 132 | 4. Run Unit tests: `make test` 133 | 5. Run Integration tests: `make integration-test` 134 | 6. Run linter: `make lint` 135 | 136 | ## License 137 | 138 | This project is licensed under the LGPL-3.0 License. See the [LICENSE](LICENSE) file for details. 139 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 
36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. 
A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 
166 | -------------------------------------------------------------------------------- /internal/engine/types.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "database/sql/driver" 5 | "encoding/json" 6 | "fmt" 7 | "math" 8 | "net" 9 | "net/netip" 10 | "strconv" 11 | "time" 12 | 13 | "github.com/jackc/pgx/v5/pgtype" 14 | "github.com/marcboeker/go-duckdb" 15 | ) 16 | 17 | // Implements the Scanner and Valuer interfaces for custom data types. 18 | // https://pkg.go.dev/database/sql#Scanner 19 | 20 | // duckdbDecimal is a custom type for scanning and valuing float64 values. 21 | // The MySQL driver returns numeric types as strings, so we need to convert them to float64. 22 | // The PG driver returns numeric types as a custom type, so we need to convert them to float64. 23 | type duckdbDecimal float64 24 | 25 | func (d *duckdbDecimal) Scan(s any) error { 26 | switch v := s.(type) { 27 | // The string is from the Snowflake driver. 28 | case string: 29 | Debug(fmt.Sprintf("Scanning duckdbDecimal: %s", v)) 30 | if float, err := strconv.ParseFloat(v, 64); err == nil { 31 | *d = duckdbDecimal(float) 32 | } else { 33 | Debug(fmt.Sprintf("Error scanning duckdbDecimal: %s", err)) 34 | return fmt.Errorf("error scanning duckdbDecimal: %w", err) 35 | } 36 | // The byte array is from the MySQL driver. 37 | case []byte: 38 | if float, err := strconv.ParseFloat(string(v), 64); err == nil { 39 | *d = duckdbDecimal(float) 40 | } else { 41 | return fmt.Errorf("error scanning duckdbDecimal: %w", err) 42 | } 43 | // The float32 type is from the MySQL driver. 44 | case float32: 45 | *d = duckdbDecimal(v) 46 | // The float64 type is from the MySQL driver. 47 | case float64: 48 | *d = duckdbDecimal(v) 49 | // The numeric type is from the PG driver. 50 | case pgtype.Numeric: 51 | numericType := s.(pgtype.Numeric) 52 | decimal := duckdb.Decimal{Value: numericType.Int, Scale: uint8(math.Abs(float64(numericType.Exp)))} 53 | *d = duckdbDecimal(decimal.Float64()) 54 | case nil: 55 | *d = duckdbDecimal(0) 56 | default: 57 | fmt.Printf("type: %T\n", s) 58 | return fmt.Errorf("cannot sql.Scan() duckdbDecimal from: %#v", s) 59 | } 60 | return nil 61 | } 62 | 63 | func (d duckdbDecimal) Value() (driver.Value, error) { 64 | return float64(d), nil 65 | } 66 | 67 | // duckdbTime is a custom type for scanning and valuing time.Time values. 68 | // The PG driver returns time types as a custom type, so we need to convert them to string. 69 | // The database/sql driver doesn't respect time data types. 70 | type duckdbTime string 71 | 72 | func (t *duckdbTime) Scan(s any) error { 73 | switch v := s.(type) { 74 | case pgtype.Time: 75 | timeType := v 76 | // Create a Time object for midnight of the current day 77 | midnight := time.Now().Truncate(24 * time.Hour) 78 | resultTime := midnight.Add(time.Duration(timeType.Microseconds) * time.Microsecond) 79 | *t = duckdbTime(resultTime.String()) 80 | case nil: 81 | *t = duckdbTime("") 82 | default: 83 | return fmt.Errorf("cannot sql.Scan() duckdbTime from: %#v", s) 84 | } 85 | return nil 86 | } 87 | 88 | func (t duckdbTime) Value() (driver.Value, error) { 89 | return fmt.Sprint(t), nil 90 | } 91 | 92 | // duckdbDuration is a custom type for scanning and valuing string values. 93 | // The PG driver returns interval types as a custom type, so we need to convert them to string. 94 | // The database/sql driver doesn't respect interval data types. 
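// For example, scanning a Postgres interval of one day produces the string
// "Microseconds: 0, Days: 1, Months: 0".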
95 | type duckdbDuration string 96 | 97 | func (d *duckdbDuration) Scan(s any) error { 98 | switch v := s.(type) { 99 | case pgtype.Interval: 100 | stringVal := fmt.Sprintf("Microseconds: %d, Days: %d, Months: %d", v.Microseconds, v.Days, v.Months) 101 | *d = duckdbDuration(stringVal) 102 | case nil: 103 | *d = duckdbDuration("") 104 | default: 105 | return fmt.Errorf("cannot sql.Scan() strfmt.Duration from: %#v", v) 106 | } 107 | return nil 108 | } 109 | 110 | func (d duckdbDuration) Value() (driver.Value, error) { 111 | return string(d), nil 112 | } 113 | 114 | // duckdbNetIPPrefix is a custom type for scanning and valuing netip.Prefix values. 115 | // The PG driver returns inet types as a custom type, so we need to convert them to string. 116 | type duckdbNetIpPrefix string 117 | 118 | func (d *duckdbNetIpPrefix) Scan(s any) error { 119 | switch v := s.(type) { 120 | case netip.Prefix: 121 | *d = duckdbNetIpPrefix(v.String()) 122 | case nil: 123 | *d = duckdbNetIpPrefix("") 124 | default: 125 | return fmt.Errorf("cannot sql.Scan() netip.Prefix from: %#v", v) 126 | } 127 | return nil 128 | } 129 | 130 | func (d duckdbNetIpPrefix) Value() (driver.Value, error) { 131 | return string(d), nil 132 | } 133 | 134 | // duckdbHardwareAddr is a custom type for scanning and valuing net.HardwareAddr values. 135 | // The PG driver returns macaddr types as a custom type, so we need to convert them to string. 136 | type duckdbHardwareAddr string 137 | 138 | func (d *duckdbHardwareAddr) Scan(s any) error { 139 | switch v := s.(type) { 140 | case net.HardwareAddr: 141 | *d = duckdbHardwareAddr(v.String()) 142 | case nil: 143 | *d = duckdbHardwareAddr("") 144 | default: 145 | return fmt.Errorf("cannot sql.Scan() net.HardwareAddr from: %#v", v) 146 | } 147 | return nil 148 | } 149 | 150 | func (d duckdbHardwareAddr) Value() (driver.Value, error) { 151 | return string(d), nil 152 | } 153 | 154 | // duckdbJSON is a custom type for scanning and valuing json values. 155 | // The PG driver returns json types as a custom type, so we need to convert them to string. 156 | type duckdbJSON string 157 | 158 | func (j *duckdbJSON) Scan(s any) error { 159 | switch v := s.(type) { 160 | case map[string]interface{}, []interface{}: 161 | jsonVal, err := json.Marshal(v) 162 | if err != nil { 163 | return fmt.Errorf("error scanning duckdbJSON: %w", err) 164 | } 165 | *j = duckdbJSON(jsonVal) 166 | case nil: 167 | *j = duckdbJSON("") 168 | default: 169 | return fmt.Errorf("cannot sql.Scan() duckdbJSON from: %#v", v) 170 | } 171 | return nil 172 | } 173 | 174 | func (j duckdbJSON) Value() (driver.Value, error) { 175 | return string(j), nil 176 | } 177 | 178 | // duckdbUUID is a custom type for scanning and valuing UUID values. 179 | // The PG driver returns UUID types as a custom type, so we need to convert them to string. 
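// Note: unlike the wrappers above, the scanned value is kept as a duckdb.UUID
// rather than being flattened to a string.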
180 | type duckdbUUID duckdb.UUID 181 | 182 | func (u *duckdbUUID) Scan(s any) error { 183 | switch v := s.(type) { 184 | case [16]uint8: 185 | value := duckdb.UUID(v) 186 | *u = duckdbUUID(value) 187 | case nil: 188 | *u = duckdbUUID(duckdb.UUID([]uint8{})) 189 | default: 190 | return fmt.Errorf("cannot sql.Scan() duckdbUUID from: %#v", v) 191 | } 192 | return nil 193 | } 194 | 195 | func (u duckdbUUID) Value() (driver.Value, error) { 196 | return duckdb.UUID(u), nil 197 | } 198 | 199 | var duckdbTypeMap = map[string]string{ 200 | "integer": "integer", 201 | "bigint": "bigint", 202 | "smallint": "smallint", 203 | "mediumint": "integer", 204 | "int": "integer", 205 | "year": "smallint", 206 | "double precision": "double", 207 | "double": "double", 208 | "number": "double", //snowflake 209 | "numeric": "double", 210 | "decimal": "double", 211 | "real": "real", 212 | "float4": "real", 213 | "float": "real", 214 | "boolean": "boolean", 215 | "date": "date", 216 | "timestamp": "timestamp", 217 | "datetime": "timestamp", 218 | "timestamp_tz": "timestamp", //snowflake 219 | "timestamp_ltz": "timestamp", //snowflake 220 | "timestamp_ntz": "timestamp", //snowflake 221 | "timestamp without time zone": "timestamp", 222 | "timestamp with time zone": "timestamp", 223 | "binary": "blob", 224 | "varbinary": "blob", 225 | "tinyblob": "blob", 226 | "blob": "blob", 227 | "mediumblob": "blob", 228 | "longblob": "blob", 229 | "bytea": "blob", 230 | "variant": "blob", // snowflake 231 | "object": "json", // snowflake 232 | "json": "json", 233 | "jsonb": "json", 234 | "inet": "varchar", 235 | "cidr": "varchar", 236 | "macaddr": "varchar", 237 | "array": "json", 238 | "xml": "varchar", 239 | "int4range": "varchar", 240 | "varchar": "varchar", 241 | "tinyint": "tinyint", 242 | "char": "varchar", 243 | "tinytext": "varchar", 244 | "mediumtext": "varchar", 245 | "longtext": "varchar", 246 | "character varying": "varchar", 247 | "text": "varchar", 248 | "character": "varchar", 249 | "enum": "varchar", 250 | "set": "varchar", 251 | "time without time zone": "varchar", 252 | "time": "varchar", 253 | "interval": "varchar", 254 | "uuid": "uuid", 255 | } 256 | -------------------------------------------------------------------------------- /internal/engine/models.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "slices" 8 | "strings" 9 | 10 | "github.com/preendata/sqlparser" 11 | yaml "gopkg.in/yaml.v3" 12 | ) 13 | 14 | type ModelName string 15 | 16 | type Type struct { 17 | Name string `yaml:"name"` 18 | Type string `yaml:"type"` 19 | } 20 | 21 | type Options struct { 22 | AllVarchar *bool `default:"false" yaml:"all_varchar"` 23 | AllowQuotedNulls *bool `default:"true" yaml:"allow_quoted_nulls"` 24 | AutoDetect *bool `default:"true" yaml:"auto_detect"` 25 | AutoTypeCandidates *[]string `default:"-" yaml:"auto_type_candidates"` 26 | Columns *[]Type `default:"-" yaml:"columns"` 27 | Compression *string `default:"auto" yaml:"compression"` 28 | DateFormat *string `default:"-" yaml:"date_format"` 29 | DecimalSeparator *string `default:"." 
yaml:"decimal_separator"` 30 | Delim *string `default:"," yaml:"delim"` 31 | Escape *string `default:"\"" yaml:"escape"` 32 | FileName *bool `default:"false" yaml:"filename"` 33 | ForceNotNull *[]string `default:"[]" yaml:"force_not_null"` 34 | Header *bool `default:"false" yaml:"header"` 35 | HivePartitioning *bool `default:"false" yaml:"hive_partitioning"` 36 | IgnoreErrors *bool `default:"false" yaml:"ignore_errors"` 37 | MaxLineSize *int64 `default:"2097152" yaml:"max_line_size"` 38 | Names *[]string `default:"-" yaml:"names"` 39 | NewLine *string `default:"-" yaml:"new_line"` 40 | NormalizeNames *bool `default:"false" yaml:"normalize_names"` 41 | NullPadding *bool `default:"false" yaml:"null_padding"` 42 | NullString *[]string `default:"-" yaml:"null_string"` 43 | Parallel *bool `default:"true" yaml:"parallel"` 44 | Quote *string `default:"\"" yaml:"quote"` 45 | SampleSize *int64 `default:"20480" yaml:"sample_size"` 46 | Skip *int64 `default:"0" yaml:"skip"` 47 | TimestampFormat *string `default:"-" yaml:"timestamp_format"` 48 | Types *[]Type `default:"-" yaml:"types"` 49 | UnionByName *bool `default:"false" yaml:"union_by_name"` 50 | } 51 | 52 | type Model struct { 53 | Name ModelName `yaml:"name"` 54 | Type string `yaml:"type"` 55 | Format string `yaml:"format"` 56 | Options Options `yaml:"options"` 57 | Query string `yaml:"query"` 58 | FilePatterns *[]string `yaml:"file_patterns"` 59 | Collection string `yaml:"collection"` 60 | Parsed sqlparser.Statement 61 | DDLString string 62 | Columns map[TableName]map[ColumnName]Column 63 | TableMap TableMap 64 | TableSet TableSet 65 | } 66 | 67 | type ModelConfig struct { 68 | Models []*Model `yaml:"models"` 69 | Env *Env `yaml:"-"` 70 | } 71 | 72 | // Models can be defined in a models.yaml file in the preen config directory. 73 | // Models can also be defined in individual .yaml files in the preen models directory. 74 | 75 | func GetModelConfigs(modelTarget string) (*ModelConfig, error) { 76 | mc := ModelConfig{} 77 | env, err := EnvInit() 78 | if err != nil { 79 | return nil, fmt.Errorf("error initializing environment: %w", err) 80 | } 81 | mc.Env = env 82 | 83 | configFilePath := getYmlorYamlPath(mc.Env.PreenConfigPath, "models") 84 | modelsDir := mc.Env.PreenModelsPath 85 | 86 | // Check if a models.yaml file exists in the config directory. 87 | // If it does, parse it. 88 | if _, err = os.Stat(configFilePath); err == nil { 89 | err = parseModelsYamlFile(configFilePath, &mc) 90 | if err != nil { 91 | return nil, fmt.Errorf("error parsing models.yaml file: %w", err) 92 | } 93 | } 94 | 95 | // Process any .yaml files in the models directory 96 | err = parseModelDirectoryFiles(modelsDir, modelTarget, &mc) 97 | if err != nil { 98 | return nil, fmt.Errorf("error parsing models directory: %w", err) 99 | } 100 | 101 | // If no models are detected, return an error 102 | if len(mc.Models) == 0 { 103 | return nil, fmt.Errorf( 104 | "no models detected in %s/models.yaml file or %s directory", 105 | mc.Env.PreenConfigPath, mc.Env.PreenModelsPath, 106 | ) 107 | } 108 | 109 | // Override config with environment variables 110 | fromEnv(&mc) 111 | if err = parseModels(&mc); err != nil { 112 | return nil, fmt.Errorf("error parsing models: %w", err) 113 | } 114 | 115 | if err = ParseModelTables(&mc); err != nil { 116 | return nil, fmt.Errorf("error parsing model tables: %w", err) 117 | } 118 | 119 | return &mc, nil 120 | } 121 | 122 | // This is the main entry point for building models. The CLI commands call this function. 
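// The pipeline, in order: build source metadata in DuckDB, drop models that no
// source references, derive per-column majority types, create the destination
// tables in DuckDB, and finally retrieve data from every configured source.
//
// A rough usage sketch (the real wiring lives in the CLI layer; sc is assumed to
// be a *SourceConfig already loaded from sources.yaml, loader not shown here):
//
//	mc, err := GetModelConfigs("")
//	if err != nil {
//		return err
//	}
//	if err := BuildModels(sc, mc); err != nil {
//		return err
//	}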
123 | func BuildModels(sc *SourceConfig, mc *ModelConfig) error { 124 | if err := BuildMetadata(sc, mc); err != nil { 125 | return fmt.Errorf("error building information schema: %w", err) 126 | } 127 | 128 | if err := removeUnusedModels(sc, mc); err != nil { 129 | return fmt.Errorf("error removing unused models: %w", err) 130 | } 131 | 132 | columnMetadata, err := BuildColumnMetadata() 133 | if err != nil { 134 | return fmt.Errorf("error building column metadata: %w", err) 135 | } 136 | 137 | if err = ParseModelColumns(mc, columnMetadata); err != nil { 138 | return fmt.Errorf("error parsing model columns: %w", err) 139 | } 140 | 141 | if err = buildDuckDBTables(mc); err != nil { 142 | return fmt.Errorf("error building model tables: %w", err) 143 | } 144 | 145 | Info(fmt.Sprintf("Fetching data from %d configured sources", len(sc.Sources))) 146 | if err = Retrieve(sc, mc); err != nil { 147 | return fmt.Errorf("error retrieving data: %w", err) 148 | } 149 | 150 | return nil 151 | } 152 | 153 | // Parse the models.yaml file in the preen config directory. This file can contain multiple models. 154 | // It is optional, but if it exists, it will be parsed. 155 | func parseModelsYamlFile(filePath string, mc *ModelConfig) error { 156 | file, err := os.ReadFile(filePath) 157 | if err != nil { 158 | return fmt.Errorf("failed to read model file: %w", err) 159 | } 160 | 161 | if err = yaml.Unmarshal(file, &mc); err != nil { 162 | return fmt.Errorf("failed to parse model file: %w", err) 163 | } 164 | 165 | return nil 166 | } 167 | 168 | // Parse the models directory which is supplied as a possible environment value. 169 | // The modelTarget is the user input prefix of any model files that should be used. 170 | // Each .yaml file in this directory is a model. 171 | func parseModelDirectoryFiles(modelsDir string, modelTarget string, mc *ModelConfig) error { 172 | _, err := os.ReadDir(modelsDir) 173 | if err != nil { 174 | return fmt.Errorf("failed to read models directory: %w", err) 175 | } 176 | 177 | err = filepath.WalkDir(modelsDir, func(path string, d os.DirEntry, err error) error { 178 | if err != nil { 179 | return fmt.Errorf("error walking directory: %w", err) 180 | } 181 | 182 | if d.IsDir() { 183 | return nil 184 | } 185 | if (strings.HasSuffix(path, ".yaml") || strings.HasSuffix(path, ".yml")) && 186 | (modelTarget == "" || strings.HasPrefix(path, filepath.Join(modelsDir, modelTarget))) { 187 | 188 | file, err := os.ReadFile(path) 189 | if err != nil { 190 | return fmt.Errorf("error reading model file %s: %w", path, err) 191 | } 192 | m := Model{} 193 | err = yaml.Unmarshal(file, &m) 194 | if err != nil { 195 | return fmt.Errorf("error parsing model file %s: %w", path, err) 196 | } 197 | if m.Name != "" { 198 | mc.Models = append(mc.Models, &m) 199 | } else { 200 | Warn(fmt.Sprintf("Unrecognized model file %s: no model name detected", path)) 201 | } 202 | } 203 | return nil 204 | }) 205 | if err != nil { 206 | return fmt.Errorf("error parsing model directory: %w", err) 207 | } 208 | return nil 209 | } 210 | 211 | // Parse the models and create a parsed version of the model's required fields. 212 | // This is where the SQL models are parsed into ASTs. 213 | // This is where the file models are validated. 
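// For example, a database model whose query begins with SELECT is parsed into an
// AST for later column analysis, while any other statement is passed through with
// Parsed left nil. A file model must declare file_patterns.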
214 | func parseModels(mc *ModelConfig) error { 215 | for modelName, model := range mc.Models { 216 | switch model.Type { 217 | case "database": 218 | // Database models require a query 219 | if model.Query == "" { 220 | return fmt.Errorf("error parsing database model %v: query required", modelName) 221 | } 222 | // If the query is a SELECT statement, parse it 223 | if strings.HasPrefix(strings.ToLower(model.Query), "select") { 224 | stmt, err := sqlparser.Parse(model.Query) 225 | if err != nil { 226 | return fmt.Errorf("error parsing sql model %v: %w", modelName, err) 227 | } 228 | model.Parsed = stmt 229 | mc.Models[modelName] = model 230 | // If the query is not a SELECT statement, set the parsed statement to nil 231 | } else { 232 | model.Parsed = nil 233 | mc.Models[modelName] = model 234 | } 235 | case "file": 236 | if model.FilePatterns == nil { 237 | return fmt.Errorf("error parsing file model %v: file_pattern required", modelName) 238 | } 239 | } 240 | } 241 | return nil 242 | } 243 | 244 | // Create each model's destination table in DuckDB 245 | func buildDuckDBTables(mc *ModelConfig) error { 246 | for _, model := range mc.Models { 247 | switch model.Type { 248 | case "database": 249 | Debug(fmt.Sprintf("Creating table %s", model.Name)) 250 | tableName := strings.ReplaceAll(string(model.Name), "-", "_") 251 | createTableStmt := fmt.Sprintf("create or replace table main.%s (%s);", tableName, model.DDLString) 252 | if err := ddbExec(createTableStmt); err != nil { 253 | return fmt.Errorf("error creating table %s: %w", tableName, err) 254 | } 255 | case "file": 256 | Debug("Tables for file models will be created on model retrieval") 257 | } 258 | } 259 | return nil 260 | } 261 | 262 | // If a model file is referenced in a source, but no model file exists, return an error. 263 | func errorOnMissingModels(sc *SourceConfig, mc *ModelConfig) error { 264 | missingModels := make([]string, 0) 265 | for _, source := range sc.Sources { 266 | for _, modelName := range source.Models { 267 | modelFound := false 268 | for _, model := range mc.Models { 269 | if model.Name == ModelName(modelName) { 270 | modelFound = true 271 | break 272 | } 273 | } 274 | if !modelFound && !slices.Contains(missingModels, string(modelName)) { 275 | missingModels = append(missingModels, string(modelName)) 276 | } 277 | } 278 | } 279 | if len(missingModels) > 0 { 280 | return fmt.Errorf("no model file detected for models: %s", strings.Join(missingModels, ", ")) 281 | } 282 | return nil 283 | } 284 | 285 | // Remove unused models from ModelConfig. If a model is not referenced in any source, it is unused. 286 | func removeUnusedModels(sc *SourceConfig, mc *ModelConfig) error { 287 | usedModels := make([]string, 0) 288 | for _, source := range sc.Sources { 289 | for _, modelName := range source.Models { 290 | usedModels = append(usedModels, string(modelName)) 291 | } 292 | } 293 | 294 | for i, model := range mc.Models { 295 | if !slices.Contains(usedModels, string(model.Name)) { 296 | Info(fmt.Sprintf("Removing unused model: %s", model.Name)) 297 | mc.Models = append(mc.Models[:i], mc.Models[i+1:]...) 
298 | } 299 | } 300 | 301 | return nil 302 | } 303 | -------------------------------------------------------------------------------- /internal/engine/metadata.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "context" 5 | "database/sql/driver" 6 | "fmt" 7 | "slices" 8 | "strings" 9 | 10 | "github.com/aws/aws-sdk-go-v2/config" 11 | "github.com/aws/aws-sdk-go-v2/service/s3" 12 | "golang.org/x/sync/errgroup" 13 | ) 14 | 15 | // BuildMetadata builds any required metadata for the sources in the sources.yaml config. 16 | // Postgres and MySQL sources require an information schema to be built. 17 | // S3 sources require duckDB secrets to be stored. 18 | func BuildMetadata(sc *SourceConfig, mc *ModelConfig) error { 19 | // Ensure info schema table exists 20 | if err := prepareDDBInformationSchema(); err != nil { 21 | return err 22 | } 23 | 24 | // Reuse the insert function to insert data to the information schema 25 | ic := make(chan []driver.Value, 10) 26 | dc := make(chan []int64) 27 | 28 | go Insert("preen_information_schema", ic, dc) 29 | 30 | // Group sources by engine to distribute across specific engine handlers 31 | preenSourcesByEngine := groupSourceByEngine(sc) 32 | 33 | sourceErrGroup := new(errgroup.Group) 34 | 35 | for engine, sources := range preenSourcesByEngine { 36 | sourceErrGroup.Go(func() error { 37 | switch engine { 38 | case "postgres": 39 | if err := buildPostgresInformationSchema(sources, ic, mc); err != nil { 40 | return fmt.Errorf("error building postgres information schema: %w", err) 41 | } 42 | case "mysql": 43 | if err := buildMySQLInformationSchema(sources, ic, mc); err != nil { 44 | return fmt.Errorf("error building mysql information schema: %w", err) 45 | } 46 | case "snowflake": 47 | if err := buildSnowflakeInformationSchema(sources, ic, mc); err != nil { 48 | return fmt.Errorf("error building snowflake information schema: %w", err) 49 | } 50 | case "mongodb": 51 | Debug("No information schema required for MongoDB") 52 | case "s3": 53 | if len(sources) > 1 { 54 | return fmt.Errorf("only one s3 source is supported") 55 | } 56 | if err := buildS3Secrets(sources[0]); err != nil { 57 | return fmt.Errorf("error configuring s3 access: %w", err) 58 | } 59 | if err := confirmS3Connection(sources[0]); err != nil { 60 | return fmt.Errorf("error confirming s3 objects: %w", err) 61 | } 62 | default: 63 | return fmt.Errorf("unsupported engine: %s", engine) 64 | } 65 | 66 | return nil 67 | }) 68 | } 69 | 70 | if err := sourceErrGroup.Wait(); err != nil { 71 | return err 72 | } 73 | ic <- []driver.Value{"quit"} 74 | ConfirmInsert("preen_information_schema", dc, 0) 75 | Info("Metadata build completed successfully") 76 | 77 | return nil 78 | } 79 | 80 | // buildS3Secrets builds the secrets for all s3 sources in the config 81 | // This is required to access the S3 bucket, https://duckdb.org/docs/extensions/httpfs/s3api.html 82 | func buildS3Secrets(s Source) error { 83 | query := fmt.Sprintf(` 84 | install aws; 85 | load aws; 86 | create or replace persistent secret aws_s3 ( 87 | type S3, 88 | region '%s', 89 | provider CREDENTIAL_CHAIN 90 | ) 91 | `, s.Connection.Region) 92 | if err := ddbExec(query); err != nil { 93 | return err 94 | } 95 | return nil 96 | } 97 | 98 | // confirmS3Connection confirms that the S3 connection is working, 99 | // and that at least one object is present inside the bucket. 
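// Credentials are resolved through the AWS SDK's default chain (environment
// variables, shared config/credentials files, or an attached IAM role). The check
// issues a ListObjectsV2 call, so the caller needs s3:ListBucket on the bucket.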
100 | func confirmS3Connection(s Source) error { 101 | ctx := context.Background() 102 | cfg, err := config.LoadDefaultConfig( 103 | ctx, 104 | config.WithRegion(s.Connection.Region), 105 | ) 106 | if err != nil { 107 | return fmt.Errorf("error loading default config: %w", err) 108 | } 109 | 110 | s3Client := s3.NewFromConfig(cfg) 111 | input := &s3.ListObjectsV2Input{ 112 | Bucket: &s.Connection.BucketName, 113 | } 114 | 115 | result, err := s3Client.ListObjectsV2(ctx, input) 116 | if err != nil { 117 | return fmt.Errorf("unable to list items in bucket %q: %w", s.Connection.BucketName, err) 118 | } 119 | if len(result.Contents) == 0 { 120 | return fmt.Errorf("no objects found in bucket %q", s.Connection.BucketName) 121 | } else { 122 | Debug(fmt.Sprintf("Found %d objects in bucket %q", len(result.Contents), s.Connection.BucketName)) 123 | } 124 | return nil 125 | } 126 | 127 | // buildMySQLInformationSchema builds the information schema for all mysql sources in the config 128 | func buildMySQLInformationSchema(sources []Source, ic chan<- []driver.Value, mc *ModelConfig) error { 129 | schemaErrGroup := new(errgroup.Group) 130 | 131 | for _, source := range sources { 132 | err := func(source Source) error { 133 | schemaErrGroup.Go(func() error { 134 | // Open new pool for every source 135 | pool, err := GetMysqlPoolFromSource(source) 136 | if err != nil { 137 | return err 138 | } 139 | 140 | defer pool.Close() 141 | 142 | // Iterate over all models and get the tables for each model 143 | for _, model := range mc.Models { 144 | if model.Type == "database" && model.Parsed != nil && slices.Contains(source.Models, string(model.Name)) { 145 | tablesQueryString := "" 146 | for _, tableName := range model.TableSet { 147 | if tablesQueryString != "" { 148 | tablesQueryString += fmt.Sprintf(",'%s'", tableName) 149 | } else { 150 | tablesQueryString += fmt.Sprintf("'%s'", tableName) 151 | } 152 | } 153 | 154 | // MySQL does not have schemas, so we use the database name 155 | schema := source.Connection.Database 156 | 157 | query := fmt.Sprintf(` 158 | select table_name, column_name, data_type from information_schema.columns 159 | where table_schema = '%s' and table_name in (%s); 160 | `, schema, tablesQueryString) 161 | 162 | rows, err := pool.Query(query) 163 | if err != nil { 164 | return err 165 | } 166 | 167 | defer rows.Close() 168 | 169 | for rows.Next() { 170 | var table_name string 171 | var column_name string 172 | var data_type string 173 | err = rows.Scan(&table_name, &column_name, &data_type) 174 | 175 | if err != nil { 176 | return err 177 | } 178 | ic <- []driver.Value{source.Name, string(model.Name), table_name, column_name, data_type} 179 | } 180 | } 181 | } 182 | return nil 183 | }) 184 | return nil 185 | }(source) 186 | if err != nil { 187 | return err 188 | } 189 | } 190 | if err := schemaErrGroup.Wait(); err != nil { 191 | return err 192 | } 193 | 194 | return nil 195 | } 196 | 197 | // buildSnowflakeInformationSchema builds the information schema for all snowflake sources in the config 198 | func buildSnowflakeInformationSchema(sources []Source, ic chan<- []driver.Value, mc *ModelConfig) error { 199 | schemaErrGroup := new(errgroup.Group) 200 | 201 | for _, source := range sources { 202 | schemaErrGroup.Go(func() error { 203 | pool, err := getSnowflakePoolFromSource(source) 204 | if err != nil { 205 | return err 206 | } 207 | defer pool.Close() 208 | schema := "'PUBLIC'" 209 | 210 | for _, model := range mc.Models { 211 | if model.Type == "database" && model.Parsed != nil && 
slices.Contains(source.Models, string(model.Name)) { 212 | tablesQueryString := "" 213 | for _, tableName := range model.TableSet { 214 | if tablesQueryString != "" { 215 | tablesQueryString += fmt.Sprintf(",'%s'", tableName) 216 | } else { 217 | tablesQueryString += fmt.Sprintf("'%s'", tableName) 218 | } 219 | } 220 | 221 | query := fmt.Sprintf(` 222 | select table_name, column_name, data_type from %s.information_schema.columns 223 | where TABLE_SCHEMA = upper(%s) and table_name = upper(%s); 224 | `, source.Connection.Database, schema, tablesQueryString) 225 | rows, err := pool.Query(query) 226 | if err != nil { 227 | return err 228 | } 229 | 230 | defer rows.Close() 231 | 232 | for rows.Next() { 233 | var table_name string 234 | var column_name string 235 | var data_type string 236 | err = rows.Scan(&table_name, &column_name, &data_type) 237 | 238 | if err != nil { 239 | return err 240 | } 241 | ic <- []driver.Value{source.Name, string(model.Name), table_name, column_name, data_type} 242 | } 243 | } 244 | } 245 | return nil 246 | }) 247 | } 248 | if err := schemaErrGroup.Wait(); err != nil { 249 | return err 250 | } 251 | 252 | return nil 253 | } 254 | 255 | // buildPostgresInformationSchema builds the information schema for all postgres sources in the config 256 | func buildPostgresInformationSchema(sources []Source, ic chan<- []driver.Value, mc *ModelConfig) error { 257 | schemaErrGroup := new(errgroup.Group) 258 | 259 | for _, source := range sources { 260 | err := func(source Source) error { 261 | schemaErrGroup.Go(func() error { 262 | // Open new pool for every source 263 | pool, err := getPostgresPoolFromSource(source) 264 | if err != nil { 265 | return err 266 | } 267 | 268 | defer pool.Close() 269 | schema := "public" 270 | 271 | // Iterate over all models and get the tables for each model 272 | for _, model := range mc.Models { 273 | if model.Type == "database" && model.Parsed != nil && slices.Contains(source.Models, string(model.Name)) { 274 | tablesQueryString := "" 275 | for _, tableName := range model.TableSet { 276 | if tablesQueryString != "" { 277 | tablesQueryString += fmt.Sprintf(",'%s'", tableName) 278 | } else { 279 | tablesQueryString += fmt.Sprintf("'%s'", tableName) 280 | } 281 | } 282 | 283 | query := fmt.Sprintf(` 284 | select table_name, column_name, data_type from information_schema.columns 285 | where table_schema = '%s' and table_name in (%s); 286 | `, schema, tablesQueryString) 287 | 288 | rows, err := pool.Query(context.Background(), query) 289 | if err != nil { 290 | return fmt.Errorf("error querying postgres information schema: %w", err) 291 | } 292 | 293 | defer rows.Close() 294 | 295 | for rows.Next() { 296 | values, err := rows.Values() 297 | if err != nil { 298 | return err 299 | } 300 | ic <- []driver.Value{source.Name, string(model.Name), values[0], values[1], values[2]} 301 | } 302 | } 303 | } 304 | return nil 305 | }) 306 | return nil 307 | }(source) 308 | if err != nil { 309 | return err 310 | } 311 | } 312 | if err := schemaErrGroup.Wait(); err != nil { 313 | return err 314 | } 315 | 316 | return nil 317 | } 318 | 319 | // groupSourceByEngine reduces the raw config.Sources into a map of engine -> sources 320 | func groupSourceByEngine(sc *SourceConfig) map[string][]Source { 321 | engines := make(map[string][]Source) 322 | for _, source := range sc.Sources { 323 | engines[source.Engine] = append(engines[source.Engine], source) 324 | } 325 | 326 | return engines 327 | } 328 | 329 | // prepareDDBInformationSchema creates the table for the 
information schema in duckDB 330 | func prepareDDBInformationSchema() error { 331 | informationSchemaColumnNames := []string{"source_name varchar", "model_name varchar", "table_name varchar", "column_name varchar", "data_type varchar"} 332 | informationSchemaTableName := "main.preen_information_schema" 333 | Debug(fmt.Sprintf("Creating table %s", informationSchemaTableName)) 334 | err := ddbExec(fmt.Sprintf("create or replace table %s (%s)", informationSchemaTableName, strings.Join(informationSchemaColumnNames, ", "))) 335 | if err != nil { 336 | return err 337 | } 338 | 339 | return nil 340 | } 341 | -------------------------------------------------------------------------------- /internal/engine/columns.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "strings" 7 | 8 | "github.com/preendata/sqlparser" 9 | ) 10 | 11 | type FuncName string 12 | 13 | type Column struct { 14 | TableName *TableName 15 | ModelName ModelName 16 | FuncName FuncName 17 | IsJoin bool 18 | Position int 19 | Alias string 20 | } 21 | 22 | type columnParser struct { 23 | columns map[TableName]map[ColumnName]Column 24 | ddlString string 25 | tableName TableName 26 | modelName ModelName 27 | selectIdx int 28 | columnMetadata ColumnMetadata 29 | } 30 | 31 | type TableName string 32 | type ColumnName string 33 | type MajorityType string 34 | type ColumnType struct { 35 | // Types is a slice of every data type found for a column from its sources 36 | Types []string `json:"types"` 37 | MajorityType MajorityType `json:"majority_type"` 38 | } 39 | 40 | type ColumnMetadata map[TableName]map[ColumnName]ColumnType 41 | 42 | // BuildColumnMetadata does 2 things: 43 | // 1) Acts as the interface between information schema data stored in DuckDB and the parts of the application that will 44 | // need to consume that data, in particular the model builder 45 | // 2) Performs type validation against each column pulled from the source databases, via the Boyer-Moore majority voting 46 | // algorithm. This majority type is then packaged into the ColumnMetadata and return to the caller. This is important 47 | // for typing the model tables created in DuckDB 48 | func BuildColumnMetadata() (ColumnMetadata, error) { 49 | // query data from preen_information_schema 50 | results, err := Execute("SELECT column_name, data_type, table_name FROM preen_information_schema") 51 | if err != nil { 52 | return nil, err 53 | } 54 | 55 | columnMetadata := buildColumnMetadataDataStructure(&results.Rows) 56 | // For each column in each table as sourced from InformationSchema, determine the majority type 57 | for tableName, tableStruct := range columnMetadata { 58 | for columnName, columnStruct := range tableStruct { 59 | majorityType, err := identifyMajorityType(columnName, columnStruct.Types) 60 | if err != nil { 61 | return nil, err 62 | } 63 | columnMetadata[tableName][columnName] = ColumnType{ 64 | Types: columnStruct.Types, 65 | MajorityType: majorityType, 66 | } 67 | } 68 | } 69 | 70 | return columnMetadata, nil 71 | } 72 | 73 | // Rearranges the result set from the information schema to make it easier to process for the majority type calculator 74 | func buildColumnMetadataDataStructure(rows *[]map[string]any) ColumnMetadata { 75 | columnMetadata := make(ColumnMetadata) 76 | 77 | for _, row := range *rows { 78 | 79 | // Runtime panic waiting to happen. 
This depends on the information schema being built correctly and only with 80 | // type string 81 | tableName := TableName(row["table_name"].(string)) 82 | columnName := ColumnName(row["column_name"].(string)) 83 | dataType := (row["data_type"].(string)) 84 | // Create table map if not exists 85 | _, exists := columnMetadata[tableName] 86 | if !exists { 87 | columnMetadata[tableName] = make(map[ColumnName]ColumnType) 88 | } 89 | 90 | // Create column map if not exists 91 | _, exists = columnMetadata[tableName][columnName] 92 | if !exists { 93 | columnMetadata[tableName][columnName] = ColumnType{ 94 | Types: make([]string, 0), 95 | } 96 | } 97 | 98 | // Append data type to column map 99 | localTypesCopy := append(columnMetadata[tableName][columnName].Types, dataType) 100 | columnMetadata[tableName][columnName] = ColumnType{ 101 | Types: localTypesCopy, 102 | } 103 | 104 | } 105 | 106 | return columnMetadata 107 | } 108 | 109 | // Select majority type of input column via Boyer-Moore majority vote algorithm 110 | func identifyMajorityType(columnName ColumnName, types []string) (MajorityType, error) { 111 | // Implement Boyer-Moore majority vote algorithm 112 | var majority MajorityType 113 | votes := 0 114 | 115 | for _, candidate := range types { 116 | mtCandidate := MajorityType(candidate) 117 | if votes == 0 { 118 | majority = mtCandidate 119 | } 120 | if mtCandidate == majority { 121 | votes++ 122 | } else { 123 | votes-- 124 | } 125 | } 126 | 127 | count := 0 128 | 129 | // Checking if majority candidate occurs more than n/2 times 130 | for _, candidate := range types { 131 | if MajorityType(candidate) == majority { 132 | count += 1 133 | } 134 | } 135 | if majority == "" { 136 | Warn( 137 | fmt.Sprintf("Column: '%s' is missing from majority of tables!", columnName), 138 | ) 139 | } else if count > len(types)/2 && count == len(types) { 140 | Debug( 141 | fmt.Sprintf("Data type for column '%s' is: %s", columnName, majority), 142 | ) 143 | return majority, nil 144 | 145 | } else if count > len(types)/2 && count != len(types) { 146 | Warn( 147 | fmt.Sprintf("Discrepancy in data types for column '%s'! 
Using majority data type of %s", columnName, majority), 148 | ) 149 | return majority, nil 150 | } 151 | 152 | Warn( 153 | fmt.Sprintf("No majority data type found for column '%s'!", columnName), 154 | ) 155 | // This needs to be made unreachable 156 | return "unknown", fmt.Errorf("no majority data type found for column '%s'", columnName) 157 | } 158 | 159 | func ParseModelColumns(mc *ModelConfig, columnMetadata ColumnMetadata) error { 160 | cp := columnParser{ 161 | columns: make(map[TableName]map[ColumnName]Column), 162 | columnMetadata: columnMetadata, 163 | } 164 | for _, model := range mc.Models { 165 | switch model.Type { 166 | case "database": 167 | if model.Parsed == nil { 168 | if err := parseNoSQLDatabaseModelColumns(model, &cp); err != nil { 169 | return fmt.Errorf("error parsing noSQL database model columns: %w", err) 170 | } 171 | } else { 172 | if err := parseSQLDatabaseModelColumns(model, &cp); err != nil { 173 | return fmt.Errorf("error parsing SQL database model columns: %w", err) 174 | } 175 | } 176 | case "file": 177 | Debug("no columns to parse for file model") 178 | default: 179 | return fmt.Errorf("model type %s not supported", model.Type) 180 | } 181 | model.Columns = cp.columns 182 | model.DDLString = cp.ddlString 183 | } 184 | 185 | return nil 186 | } 187 | 188 | func parseSQLDatabaseModelColumns(model *Model, cp *columnParser) error { 189 | cp.ddlString = "preen_source_name varchar" 190 | selectStmt := model.Parsed.(*sqlparser.Select) 191 | for selectIdx := range selectStmt.SelectExprs { 192 | cp.selectIdx = selectIdx 193 | switch expr := selectStmt.SelectExprs[selectIdx].(type) { 194 | case *sqlparser.AliasedExpr: 195 | switch expr.Expr.(type) { 196 | // Process normal column. 197 | case *sqlparser.ColName: 198 | tableAlias := expr.Expr.(*sqlparser.ColName).Qualifier.Name.String() 199 | cp.tableName = model.TableMap[TableAlias(tableAlias)] 200 | if err := processModelColumn(expr, cp); err != nil { 201 | return err 202 | } 203 | // Process function expression column. 204 | case *sqlparser.FuncExpr: 205 | cp.tableName = "model_generated" 206 | if err := processFunction(expr, cp); err != nil { 207 | return err 208 | } 209 | // Process case expression column 210 | case *sqlparser.CaseExpr: 211 | cp.tableName = "model_generated" 212 | if err := processCase(expr, cp); err != nil { 213 | return err 214 | } 215 | // Process cast expression column 216 | case *sqlparser.ConvertExpr: 217 | tableAlias := expr.Expr.(*sqlparser.ConvertExpr).Expr.(*sqlparser.ColName).Qualifier.Name.String() 218 | cp.tableName = model.TableMap[TableAlias(tableAlias)] 219 | if err := processConvertColumn(expr, cp); err != nil { 220 | return err 221 | } 222 | } 223 | case *sqlparser.StarExpr: 224 | return errors.New("star expressions are not supported. 
please specify columns explicitly") 225 | } 226 | } 227 | return nil 228 | } 229 | 230 | func parseNoSQLDatabaseModelColumns(model *Model, cp *columnParser) error { 231 | cp.modelName = ModelName(model.Name) 232 | cp.tableName = TableName(model.Name) 233 | cp.ddlString = "preen_source_name varchar, document json" 234 | cp.columns[cp.tableName] = make(map[ColumnName]Column) 235 | sourceColumn := Column{ 236 | ModelName: model.Name, 237 | TableName: &cp.tableName, 238 | IsJoin: false, 239 | Position: 0, 240 | Alias: "preen_source_name", 241 | } 242 | sourceColumnHashKey := ColumnName(fmt.Sprintf("%s.preen_source_name", model.Name)) 243 | cp.columns[cp.tableName][sourceColumnHashKey] = sourceColumn 244 | documentColumn := Column{ 245 | ModelName: model.Name, 246 | TableName: &cp.tableName, 247 | IsJoin: false, 248 | Position: 1, 249 | Alias: "document", 250 | } 251 | documentColumnHashKey := ColumnName(fmt.Sprintf("%s.document", model.Name)) 252 | cp.columns[cp.tableName][documentColumnHashKey] = documentColumn 253 | 254 | return nil 255 | } 256 | 257 | func processModelColumn(expr *sqlparser.AliasedExpr, cp *columnParser) error { 258 | // We require fully qualified column names, i.e. table.column, users.user_id. 259 | if expr.Expr.(*sqlparser.ColName).Qualifier.Name.String() == "" { 260 | return errors.New("column names must be fully qualified, e.g. table.column") 261 | } 262 | if _, ok := cp.columns[cp.tableName]; !ok { 263 | cp.columns[cp.tableName] = make(map[ColumnName]Column) 264 | } 265 | 266 | col := Column{ 267 | TableName: &cp.tableName, 268 | Position: cp.selectIdx, 269 | } 270 | if expr.As.String() != "" { 271 | col.Alias = expr.As.String() 272 | } else { 273 | col.Alias = expr.Expr.(*sqlparser.ColName).Name.String() 274 | } 275 | colName := expr.Expr.(*sqlparser.ColName).Name.String() 276 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, colName) 277 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col 278 | 279 | // Check to see if the table and column exists in the columnMetadata structure 280 | // If it does not exist, then we return an error since we are unable to determine 281 | // the appropriate data type. 282 | if _, ok := cp.columnMetadata[TableName(cp.tableName)][ColumnName(colName)]; !ok { 283 | return fmt.Errorf("column not found in table: %s.%s. check that your model query is valid", cp.tableName, colName) 284 | } 285 | 286 | // Look up the data type and append it to the table creation DDL string. 
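// For example, a Postgres column reported as "character varying" maps to the
// DuckDB type varchar via duckdbTypeMap.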
287 | // ToLower is necessary because Snowflake is an upper case-aholic 288 | colType := duckdbTypeMap[strings.ToLower(string(cp.columnMetadata[TableName(cp.tableName)][ColumnName(colName)].MajorityType))] 289 | if colType == "" { 290 | return fmt.Errorf("data type not found for column: %s.%s", cp.tableName, colName) 291 | } 292 | cp.ddlString = fmt.Sprintf("%s, %s %s", cp.ddlString, col.Alias, colType) 293 | 294 | return nil 295 | } 296 | 297 | func processFunction(expr *sqlparser.AliasedExpr, cp *columnParser) error { 298 | funcExpr := expr.Expr.(*sqlparser.FuncExpr) 299 | if _, ok := cp.columns[cp.tableName]; !ok { 300 | cp.columns[cp.tableName] = make(map[ColumnName]Column) 301 | } 302 | col := Column{ 303 | TableName: &cp.tableName, 304 | FuncName: FuncName(funcExpr.Name.String()), 305 | Position: cp.selectIdx, 306 | } 307 | if expr.As.String() != "" { 308 | col.Alias = expr.As.String() 309 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, col.Alias) 310 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col 311 | } else { 312 | col.Alias = fmt.Sprintf("\"%s\"", sqlparser.String(expr)) 313 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, col.Alias) 314 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col 315 | } 316 | 317 | switch col.FuncName { 318 | // Count always returns an integer type 319 | case "count": 320 | cp.ddlString = fmt.Sprintf("%s, %s bigint", cp.ddlString, col.Alias) 321 | // Average always returns a double 322 | case "avg": 323 | cp.ddlString = fmt.Sprintf("%s, %s double", cp.ddlString, col.Alias) 324 | // Look up the data type of the column inside the function and use that data type 325 | default: 326 | selectExpr := funcExpr.Exprs[0].(*sqlparser.AliasedExpr).Expr 327 | colName := selectExpr.(*sqlparser.ColName).Name.String() 328 | tableName := TableName(selectExpr.(*sqlparser.ColName).Qualifier.Name.String()) 329 | if _, ok := cp.columnMetadata[tableName][ColumnName(colName)]; !ok { 330 | return fmt.Errorf("column not found in table: %s.%s. check that your model query is valid", cp.tableName, colName) 331 | } 332 | colType := duckdbTypeMap[string(cp.columnMetadata[tableName][ColumnName(colName)].MajorityType)] 333 | cp.ddlString = fmt.Sprintf("%s, %s %s", cp.ddlString, col.Alias, colType) 334 | } 335 | 336 | return nil 337 | } 338 | 339 | func processCase(expr *sqlparser.AliasedExpr, cp *columnParser) error { 340 | if _, ok := cp.columns[cp.tableName]; !ok { 341 | cp.columns[cp.tableName] = make(map[ColumnName]Column) 342 | } 343 | col := Column{ 344 | TableName: &cp.tableName, 345 | Position: cp.selectIdx, 346 | } 347 | 348 | if expr.As.String() != "" { 349 | col.Alias = expr.As.String() 350 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, col.Alias) 351 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col 352 | } else { 353 | col.Alias = fmt.Sprintf("\"%s\"", sqlparser.String(expr)) 354 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, col.Alias) 355 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col 356 | } 357 | 358 | colType := new(string) 359 | whens := expr.Expr.(*sqlparser.CaseExpr).Whens 360 | // Check the first when clause to determine the data type of the column 361 | // If any of the when clauses have a different data type, then the SQL engine 362 | // will throw an error for us. 
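// For example, in
//   case when users.age >= 18 then 'adult' else 'minor' end as age_group
// the first when-clause value is a string literal, so the generated DuckDB
// column age_group is typed varchar.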
363 | switch expr := whens[0].Val.(type) { 364 | case sqlparser.BoolVal: 365 | *colType = "boolean" 366 | case *sqlparser.SQLVal: 367 | switch expr.Type { 368 | case sqlparser.IntVal: 369 | *colType = "bigint" 370 | case sqlparser.StrVal: 371 | *colType = "varchar" 372 | case sqlparser.FloatVal: 373 | *colType = "double" 374 | default: 375 | return errors.New("unsupported data type in case expression") 376 | } 377 | default: 378 | return errors.New("unsupported data type in case expression") 379 | } 380 | cp.ddlString = fmt.Sprintf("%s, %s %s", cp.ddlString, col.Alias, *colType) 381 | 382 | return nil 383 | } 384 | 385 | func processConvertColumn(expr *sqlparser.AliasedExpr, cp *columnParser) error { 386 | convertExpr := expr.Expr.(*sqlparser.ConvertExpr) 387 | if _, ok := cp.columns[cp.tableName]; !ok { 388 | cp.columns[cp.tableName] = make(map[ColumnName]Column) 389 | } 390 | col := Column{ 391 | TableName: &cp.tableName, 392 | Position: cp.selectIdx, 393 | } 394 | if expr.As.String() != "" { 395 | col.Alias = expr.As.String() 396 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, col.Alias) 397 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col 398 | } else { 399 | col.Alias = fmt.Sprintf("\"%s\"", sqlparser.String(expr)) 400 | colHashKey := fmt.Sprintf("%s.%s", cp.tableName, col.Alias) 401 | cp.columns[cp.tableName][ColumnName(colHashKey)] = col 402 | } 403 | colType := convertExpr.Type.Type 404 | cp.ddlString = fmt.Sprintf("%s, %s %s", cp.ddlString, col.Alias, colType) 405 | 406 | return nil 407 | } 408 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= 2 | filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= 3 | github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 h1:/vQbFIOMbk2FiG/kXiLl8BRyzTWDw7gX/Hz7Dd5eDMs= 4 | github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4/go.mod h1:hN7oaIRCjzsZ2dE+yG5k+rsdt3qcwykqK6HVGcKwsw4= 5 | github.com/99designs/keyring v1.2.2 h1:pZd3neh/EmUzWONb35LxQfvuY7kiSXAq3HQd97+XBn0= 6 | github.com/99designs/keyring v1.2.2/go.mod h1:wes/FrByc8j7lFOAGLGSNEg8f/PaI3cgTBqhFkHUrPk= 7 | github.com/Azure/azure-sdk-for-go/sdk/azcore v1.17.0 h1:g0EZJwz7xkXQiZAI5xi9f3WWFYBlX1CPTrR+NDToRkQ= 8 | github.com/Azure/azure-sdk-for-go/sdk/azcore v1.17.0/go.mod h1:XCW7KnZet0Opnr7HccfUw1PLc4CjHqpcaxW8DHklNkQ= 9 | github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.0 h1:B/dfvscEQtew9dVuoxqxrUKKv8Ih2f55PydknDamU+g= 10 | github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.0/go.mod h1:fiPSssYvltE08HJchL04dOy+RD4hgrjph0cwGGMntdI= 11 | github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 h1:ywEEhmNahHBihViHepv3xPBn1663uRv2t2q/ESv9seY= 12 | github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0/go.mod h1:iZDifYGJTIgIIkYRNWPENUnqx6bJ2xnSDFI2tjwZNuY= 13 | github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.6.0 h1:PiSrjRPpkQNjrM8H0WwKMnZUdu1RGMtd/LdGKUrOo+c= 14 | github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.6.0/go.mod h1:oDrbWx4ewMylP7xHivfgixbfGBT6APAwsSoHRKotnIc= 15 | github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.0 h1:UXT0o77lXQrikd1kgwIPQOUect7EoR/+sbP4wQKdzxM= 16 | github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.0/go.mod h1:cTvi54pg19DoT07ekoeMgE/taAwNtCShVeZqA+Iv2xI= 17 | github.com/AzureAD/microsoft-authentication-library-for-go 
v1.3.2 h1:kYRSnvJju5gYVyhkij+RTJ/VR6QIUaCfWeaFm2ycsjQ= 18 | github.com/AzureAD/microsoft-authentication-library-for-go v1.3.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= 19 | github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0= 20 | github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= 21 | github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= 22 | github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= 23 | github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= 24 | github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= 25 | github.com/apache/arrow-go/v18 v18.1.0 h1:agLwJUiVuwXZdwPYVrlITfx7bndULJ/dggbnLFgDp/Y= 26 | github.com/apache/arrow-go/v18 v18.1.0/go.mod h1:tigU/sIgKNXaesf5d7Y95jBBKS5KsxTqYBKXFsvKzo0= 27 | github.com/apache/arrow/go/v16 v16.1.0 h1:dwgfOya6s03CzH9JrjCBx6bkVb4yPD4ma3haj9p7FXI= 28 | github.com/apache/arrow/go/v16 v16.1.0/go.mod h1:9wnc9mn6vEDTRIm4+27pEjQpRKuTvBaessPoEXQzxWA= 29 | github.com/apache/thrift v0.21.0 h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE= 30 | github.com/apache/thrift v0.21.0/go.mod h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw= 31 | github.com/aws/aws-sdk-go-v2 v1.34.0 h1:9iyL+cjifckRGEVpRKZP3eIxVlL06Qk1Tk13vreaVQU= 32 | github.com/aws/aws-sdk-go-v2 v1.34.0/go.mod h1:JgstGg0JjWU1KpVJjD5H0y0yyAIpSdKEq556EI6yOOM= 33 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.8 h1:zAxi9p3wsZMIaVCdoiQp2uZ9k1LsZvmAnoTBeZPXom0= 34 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.8/go.mod h1:3XkePX5dSaxveLAYY7nsbsZZrKxCyEuE5pM4ziFxyGg= 35 | github.com/aws/aws-sdk-go-v2/config v1.29.2 h1:JuIxOEPcSKpMB0J+khMjznG9LIhIBdmqNiEcPclnwqc= 36 | github.com/aws/aws-sdk-go-v2/config v1.29.2/go.mod h1:HktTHregOZwNSM/e7WTfVSu9RCX+3eOv+6ij27PtaYs= 37 | github.com/aws/aws-sdk-go-v2/credentials v1.17.55 h1:CDhKnDEaGkLA5ZszV/qw5uwN5M8rbv9Cl0JRN+PRsaM= 38 | github.com/aws/aws-sdk-go-v2/credentials v1.17.55/go.mod h1:kPD/vj+RB5MREDUky376+zdnjZpR+WgdBBvwrmnlmKE= 39 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.25 h1:kU7tmXNaJ07LsyN3BUgGqAmVmQtq0w6duVIHAKfp0/w= 40 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.25/go.mod h1:OiC8+OiqrURb1wrwmr/UbOVLFSWEGxjinj5C299VQdo= 41 | github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.55 h1:yfz56qEKO2MqTV0m81KtZS7swlP335FMrmoC1GBgU5k= 42 | github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.55/go.mod h1:O/fEJxrOLSCbhA8jL1xHwo8gzbN/iNcT+y7aq7c6KHE= 43 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.29 h1:Ej0Rf3GMv50Qh4G4852j2djtoDb7AzQ7MuQeFHa3D70= 44 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.29/go.mod h1:oeNTC7PwJNoM5AznVr23wxhLnuJv0ZDe5v7w0wqIs9M= 45 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.29 h1:6e8a71X+9GfghragVevC5bZqvATtc3mAMgxpSNbgzF0= 46 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.29/go.mod h1:c4jkZiQ+BWpNqq7VtrxjwISrLrt/VvPq3XiopkUIolI= 47 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2 h1:Pg9URiobXy85kgFev3og2CuOZ8JZUBENF+dcgWBaYNk= 48 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2/go.mod h1:FbtygfRFze9usAadmnGJNc8KsP346kEe+y2/oyhGAGc= 49 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.29 h1:g9OUETuxA8i/Www5Cby0R3WSTe7ppFTZXHVLNskNS4w= 50 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.29/go.mod h1:CQk+koLR1QeY1+vm7lqNfFii07DEderKq6T3F1L2pyc= 51 | 
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2 h1:D4oz8/CzT9bAEYtVhSBmFj2dNOtaHOtMKc2vHBwYizA= 52 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2/go.mod h1:Za3IHqTQ+yNcRHxu1OFucBh0ACZT4j4VQFF0BqpZcLY= 53 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.3 h1:EP1ITDgYVPM2dL1bBBntJ7AW5yTjuWGz9XO+CZwpALU= 54 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.3/go.mod h1:5lWNWeAgWenJ/BZ/CP9k9DjLbC0pjnM045WjXRPPi14= 55 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.10 h1:hN4yJBGswmFTOVYqmbz1GBs9ZMtQe8SrYxPwrkrlRv8= 56 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.10/go.mod h1:TsxON4fEZXyrKY+D+3d2gSTyJkGORexIYab9PTf56DA= 57 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.10 h1:fXoWC2gi7tdJYNTPnnlSGzEVwewUchOi8xVq/dkg8Qs= 58 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.10/go.mod h1:cvzBApD5dVazHU8C2rbBQzzzsKc8m5+wNJ9mCRZLKPc= 59 | github.com/aws/aws-sdk-go-v2/service/s3 v1.75.0 h1:UPQJDyqUXICUt60X4PwbiEf+2QQ4VfXUhDk8OEiGtik= 60 | github.com/aws/aws-sdk-go-v2/service/s3 v1.75.0/go.mod h1:hHnELVnIHltd8EOF3YzahVX6F6y2C6dNqpRj1IMkS5I= 61 | github.com/aws/aws-sdk-go-v2/service/sso v1.24.12 h1:kznaW4f81mNMlREkU9w3jUuJvU5g/KsqDV43ab7Rp6s= 62 | github.com/aws/aws-sdk-go-v2/service/sso v1.24.12/go.mod h1:bZy9r8e0/s0P7BSDHgMLXK2KvdyRRBIQ2blKlvLt0IU= 63 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.11 h1:mUwIpAvILeKFnRx4h1dEgGEFGuV8KJ3pEScZWVFYuZA= 64 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.11/go.mod h1:JDJtD+b8HNVv71axz8+S5492KM8wTzHRFpMKQbPlYxw= 65 | github.com/aws/aws-sdk-go-v2/service/sts v1.33.10 h1:g9d+TOsu3ac7SgmY2dUf1qMgu/uJVTlQ4VCbH6hRxSw= 66 | github.com/aws/aws-sdk-go-v2/service/sts v1.33.10/go.mod h1:WZfNmntu92HO44MVZAubQaz3qCuIdeOdog2sADfU6hU= 67 | github.com/aws/smithy-go v1.22.2 h1:6D9hW43xKFrRx/tXXfAlIZc4JI+yQe6snnWcQyxSyLQ= 68 | github.com/aws/smithy-go v1.22.2/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= 69 | github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM= 70 | github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ= 71 | github.com/chzyer/readline v1.5.1 h1:upd/6fQk4src78LMRzh5vItIt361/o4uq553V8B5sGI= 72 | github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk= 73 | github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04= 74 | github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= 75 | github.com/cpuguy83/go-md2man/v2 v2.0.6 h1:XJtiaUW6dEEqVuZiMTn1ldk455QWwEIsMIJlo5vtkx0= 76 | github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= 77 | github.com/danieljoos/wincred v1.2.2 h1:774zMFJrqaeYCK2W57BgAem/MLi6mtSE47MB6BOJ0i0= 78 | github.com/danieljoos/wincred v1.2.2/go.mod h1:w7w4Utbrz8lqeMbDAK0lkNJUv5sAOkFi7nd/ogr0Uh8= 79 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 80 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 81 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 82 | github.com/dvsekhvalnov/jose2go v1.8.0 h1:LqkkVKAlHFfH9LOEl5fe4p/zL02OhWE7pCufMBG2jLA= 83 | github.com/dvsekhvalnov/jose2go v1.8.0/go.mod h1:QsHjhyTlD/lAVqn/NSbVZmSCGeDehTB/mPZadG+mhXU= 84 | github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM= 85 | github.com/gabriel-vasile/mimetype v1.4.8/go.mod 
h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8= 86 | github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= 87 | github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= 88 | github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= 89 | github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= 90 | github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 h1:ZpnhV/YsD2/4cESfV5+Hoeu/iUR3ruzNvZ+yQfO03a0= 91 | github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2/go.mod h1:bBOAhwG1umN6/6ZUMtDFBMQR8jRg9O75tm9K00oMsK4= 92 | github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= 93 | github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= 94 | github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= 95 | github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 96 | github.com/google/flatbuffers v25.1.24+incompatible h1:4wPqL3K7GzBd1CwyhSd3usxLKOaJN/AC6puCca6Jm7o= 97 | github.com/google/flatbuffers v25.1.24+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= 98 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 99 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 100 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 101 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 102 | github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c h1:6rhixN/i8ZofjG1Y75iExal34USq5p+wiN1tpie8IrU= 103 | github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c/go.mod h1:NMPJylDgVpX0MLRlPy15sqSwOFv/U1GZ2m21JhFfek0= 104 | github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= 105 | github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= 106 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= 107 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= 108 | github.com/jackc/pgx/v5 v5.7.2 h1:mLoDLV6sonKlvjIEsV56SkWNCnuNv531l94GaIzO+XI= 109 | github.com/jackc/pgx/v5 v5.7.2/go.mod h1:ncY89UGWxg82EykZUwSpUKEfccBGGYq1xjrOpsbsfGQ= 110 | github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= 111 | github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= 112 | github.com/jedib0t/go-pretty/v6 v6.6.5 h1:9PgMJOVBedpgYLI56jQRJYqngxYAAzfEUua+3NgSqAo= 113 | github.com/jedib0t/go-pretty/v6 v6.6.5/go.mod h1:Uq/HrbhuFty5WSVNfjpQQe47x16RwVGXIveNGEyGtHs= 114 | github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= 115 | github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= 116 | github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= 117 | github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= 118 | github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= 119 | github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= 120 | github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY= 121 | github.com/klauspost/cpuid/v2 v2.2.9/go.mod 
h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8= 122 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 123 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 124 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 125 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 126 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 127 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 128 | github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= 129 | github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= 130 | github.com/marcboeker/go-duckdb v1.8.3 h1:ZkYwiIZhbYsT6MmJsZ3UPTHrTZccDdM4ztoqSlEMXiQ= 131 | github.com/marcboeker/go-duckdb v1.8.3/go.mod h1:C9bYRE1dPYb1hhfu/SSomm78B0FXmNgRvv6YBW/Hooc= 132 | github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= 133 | github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 134 | github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= 135 | github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= 136 | github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= 137 | github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= 138 | github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= 139 | github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= 140 | github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE= 141 | github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow= 142 | github.com/mtibben/percent v0.2.1 h1:5gssi8Nqo8QU/r2pynCm+hBQHpkB/uNK7BJCFogWdzs= 143 | github.com/mtibben/percent v0.2.1/go.mod h1:KG9uO+SZkUp+VkRHsCdYQV3XSZrrSpR3O9ibNBTZrns= 144 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 145 | github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= 146 | github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= 147 | github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= 148 | github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= 149 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 150 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 151 | github.com/preendata/sqlparser v0.0.1 h1:b6rQhOPudlKhTjfWiW51mPFNa9S6en0cnOiKPharJzs= 152 | github.com/preendata/sqlparser v0.0.1/go.mod h1:34zYH6Q7NIW6XT4buKfY4S3QalQwgeeehwHW7DD9Se8= 153 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 154 | github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= 155 | github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 156 | github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= 157 | github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= 
158 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 159 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 160 | github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= 161 | github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 162 | github.com/snowflakedb/gosnowflake v1.13.0 h1:NQoy4mnHUmBuruJhzAGVRO9YLpFxayYTCLf+dxvG7bk= 163 | github.com/snowflakedb/gosnowflake v1.13.0/go.mod h1:nwiPNHaS3EGxnW1rr10ascVYFLA4EKrqMX2TxPt0+N4= 164 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 165 | github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= 166 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= 167 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 168 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 169 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 170 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 171 | github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w= 172 | github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ= 173 | github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= 174 | github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= 175 | github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= 176 | github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= 177 | github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= 178 | github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= 179 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= 180 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= 181 | github.com/xwb1989/sqlparser v0.0.0-20180606152119-120387863bf2 h1:zzrxE1FKn5ryBNl9eKOeqQ58Y/Qpo3Q9QNxKHX5uzzQ= 182 | github.com/xwb1989/sqlparser v0.0.0-20180606152119-120387863bf2/go.mod h1:hzfGeIUDq/j97IG+FhNqkowIyEcD88LrW6fyU3K3WqY= 183 | github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 h1:ilQV1hzziu+LLM3zUTJ0trRztfwgjqKnBWNtSRkbmwM= 184 | github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfSfmXjznFBSZNN13rSJjlIOI1fUNAtF7rmI= 185 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 186 | github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= 187 | github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= 188 | github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= 189 | github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= 190 | go.mongodb.org/mongo-driver v1.17.2 h1:gvZyk8352qSfzyZ2UMWcpDpMSGEr1eqE4T793SqyhzM= 191 | go.mongodb.org/mongo-driver v1.17.2/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= 192 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 193 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 194 | 
golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc= 195 | golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc= 196 | golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c h1:KL/ZBHXgKGVmuZBZ01Lt57yE5ws8ZPSkkihmEyq7FXc= 197 | golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU= 198 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= 199 | golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= 200 | golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= 201 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 202 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 203 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 204 | golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= 205 | golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= 206 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 207 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 208 | golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= 209 | golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 210 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 211 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 212 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 213 | golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 214 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 215 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 216 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 217 | golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 218 | golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= 219 | golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 220 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 221 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 222 | golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg= 223 | golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek= 224 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 225 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 226 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 227 | golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= 228 | golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= 229 | golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= 230 | golang.org/x/tools 
v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 231 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 232 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 233 | golang.org/x/tools v0.29.0 h1:Xx0h3TtM9rzQpQuR4dKLrdglAmCEN5Oi+P74JdhdzXE= 234 | golang.org/x/tools v0.29.0/go.mod h1:KMQVMRsVxU6nHCFXrBPhDB8XncLNLM0lIy/F14RP588= 235 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 236 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= 237 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= 238 | gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= 239 | gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= 240 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 241 | gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 242 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 243 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 244 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 245 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 246 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 247 | --------------------------------------------------------------------------------