├── examples
│   ├── PeakHoursAnalysis.sql
│   ├── raw_sql_file.sql
│   ├── require_non_md.md
│   ├── hf.md
│   ├── ducklake.md
│   ├── hf.ipynb
│   ├── s3.md
│   ├── actions.md
│   ├── pg.md
│   ├── s3.ipynb
│   └── http.cs.md
├── .gitignore
├── .github
│   └── workflows
│       ├── docker-publish.yml
│       ├── linux.yaml
│       ├── release.yml
│       ├── build.windows.yml
│       ├── macos.yaml
│       ├── docker.yml
│       ├── windows.cc.yaml
│       └── go.yml
├── internal
│   ├── env
│   │   └── env.go
│   ├── db
│   │   ├── db_interface.go
│   │   ├── odbc.go
│   │   └── duckdb.go
│   └── etlx
│       ├── ipynb_md.go
│       ├── http.go
│       ├── compress.go
│       ├── run_logs.go
│       ├── sftp.go
│       ├── build_query.go
│       ├── ftp.go
│       ├── aws.go
│       ├── mail.go
│       ├── ducklake.go
│       ├── action_db2db.go
│       ├── load_requirements.go
│       ├── run_notify.go
│       ├── run_multiples_queries.go
│       └── run_scripts.go
├── LICENSE
├── debian.Dockerfile
├── alpine.Dockerfile
├── debian.slim.Dockerfile
├── etlx.go
├── ubuntu.Dockerfile
├── Dockerfile
├── go.mod
└── cmd
    └── main.go
/examples/PeakHoursAnalysis.sql:
--------------------------------------------------------------------------------
1 | -- Peak Hours Analysis
2 | SELECT EXTRACT(hour FROM tpep_pickup_datetime) AS hour_of_day,
3 | COUNT(*) AS total_trips,
4 | ROUND(AVG(total_amount), 2) AS avg_fare,
5 | ROUND(AVG(trip_distance), 2) AS avg_distance,
6 | ROUND(AVG(EXTRACT(EPOCH FROM (tpep_dropoff_datetime - tpep_pickup_datetime)) / 60.0), 2) AS avg_duration_minutes
7 | FROM DB."NYC_TAXI"
8 | WHERE tpep_dropoff_datetime > tpep_pickup_datetime
9 | GROUP BY hour_of_day
10 | ORDER BY hour_of_day
--------------------------------------------------------------------------------
/examples/raw_sql_file.sql:
--------------------------------------------------------------------------------
1 | -- Create a table
2 | CREATE TABLE employees (
3 | id INT PRIMARY KEY,
4 | name VARCHAR(100),
5 | position VARCHAR(50),
6 | salary DECIMAL(10, 2)
7 | );
8 |
9 | -- Insert data
10 | INSERT INTO employees (id, name, position, salary) VALUES
11 | (1, 'Alice Smith', 'Developer', 75000.00),
12 | (2, 'Bob Johnson', 'Manager', 90000.00);
13 |
14 | -- Select data
15 | SELECT * FROM employees;
16 |
17 | -- Update data
18 | UPDATE employees SET salary = 80000.00 WHERE id = 1;
19 |
20 | -- Delete data
21 | DELETE FROM employees WHERE id = 2;
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ### AL ###
2 | #Template for AL projects for Dynamics 365 Business Central
3 | #launch.json folder
4 | .vscode/
5 | #Cache folder
6 | .alcache/
7 | #Symbols folder
8 | .alpackages/
9 | #Snapshots folder
10 | .snapshots/
11 | #Testing Output folder
12 | .output/
13 | #Extension App-file
14 | *.app
15 | #Rapid Application Development File
16 | rad.json
17 | #Translation Base-file
18 | *.g.xlf
19 | #License-file
20 | *.flf
21 | #Test results file
22 | TestResults.xml
23 | *.env
24 | *.exe
25 | *.db
26 | *.db-journal
27 | *.duckdb
28 | *.ddb
29 | database/
30 | tmp/
31 | bin/
32 | .air.toml
33 | logs.*.json
34 | *.xlsx
35 | *.csv
36 | *.parquet
37 | *.ddb
38 | *.zip
39 | *.gz
40 | *nyc*
--------------------------------------------------------------------------------
/.github/workflows/docker-publish.yml:
--------------------------------------------------------------------------------
1 | name: Build and Push Docker image to Docker Hub
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | build-and-push:
8 | runs-on: ubuntu-latest
9 |
10 | steps:
11 | - name: Checkout code
12 | uses: actions/checkout@v4
13 |
14 | - name: Log in to Docker Hub
15 | uses: docker/login-action@v3
16 | with:
17 | username: ${{ secrets.DOCKERHUB_USERNAME }}
18 | password: ${{ secrets.DOCKERHUB_TOKEN }}
19 |
20 | - name: Build Docker image
21 | run: docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/etlx:latest .
22 |
23 | - name: Push Docker image
24 | run: docker push ${{ secrets.DOCKERHUB_USERNAME }}/etlx:latest
25 |
--------------------------------------------------------------------------------
/internal/env/env.go:
--------------------------------------------------------------------------------
1 | package env
2 |
3 | import (
4 | "os"
5 | "strconv"
6 | )
7 |
8 | func GetString(key, defaultValue string) string {
9 | value, exists := os.LookupEnv(key)
10 | if !exists {
11 | return defaultValue
12 | }
13 |
14 | return value
15 | }
16 |
17 | func GetInt(key string, defaultValue int) int {
18 | value, exists := os.LookupEnv(key)
19 | if !exists {
20 | return defaultValue
21 | }
22 |
23 | intValue, err := strconv.Atoi(value)
24 | if err != nil {
25 | panic(err)
26 | }
27 |
28 | return intValue
29 | }
30 |
31 | func GetBool(key string, defaultValue bool) bool {
32 | value, exists := os.LookupEnv(key)
33 | if !exists {
34 | return defaultValue
35 | }
36 |
37 | boolValue, err := strconv.ParseBool(value)
38 | if err != nil {
39 | return false
40 | }
41 |
42 | return boolValue
43 | }
44 |
--------------------------------------------------------------------------------
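A minimal usage sketch of these helpers (hypothetical, not part of the repository; `internal/env` is only importable from code inside this module). Each getter returns the supplied default when the variable is unset; `GetInt` panics on a malformed integer, while `GetBool` returns `false` on a malformed boolean. The variable names `ETLX_DSN` and `ETLX_WORKERS` are invented for illustration.

```go
package main

import (
	"fmt"

	"github.com/realdatadriven/etlx/internal/env"
)

func main() {
	// Each call falls back to the default when the variable is not set.
	dsn := env.GetString("ETLX_DSN", "duckdb:")
	workers := env.GetInt("ETLX_WORKERS", 4)        // panics if set to a non-integer
	debug := env.GetBool("ETLX_DEBUG_QUERY", false) // false if set to a non-boolean
	fmt.Println(dsn, workers, debug)
}
```
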
/.github/workflows/linux.yaml:
--------------------------------------------------------------------------------
1 | name: Linux Build
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | build-linux:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - name: Install Dependencies
11 | run: |
12 | sudo apt-get update
13 | sudo apt-get install -y build-essential gcc g++ unixodbc unixodbc-dev
14 | - name: Checkout Code
15 | uses: actions/checkout@v3
16 | - name: Setup Go
17 | uses: actions/setup-go@v4
18 | with:
19 | go-version: '1.23'
20 | - name: Build Linux Binaries
21 | run: |
22 | mkdir -p dist
23 | CGO_ENABLED=1 go build -o dist/etlx-linux-amd64 ./cmd/main.go
24 | - name: Upload Artifacts
25 | uses: actions/upload-artifact@v4
26 | with:
27 | name: linux-binary
28 | path: dist/*linux*
29 |
--------------------------------------------------------------------------------
/examples/require_non_md.md:
--------------------------------------------------------------------------------
1 | # ETL
2 |
3 | ```yaml metadata
4 | name: DB
5 | description: "Example extracting from S3 to a local sqlite3 file"
6 | connection: "duckdb:"
7 | active: true
8 | ```
9 |
10 | ## VERSION
11 |
12 | ```yaml metadata
13 | name: VERSION
14 | description: "DDB Version"
15 | table: VERSION
16 | load_conn: "duckdb:"
17 | load_before_sql: "ATTACH 'database/DB.db' AS DB (TYPE SQLITE)"
18 | load_sql: 'CREATE OR REPLACE TABLE DB."" AS SELECT version() AS "VERSION";'
19 | load_after_sql: "DETACH DB;"
20 | rows_sql: 'SELECT COUNT(*) AS "nrows" FROM DB.""'
21 | active: true
22 | ```
23 |
24 | # REQUIRES
25 |
26 | ```yaml metadata
27 | name: REQUIRES
28 | description: load dependencies
29 | active: true
30 | ```
31 |
32 | ## RAW_SQL
33 | ```yaml
34 | name: RAW_SQL
35 | description: load raw sql from file
36 | path: examples/raw_sql_file.sql
37 | active: true
38 | ```
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | release:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - name: Download Windows Binary
11 | uses: actions/download-artifact@v3
12 | with:
13 | name: windows-binary
14 | path: dist
15 | - name: Download MacOS Binary
16 | uses: actions/download-artifact@v3
17 | with:
18 | name: macos-binary
19 | path: dist
20 | - name: Download Linux Binary
21 | uses: actions/download-artifact@v3
22 | with:
23 | name: linux-binary
24 | path: dist
25 | - name: Create Release
26 | uses: softprops/action-gh-release@v1
27 | with:
28 | files: |
29 | dist/etlx-windows-amd64.exe
30 | dist/etlx-macos-amd64
31 | dist/etlx-linux-amd64
32 | env:
33 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
34 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 realdatadriven
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/debian.Dockerfile:
--------------------------------------------------------------------------------
1 | # Use a minimal Debian-based image
2 | FROM debian:bookworm-slim
3 |
4 | # Set the ETLX version and architecture
5 | ARG ETLX_VERSION=v0.2.1
6 | ARG ETLX_ARCH=amd64 # Change to arm64 if needed for ARM-based systems
7 |
8 | # Define the download URL for the zipped release
9 | ENV ETLX_URL="https://github.com/realdatadriven/etlx/releases/download/${ETLX_VERSION}/etlx-linux-${ETLX_ARCH}.zip"
10 |
11 | # Install dependencies (curl for downloading, unzip for extracting)
12 | RUN apt-get update && apt-get install -y \
13 | curl \
14 | unzip \
15 | ca-certificates \
16 | unixodbc \
17 | build-essential \
18 | libc6 \
19 | && rm -rf /var/lib/apt/lists/*
20 |
21 | # Set working directory
22 | WORKDIR /app
23 |
24 | # Download and extract the ETLX binary
25 | RUN curl -L $ETLX_URL -o etlx.zip && \
26 | unzip etlx.zip && \
27 | rm etlx.zip && \
28 | mv etlx-linux-${ETLX_ARCH} /usr/local/bin/etlx && \
29 | chmod +x /usr/local/bin/etlx
30 |
31 | # Allow users to mount a config file
32 | VOLUME ["/app/config"]
33 |
34 | # Set the entrypoint to pass CLI arguments
35 | ENTRYPOINT ["/usr/local/bin/etlx"]
36 |
--------------------------------------------------------------------------------
/alpine.Dockerfile:
--------------------------------------------------------------------------------
1 | # Use a minimal Alpine-based image
2 | FROM alpine:latest
3 |
4 | # Set the ETLX version and architecture
5 | ARG ETLX_VERSION=v0.2.1
6 | ARG ETLX_ARCH=amd64 # Change to arm64 if needed for ARM-based systems
7 |
8 | # Define the download URL for the zipped release
9 | ENV ETLX_URL="https://github.com/realdatadriven/etlx/releases/download/${ETLX_VERSION}/etlx-linux-${ETLX_ARCH}.zip"
10 |
11 | # Install dependencies (curl for downloading, unzip for extracting, libc6 is replaced by musl)
12 | RUN apk update && apk add --no-cache \
13 | curl \
14 | unzip \
15 | ca-certificates \
16 | unixodbc \
17 | libc6-compat \
18 | bash \
19 | && rm -rf /var/cache/apk/*
20 |
21 | # Set working directory
22 | WORKDIR /app
23 |
24 | # Download and extract the ETLX binary
25 | RUN curl -L $ETLX_URL -o etlx.zip && \
26 | unzip etlx.zip && \
27 | rm etlx.zip && \
28 | mv etlx-linux-${ETLX_ARCH} /usr/local/bin/etlx && \
29 | chmod +x /usr/local/bin/etlx
30 |
31 | # Allow users to mount a config file
32 | VOLUME ["/app/config"]
33 |
34 | # Set the entrypoint to pass CLI arguments
35 | ENTRYPOINT ["/usr/local/bin/etlx"]
36 |
--------------------------------------------------------------------------------
/.github/workflows/build.windows.yml:
--------------------------------------------------------------------------------
1 | name: Debug Windows Build
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | build-windows-msvc:
8 | runs-on: windows-latest
9 | steps:
10 | - name: Checkout Code
11 | uses: actions/checkout@v3
12 |
13 | - name: Setup Go
14 | uses: actions/setup-go@v4
15 | with:
16 | go-version: '1.24'
17 |
18 | - name: Setup MSBuild (for Visual Studio environment)
19 | uses: microsoft/setup-msbuild@v2
20 |
21 | - name: Set Environment Variables for CGO
22 | run: |
23 | echo "CGO_ENABLED=1" >> $env:GITHUB_ENV
24 | # echo "CC=cl.exe" >> $env:GITHUB_ENV
25 |
26 | - name: Build with MSVC (Visual Studio compiler)
27 | run: |
28 | mkdir dist
29 | go build -o dist/etlx-windows-amd64.exe ./cmd/main.go
30 | # echo "GOARCH=arm64" >> $env:GITHUB_ENV
31 | # go build -o dist/etlx-windows-arm64.exe ./cmd/main.go
32 |
33 | - name: Upload MSVC Artifacts
34 | uses: actions/upload-artifact@v4
35 | with:
36 | name: windows-msvc-binary
37 | path: dist/*windows*
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/debian.slim.Dockerfile:
--------------------------------------------------------------------------------
1 | # Use a minimal Debian-based image
2 | FROM debian:bookworm-slim
3 |
4 | # Set the ETLX version and architecture
5 | ARG ETLX_VERSION=v0.2.1
6 | ARG ETLX_ARCH=amd64 # Change to arm64 if needed for ARM-based systems
7 |
8 | # Define the download URL for the zipped release
9 | ENV ETLX_URL="https://github.com/realdatadriven/etlx/releases/download/${ETLX_VERSION}/etlx-linux-${ETLX_ARCH}.zip"
10 |
11 | # Install dependencies (curl for downloading, unzip for extracting)
12 | RUN apt-get update && apt-get install -y \
13 | curl \
14 | unzip \
15 | ca-certificates \
16 | unixodbc \
17 | build-essential \
18 | libc6 \
19 | && rm -rf /var/lib/apt/lists/*
20 |
21 | # Set working directory
22 | WORKDIR /app
23 |
24 | # Download and extract the ETLX binary
25 | RUN curl -L $ETLX_URL -o etlx.zip && \
26 | unzip etlx.zip && \
27 | rm etlx.zip && \
28 | mv etlx-linux-${ETLX_ARCH} /usr/local/bin/etlx && \
29 | chmod +x /usr/local/bin/etlx
30 |
31 | # Allow users to mount a config file
32 | VOLUME ["/app/config"]
33 |
34 | # Set the entrypoint to pass CLI arguments
35 | ENTRYPOINT ["/usr/local/bin/etlx"]
36 |
37 | # sudo docker build -t etlx:latest .
38 | # sudo docker exec etlx --help
39 |
--------------------------------------------------------------------------------
/etlx.go:
--------------------------------------------------------------------------------
1 | package etlx
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/joho/godotenv"
7 | "github.com/realdatadriven/etlx/internal/db"
8 | etlxlib "github.com/realdatadriven/etlx/internal/etlx"
9 | )
10 |
11 | // Expose the library functions
12 | type ETLX = etlxlib.ETLX
13 |
14 | type DBInterface = db.DBInterface
15 |
16 | type DB = db.DB
17 |
18 | func New(driverName string, dsn string) (*db.DB, error) {
19 | return db.New(driverName, dsn)
20 | }
21 |
22 | type DuckDB = db.DuckDB
23 |
24 | func NewDuckDB(dsn string) (*db.DuckDB, error) {
25 | return db.NewDuckDB(dsn)
26 | }
27 |
28 | type ODBC = db.ODBC
29 |
30 | func NewODBC(dsn string) (*db.ODBC, error) {
31 | return db.NewODBC(dsn)
32 | }
33 |
34 | func ReplaceDBName(dsn, dbname string) (string, error) {
35 | return db.ReplaceDBName(dsn, dbname)
36 | }
37 |
38 | type DuckLakeParseResult = etlxlib.DuckLakeParseResult
39 | type DuckLakeOccurrence = etlxlib.DuckLakeOccurrence
40 | type DuckLakeParser = etlxlib.DuckLakeParser
41 |
42 | func NewDuckLakeParser() *etlxlib.DuckLakeParser {
43 | return etlxlib.NewDuckLakeParser()
44 | }
45 |
46 | func GetDB(conn string) (DBInterface, error) {
47 | /** returns a DBInterface, choosing the driver based on the etlx connection-style "driver:" prefix */
48 | return etlxlib.GetDB(conn)
49 | }
50 |
51 | func LoadDotEnv() {
52 | _err := godotenv.Load()
53 | if _err != nil {
54 | fmt.Println("Error loading .env file")
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
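A short sketch of how this exported façade might be used from a consuming program (illustrative only, not taken from the repository): `GetDB` picks the driver from the connection-string prefix, per the comment above, and the returned handle satisfies `DBInterface`. The `"duckdb:"` connection string follows the style used in the example configs.

```go
package main

import (
	"log"

	"github.com/realdatadriven/etlx"
)

func main() {
	etlx.LoadDotEnv() // loads .env if present

	// GetDB chooses the driver from the "duckdb:" prefix.
	db, err := etlx.GetDB("duckdb:")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	if _, err := db.ExecuteQuery(`CREATE TABLE t AS SELECT 42 AS answer`); err != nil {
		log.Fatal(err)
	}
}
```
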
/internal/db/db_interface.go:
--------------------------------------------------------------------------------
1 | package db
2 |
3 | import (
4 | "context"
5 | "database/sql"
6 | )
7 |
8 | type DBInterface interface {
9 | ExecuteQuery(query string, data ...interface{}) (int, error)
10 | Query2CSV(query string, csv_path string, params ...interface{}) (bool, error)
11 | QueryMultiRows(query string, params ...interface{}) (*[]map[string]interface{}, bool, error)
12 | ExecuteQueryRowsAffected(query string, data ...interface{}) (int64, error)
13 | QuerySingleRow(query string, params ...interface{}) (*map[string]interface{}, bool, error)
14 | QueryRows(ctx context.Context, query string, params ...interface{}) (*sql.Rows, error)
15 | QueryMultiRowsWithCols(query string, params ...interface{}) (*[]map[string]interface{}, []string, bool, error)
16 | AllTables(params map[string]interface{}, extra_conf map[string]interface{}) (*[]map[string]interface{}, bool, error)
17 | TableSchema(params map[string]interface{}, table string, dbName string, extra_conf map[string]interface{}) (*[]map[string]interface{}, bool, error)
18 | ExecuteNamedQuery(query string, data map[string]interface{}) (int, error)
19 | ExecuteQueryPGInsertWithLastInsertId(query string, data ...interface{}) (int, error)
20 | GetUserByNameOrEmail(email string) (map[string]interface{}, bool, error)
21 | GetDriverName() string
22 | Close() error
23 | IsEmpty(value interface{}) bool
24 | FromParams(params map[string]interface{}, extra_conf map[string]interface{}) (*DB, string, string, error)
25 | Ping() error
26 | }
27 |
--------------------------------------------------------------------------------
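For illustration, a hedged sketch of consuming the row-returning methods of this interface (connection string and query are placeholders): `QueryMultiRows` returns a pointer to a slice of column-name-to-value maps plus a success flag.

```go
package main

import (
	"fmt"
	"log"

	"github.com/realdatadriven/etlx"
)

func main() {
	db, err := etlx.GetDB("duckdb:") // placeholder in-memory connection
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	rows, ok, err := db.QueryMultiRows(`SELECT 1 AS id, 'Alice' AS name`)
	if err != nil {
		log.Fatal(err)
	}
	if ok {
		for _, row := range *rows {
			fmt.Println(row["id"], row["name"]) // each row is a map of column name to value
		}
	}
}
```
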
/ubuntu.Dockerfile:
--------------------------------------------------------------------------------
1 | # Use a minimal Ubuntu-based image
2 | FROM ubuntu:latest
3 |
4 | # Set the ETLX version and architecture
5 | ARG ETLX_VERSION=v0.2.2
6 | ARG ETLX_ARCH=amd64 # Change to arm64 if needed for ARM-based systems
7 |
8 | # Define the download URL for the zipped release
9 | ENV ETLX_URL="https://github.com/realdatadriven/etlx/releases/download/${ETLX_VERSION}/etlx-linux-${ETLX_ARCH}.zip"
10 | #ENV ETLX_URL="https://github.com/realdatadriven/etlx/actions/runs/13442643061/artifacts/2625838115"
11 | # Install dependencies (curl for downloading, unzip for extracting, and necessary libraries)
12 | RUN apt-get update && apt-get install -y \
13 | curl \
14 | unzip \
15 | ca-certificates \
16 | unixodbc \
17 | build-essential \
18 | libc6 \
19 | wget \
20 | && rm -rf /var/lib/apt/lists/*
21 |
22 | # Install a newer version of glibc (careful with this in production)
23 | #RUN wget http://ftp.gnu.org/gnu/libc/glibc-2.38.tar.gz && \
24 | # tar -xvzf glibc-2.38.tar.gz && \
25 | # cd glibc-2.38 && \
26 | # mkdir build && \
27 | # cd build && \
28 | # ../configure && \
29 | # make -j$(nproc) && \
30 | # make install
31 |
32 | # Set working directory
33 | WORKDIR /app
34 |
35 | # Download and extract the ETLX binary
36 | RUN curl -L $ETLX_URL -o etlx.zip && \
37 | unzip etlx.zip && \
38 | rm etlx.zip && \
39 | mv etlx-linux-${ETLX_ARCH} /usr/local/bin/etlx && \
40 | chmod +x /usr/local/bin/etlx
41 |
42 | # Allow users to mount a config file
43 | VOLUME ["/app/config"]
44 |
45 | # Set the entrypoint to pass CLI arguments
46 | ENTRYPOINT ["/usr/local/bin/etlx"]
47 |
--------------------------------------------------------------------------------
/.github/workflows/macos.yaml:
--------------------------------------------------------------------------------
1 | name: MacOS Build with CGO and UnixODBC
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | build-macos:
8 | runs-on: macos-latest
9 | steps:
10 | # Step 1: Checkout the code
11 | - name: Checkout Code
12 | uses: actions/checkout@v3
13 |
14 | # Step 2: Setup Go
15 | - name: Setup Go
16 | uses: actions/setup-go@v4
17 | with:
18 | go-version: '1.23'
19 |
20 | # Step 3: Install UnixODBC
21 | - name: Install UnixODBC
22 | run: |
23 | brew install unixodbc
24 | brew --prefix unixodbc
25 |
26 | # Step 4: Set Environment Variables
27 | - name: Set Environment Variables
28 | run: |
29 | ODBC_PREFIX=$(brew --prefix unixodbc)
30 | echo "CGO_ENABLED=1" >> $GITHUB_ENV
31 | echo "CGO_CFLAGS=-I$ODBC_PREFIX/include" >> $GITHUB_ENV
32 | echo "CGO_LDFLAGS=-L$ODBC_PREFIX/lib -lodbc" >> $GITHUB_ENV
33 |
34 | # Step 5: Build the Application
35 | - name: Build MacOS Binary
36 | run: |
37 | mkdir dist
38 | go build -o dist/etlx-macos-amd64 ./cmd/main.go
39 |
40 | # Step 6: Upload Build Logs for Debugging (if Build Fails)
41 | - name: Upload Logs
42 | if: failure()
43 | uses: actions/upload-artifact@v3
44 | with:
45 | name: build-logs
46 | path: ${{ github.workspace }}
47 |
48 | # Step 7: Upload Built Binary for Testing (if Successful)
49 | - name: Upload Binary
50 | if: success()
51 | uses: actions/upload-artifact@v3
52 | with:
53 | name: macos-binary
54 | path: dist/etlx-macos-amd64
55 |
--------------------------------------------------------------------------------
/internal/etlx/ipynb_md.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "os"
7 | "strings"
8 | )
9 |
10 | // Notebook represents the structure of a Jupyter notebook.
11 | type Notebook struct {
12 | Cells []Cell `json:"cells"`
13 | }
14 |
15 | // Cell represents a single cell in the notebook.
16 | type Cell struct {
17 | CellType string `json:"cell_type"`
18 | Source []string `json:"source"`
19 | }
20 |
21 | // ConvertIPYNBToMarkdown converts the content of a .ipynb file to Markdown text.
22 | func (etlx *ETLX) ConvertIPYNBToMarkdown(ipynbContent []byte) (string, error) {
23 | // Parse the .ipynb content
24 | var notebook Notebook
25 | if err := json.Unmarshal(ipynbContent, &notebook); err != nil {
26 | return "", fmt.Errorf("error parsing JSON: %w", err)
27 | }
28 | // Build the Markdown output
29 | var mdBuilder strings.Builder
30 | for _, cell := range notebook.Cells {
31 | // Skip empty cells
32 | if len(cell.Source) == 0 {
33 | continue
34 | }
35 | switch cell.CellType {
36 | case "markdown":
37 | // Add Markdown content directly
38 | for _, line := range cell.Source {
39 | mdBuilder.WriteString(line)
40 | }
41 | mdBuilder.WriteString("\n\n") // Add spacing between cells
42 | case "code":
43 | // Wrap code content in a Markdown code block
44 | mdBuilder.WriteString("```\n")
45 | for _, line := range cell.Source {
46 | mdBuilder.WriteString(line)
47 | }
48 | mdBuilder.WriteString("```\n\n")
49 | }
50 | }
51 | if os.Getenv("ETLX_DEBUG_QUERY") == "true" {
52 | _, err := etlx.TempFIle("", mdBuilder.String(), "ipymd2md.*.md")
53 | if err != nil {
54 | fmt.Println(err)
55 | }
56 | //fmt.Println(_file)
57 | }
58 | return mdBuilder.String(), nil
59 | }
60 |
--------------------------------------------------------------------------------
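A small sketch of calling the converter (assumptions: a zero-value `ETLX` receiver is sufficient, since the method only touches the receiver for the optional debug dump, and `examples/hf.ipynb` stands in for any notebook path):

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/realdatadriven/etlx"
)

func main() {
	raw, err := os.ReadFile("examples/hf.ipynb")
	if err != nil {
		log.Fatal(err)
	}
	e := &etlx.ETLX{} // assumed sufficient for this helper
	md, err := e.ConvertIPYNBToMarkdown(raw)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(md) // markdown cells pass through; code cells are wrapped in fenced blocks
}
```
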
/Dockerfile:
--------------------------------------------------------------------------------
1 | # ============================================
2 | # 🛠️ Stage 1: Build etlx from Source
3 | # ============================================
4 | FROM golang:1.24 AS builder
5 |
6 | WORKDIR /app
7 |
8 | # Install build deps if needed
9 | RUN apt-get update && apt-get install -y \
10 | build-essential \
11 | gcc \
12 | g++ \
13 | unixodbc \
14 | unixodbc-dev \
15 | && rm -rf /var/lib/apt/lists/*
16 |
17 | ENV CGO_ENABLED=1
18 |
19 | # Clone etlx repository
20 | RUN git clone --depth=1 https://github.com/realdatadriven/etlx.git .
21 |
22 | # Build etlx binary
23 | RUN go build -o etlx ./cmd
24 |
25 | # ============================================
26 | # 🚀 Stage 2: Runtime Image
27 | # ============================================
28 | FROM ubuntu:24.04
29 |
30 | RUN apt-get update && apt-get install -y \
31 | ca-certificates \
32 | unixodbc \
33 | && rm -rf /var/lib/apt/lists/*
34 |
35 | WORKDIR /app
36 |
37 | # Copy compiled binary
38 | COPY --from=builder /app/etlx /usr/local/bin/etlx
39 |
40 | # Ensure binary is executable
41 | RUN chmod +x /usr/local/bin/etlx
42 |
43 | # Volume mounts (db/config/env handled externally)
44 | VOLUME ["/app/database"]
45 |
46 | # Entry script for env/config handling
47 | RUN echo '#!/bin/bash\n\
48 | set -e\n\
49 | \n\
50 | # Load env if mounted\n\
51 | if [ -f "/app/.env" ]; then\n\
52 | echo "Loading environment variables from /app/.env"\n\
53 | set -a\n\
54 | source /app/.env\n\
55 | set +a\n\
56 | fi\n\
57 | \n\
58 | # If first arg is empty, show help\n\
59 | if [ $# -eq 0 ]; then\n\
60 | echo "Usage: docker run etlx [command] [args]"\n\
61 | echo "Run \\"docker run etlx help\\" for full CLI usage."\n\
62 | exit 0\n\
63 | fi\n\
64 | \n\
65 | echo "Executing: etlx $@"\n\
66 | exec /usr/local/bin/etlx "$@"' > /entrypoint.sh && chmod +x /entrypoint.sh
67 |
68 | ENTRYPOINT ["/entrypoint.sh"]
69 | CMD []
70 |
71 | # ============================================
72 | # 📝 Usage Instructions
73 | #docker build --no-cache -t etlx:latest .
74 | #docker run -v ./.env:/app/.env:ro -v ./config.md:/app/config.md:ro -v ./database:/app/database etlx:latest --config /app/config.md
75 | #podman tag etlx:latest docker.io/realdatadriven/etlx:latest
76 | #podman tag etlx:latest docker.io/realdatadriven/etlx:v1.4.7
77 | #podman login docker.io
78 | #podman push docker.io/realdatadriven/etlx:latest
79 | #podman push docker.io/realdatadriven/etlx:v1.4.7
--------------------------------------------------------------------------------
/.github/workflows/docker.yml:
--------------------------------------------------------------------------------
1 | name: v2 Build and Push Docker image to Docker Hub
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | deploy:
8 | runs-on: ubuntu-latest
9 |
10 | steps:
11 | - name: Checkout code
12 | uses: actions/checkout@v4
13 |
14 | - name: Extract Version
15 | id: version_step
16 | run: |
17 | if [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
18 | VERSION="${GITHUB_REF#refs/tags/v}"
19 | else
20 | VERSION="${GITHUB_REF##*/}" # fallback to branch name
21 | fi
22 | VERSION_TAG="${{ github.repository }}:$VERSION"
23 | LATEST_TAG="${{ github.repository }}:latest"
24 |
25 | echo "version=$VERSION" >> "$GITHUB_OUTPUT"
26 | echo "version_tag=${VERSION_TAG,,}" >> "$GITHUB_OUTPUT"
27 | echo "latest_tag=${LATEST_TAG,,}" >> "$GITHUB_OUTPUT"
28 |
29 | - name: Debug Version Info
30 | run: |
31 | echo "version: ${{ steps.version_step.outputs.version }}"
32 | echo "version_tag: ${{ steps.version_step.outputs.version_tag }}"
33 | echo "latest_tag: ${{ steps.version_step.outputs.latest_tag }}"
34 |
35 |
36 |
37 |
38 | - name: Set up QEMU
39 | uses: docker/setup-qemu-action@v3
40 |
41 | - name: Set up Docker Buildx
42 | uses: docker/setup-buildx-action@v3
43 |
44 | - name: Login to Docker Hub
45 | uses: docker/login-action@v3
46 | with:
47 | username: ${{ secrets.DOCKERHUB_USERNAME }}
48 | password: ${{ secrets.DOCKERHUB_TOKEN }}
49 |
50 | # - name: Prepare registry tags
51 | # run: |
52 | # echo "VERSION_TAG=$(echo ${{ steps.version_step.outputs.version_tag }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV
53 | # echo "LATEST_TAG=$(echo ${{ steps.version_step.outputs.latest_tag }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV
54 |
55 | - name: Prepare registry tags
56 | run: |
57 | echo "VERSION_TAG=${{ steps.version_step.outputs.version_tag }}" >> $GITHUB_ENV
58 | echo "LATEST_TAG=${{ steps.version_step.outputs.latest_tag }}" >> $GITHUB_ENV
59 |
60 | - name: Build and push Docker image
61 | uses: docker/build-push-action@v5
62 | with:
63 | context: .
64 | push: true
65 | tags: |
66 | ${{ env.VERSION_TAG }}
67 | ${{ env.LATEST_TAG }}
68 | build-args: |
69 | VERSION=${{ steps.version_step.outputs.version }}
70 |
71 |
--------------------------------------------------------------------------------
/internal/etlx/http.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "bytes"
5 | "fmt"
6 | "io"
7 | "net/http"
8 | "os"
9 | "strings"
10 | )
11 |
12 | // HTTPAction executes HTTP uploads and downloads based on the mode and params
13 | func (etlx *ETLX) HTTPAction(mode string, params map[string]any) error {
14 | url, _ := params["url"].(string)
15 | method := "GET"
16 | if m, ok := params["method"].(string); ok {
17 | method = strings.ToUpper(m)
18 | }
19 | headers, _ := params["headers"].(map[string]any)
20 | contentType, _ := params["content_type"].(string)
21 | // bodyParams, _ := params["body"].(map[string]any)
22 | source, _ := params["source"].(string)
23 | target, _ := params["target"].(string)
24 | if url == "" {
25 | return fmt.Errorf("missing 'url' parameter")
26 | }
27 | client := &http.Client{}
28 | switch mode {
29 | case "download":
30 | req, err := http.NewRequest(method, url, nil)
31 | if err != nil {
32 | return fmt.Errorf("creating request failed: %w", err)
33 | }
34 | for k, v := range headers {
35 | req.Header.Set(k, fmt.Sprintf("%v", v))
36 | }
37 | resp, err := client.Do(req)
38 | if err != nil {
39 | return fmt.Errorf("HTTP request failed: %w", err)
40 | }
41 | defer resp.Body.Close()
42 | if resp.StatusCode >= 300 {
43 | return fmt.Errorf("HTTP request returned status: %s", resp.Status)
44 | }
45 | outFile, err := os.Create(target)
46 | if err != nil {
47 | return fmt.Errorf("creating output file failed: %w", err)
48 | }
49 | defer outFile.Close()
50 | _, err = io.Copy(outFile, resp.Body)
51 | if err != nil {
52 | return fmt.Errorf("saving response failed: %w", err)
53 | }
54 | case "upload":
55 | file, err := os.Open(source)
56 | if err != nil {
57 | return fmt.Errorf("opening source file failed: %w", err)
58 | }
59 | defer file.Close()
60 | body := &bytes.Buffer{}
61 | _, err = io.Copy(body, file)
62 | if err != nil {
63 | return fmt.Errorf("copying file to body failed: %w", err)
64 | }
65 | req, err := http.NewRequest(method, url, body)
66 | if err != nil {
67 | return fmt.Errorf("creating HTTP request failed: %w", err)
68 | }
69 | if contentType != "" {
70 | req.Header.Set("Content-Type", contentType)
71 | }
72 | for k, v := range headers {
73 | req.Header.Set(k, fmt.Sprintf("%v", v))
74 | }
75 | resp, err := client.Do(req)
76 | if err != nil {
77 | return fmt.Errorf("HTTP upload failed: %w", err)
78 | }
79 | defer resp.Body.Close()
80 | if resp.StatusCode >= 300 {
81 | return fmt.Errorf("upload returned status: %s", resp.Status)
82 | }
83 | default:
84 | return fmt.Errorf("unsupported http action: %s", mode)
85 | }
86 | return nil
87 | }
88 |
--------------------------------------------------------------------------------
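A hedged sketch of a download call (URL, header, and target path are placeholders; a zero-value receiver is assumed to be enough, as the method reads no receiver state):

```go
package main

import (
	"log"

	"github.com/realdatadriven/etlx"
)

func main() {
	e := &etlx.ETLX{}
	params := map[string]any{
		"url":     "https://example.com/data.csv", // placeholder
		"method":  "GET",
		"target":  "data.csv",
		"headers": map[string]any{"Accept": "text/csv"},
	}
	if err := e.HTTPAction("download", params); err != nil {
		log.Fatal(err)
	}
}
```
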
/internal/etlx/compress.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "archive/zip"
5 | "compress/gzip"
6 | "io"
7 | "os"
8 | "path/filepath"
9 | )
10 |
11 | func (etlx *ETLX) CompressToZip(files []string, output string) error {
12 | outFile, err := os.Create(output)
13 | if err != nil {
14 | return err
15 | }
16 | defer outFile.Close()
17 | zipWriter := zip.NewWriter(outFile)
18 | defer zipWriter.Close()
19 | for _, file := range files {
20 | inFile, err := os.Open(file)
21 | if err != nil {
22 | return err
23 | }
24 | defer inFile.Close()
25 |
26 | w, err := zipWriter.Create(filepath.Base(file))
27 | if err != nil {
28 | return err
29 | }
30 | _, err = io.Copy(w, inFile)
31 | if err != nil {
32 | return err
33 | }
34 | }
35 | return nil
36 | }
37 |
38 | func (etlx *ETLX) CompressToGZ(input string, output string) error {
39 | inFile, err := os.Open(input)
40 | if err != nil {
41 | return err
42 | }
43 | defer inFile.Close()
44 | outFile, err := os.Create(output)
45 | if err != nil {
46 | return err
47 | }
48 | defer outFile.Close()
49 | gzWriter := gzip.NewWriter(outFile)
50 | defer gzWriter.Close()
51 | _, err = io.Copy(gzWriter, inFile)
52 | return err
53 | }
54 |
55 | // Unzip a .zip archive to a specified directory
56 | func (etlx *ETLX) Unzip(zipPath string, destDir string) error {
57 | r, err := zip.OpenReader(zipPath)
58 | if err != nil {
59 | return err
60 | }
61 | defer r.Close()
62 |
63 | for _, f := range r.File {
64 | outPath := filepath.Join(destDir, f.Name)
65 | if f.FileInfo().IsDir() {
66 | os.MkdirAll(outPath, os.ModePerm)
67 | continue
68 | }
69 |
70 | rc, err := f.Open()
71 | if err != nil {
72 | return err
73 | }
74 | defer rc.Close()
75 |
76 | if err := os.MkdirAll(filepath.Dir(outPath), os.ModePerm); err != nil {
77 | return err
78 | }
79 |
80 | outFile, err := os.Create(outPath)
81 | if err != nil {
82 | return err
83 | }
84 | defer outFile.Close()
85 |
86 | if _, err = io.Copy(outFile, rc); err != nil {
87 | return err
88 | }
89 | }
90 | return nil
91 | }
92 |
93 | // Decompress a GZ file into the original file
94 | func (etlx *ETLX) DecompressGZ(gzPath string, outputPath string) error {
95 | inFile, err := os.Open(gzPath)
96 | if err != nil {
97 | return err
98 | }
99 | defer inFile.Close()
100 |
101 | gzReader, err := gzip.NewReader(inFile)
102 | if err != nil {
103 | return err
104 | }
105 | defer gzReader.Close()
106 |
107 | outFile, err := os.Create(outputPath)
108 | if err != nil {
109 | return err
110 | }
111 | defer outFile.Close()
112 |
113 | _, err = io.Copy(outFile, gzReader)
114 | return err
115 | }
116 |
--------------------------------------------------------------------------------
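For reference, a sketch pairing the zip helpers (file names are placeholders; the receiver is unused by these methods, so a zero value is assumed to work):

```go
package main

import (
	"log"

	"github.com/realdatadriven/etlx"
)

func main() {
	e := &etlx.ETLX{}
	// Pack two files into an archive, then extract it again.
	if err := e.CompressToZip([]string{"a.csv", "b.csv"}, "out.zip"); err != nil {
		log.Fatal(err)
	}
	if err := e.Unzip("out.zip", "extracted"); err != nil {
		log.Fatal(err)
	}
}
```
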
/examples/hf.md:
--------------------------------------------------------------------------------
1 | # ETL
2 |
3 | The [`httpfs`](https://duckdb.org/docs/extensions/httpfs/overview "httpfs") extension introduces support for the hf:// protocol to access data sets hosted in [Hugging Face](https://huggingface.co "Hugging Face Homepage") repositories. See the [announcement blog post](https://duckdb.org/2024/05/29/access-150k-plus-datasets-from-hugging-face-with-duckdb.html "announcement blog post") for details.
4 |
5 | ```yaml metadata
6 | name: HF_EXTRACT
7 | description: "Example extrating from hf to a local sqlite3 file"
8 | connection: "duckdb:"
9 | active: true
10 | ```
11 |
12 | ## HF_EXTRACT
13 |
14 | ```yaml metadata
15 | name: HF_EXTRACT
16 | description: "Example extrating from hf to a local sqlite3 file"
17 | table: HF_EXTRACT
18 | load_conn: "duckdb:"
19 | load_before_sql:
20 | - load_extentions
21 | - attach_db
22 | - create_hf_token
23 | load_sql: load_query
24 | load_after_sql: detach_db
25 | drop_sql: drop_sql
26 | clean_sql: clean_sql
27 | rows_sql: nrows
28 | active: true
29 | ```
30 |
31 | ```sql
32 | -- load_extentions
33 | INSTALL sqlite;
34 | LOAD sqlite;
35 | INSTALL httpfs;
36 | LOAD httpfs;
37 | ```
38 |
39 | ```sql
40 | -- attach_db
41 | ATTACH 'examples/HF_EXTRACT.db' AS "DB" (TYPE SQLITE)
42 | ```
43 |
44 | Configure your Hugging Face Token in the DuckDB Secrets Manager to access private or gated datasets. First, [visit Hugging Face Settings – Tokens](https://huggingface.co/settings/tokens) to obtain your access token. Second, set it in your DuckDB session using [DuckDB’s Secrets Manager](https://duckdb.org/docs/configuration/secrets_manager.html). DuckDB supports two providers for managing secrets; the snippet below uses the default `CONFIG` provider with the token supplied inline.
45 |
46 | ```sql
47 | -- create_hf_token
48 | CREATE SECRET hf_token (
49 | TYPE HUGGINGFACE,
50 | TOKEN '@HF_TOKEN'
51 | );
52 | ```
53 |
54 | ```sql
55 | -- detach_db
56 | DETACH "DB";
57 | ```
58 |
59 | ```sql
60 | -- load_query
61 | CREATE OR REPLACE TABLE "DB"."" AS
62 | SELECT *
63 | FROM 'hf://datasets/datasets-examples/doc-formats-csv-1/data.csv'
64 | LIMIT 10
65 | ```
66 |
67 | ```sql
68 | -- load_query2
69 | CREATE OR REPLACE TABLE "DB"."" AS
70 | SELECT *
71 | FROM 'hf://datasets/horus-ai-labs/WebInstructSub-150K/data/train-00000-of-00001.parquet'
72 | ```
73 |
74 | ```sql
75 | -- drop_sql
76 | DROP TABLE IF EXISTS "DB"."";
77 | ```
78 |
79 | ```sql
80 | -- clean_sql
81 | DELETE FROM "DB"."";
82 | ```
83 |
84 | ```sql
85 | -- nrows
86 | SELECT COUNT(*) AS "nrows" FROM "DB".""
87 | ```
88 |
89 | ```shell
90 | bin/etlx --config examples/hf.md
91 | ```
92 |
93 | # LOGS
94 |
95 | ```yaml metadata
96 | name: LOGS
97 | description: "Example saving logs"
98 | table: _logs
99 | connection: "duckdb:"
100 | before_sql:
101 | - load_extentions
102 | - attach_db
103 | save_log_sql: load_query
104 | after_sql: detach_db
105 | active: true
106 | ```
107 |
108 | ```sql
109 | -- load_extentions
110 | INSTALL Sqlite;
111 | LOAD Sqlite;
112 | INSTALL json;
113 | LOAD json;
114 | ```
115 |
116 | ```sql
117 | -- attach_db
118 | ATTACH 'examples/HF_EXTRACT.db' AS "DB" (TYPE SQLITE)
119 | ```
120 |
121 | ```sql
122 | -- detach_db
123 | DETACH "DB";
124 | ```
125 |
126 | ```sql
127 | -- load_query
128 | CREATE OR REPLACE TABLE "DB"."" AS
129 | SELECT *
130 | FROM read_json('');
131 | ```
132 |
--------------------------------------------------------------------------------
/.github/workflows/windows.cc.yaml:
--------------------------------------------------------------------------------
1 | name: Windows Build with CGO and Cross-Compiler
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | build-windows:
8 | runs-on: windows-latest
9 | steps:
10 | # Step 1: Checkout the code
11 | - name: Checkout Code
12 | uses: actions/checkout@v3
13 |
14 | # Step 2: Set up Go environment
15 | - name: Setup Go
16 | uses: actions/setup-go@v4
17 | with:
18 | go-version: '1.23'
19 |
20 | # Step 3: Download DuckDB Precompiled Library
21 | - name: Download DuckDB Library
22 | run: |
23 | $version = "v1.1.3"
24 | $duckdb_url = "https://github.com/duckdb/duckdb/releases/download/$version/libduckdb-windows-amd64.zip"
25 | $destinationPath = "$(Get-Location)\duckdb"
26 | Invoke-WebRequest -Uri $duckdb_url -OutFile "duckdb.zip"
27 | Expand-Archive -Path "duckdb.zip" -DestinationPath $destinationPath
28 | $duckdb_url = "https://github.com/duckdb/duckdb/releases/download/$version/libduckdb-windows-arm64.zip"
29 | $destinationPath = "$(Get-Location)\duckdbarm64"
30 | Invoke-WebRequest -Uri $duckdb_url -OutFile "duckdb.zip"
31 | Expand-Archive -Path "duckdb.zip" -DestinationPath $destinationPath
32 | Write-Host "DuckDB library extracted to $destinationPath"
33 |
34 | # Step 4: Install MinGW for Cross-Compilation
35 | - name: Install MinGW
36 | run: |
37 | choco install mingw -y
38 | Write-Host "MinGW installed for cross-compilation"
39 |
40 | # Step 5: Set Environment Variables
41 | - name: Set Environment Variables
42 | run: |
43 | echo "CGO_ENABLED=1" >> $env:GITHUB_ENV
44 | echo "CC=x86_64-w64-mingw32-gcc" >> $env:GITHUB_ENV
45 | echo "CGO_CFLAGS=-I$(Get-Location)\duckdb\" >> $env:GITHUB_ENV
46 | echo "CGO_LDFLAGS=-L$(Get-Location)\duckdb\ -lduckdb" >> $env:GITHUB_ENV
47 |
48 | # Step 6: Verify DuckDB Library
49 | - name: Verify DuckDB Library
50 | run: |
51 | $libPath = "$(Get-Location)\duckdb\"
52 | if (!(Test-Path "$libPath\duckdb.lib")) {
53 | Write-Error "duckdb.lib not found in $libPath"
54 | }
55 | Write-Host "duckdb.lib found in $libPath"
56 |
57 | # Step 7: Build the Application
58 | - name: Build Windows Binary
59 | run: |
60 | mkdir dist
61 | go build -o dist/etlx-windows-amd64.exe ./cmd/main.go
62 | echo "CGO_ENABLED=1" >> $env:GITHUB_ENV
63 | echo "CC=x86_64-w64-mingw32-gcc" >> $env:GITHUB_ENV
64 | echo "CGO_CFLAGS=-I$(Get-Location)\duckdbarm64\" >> $env:GITHUB_ENV
65 | echo "CGO_LDFLAGS=-L$(Get-Location)\duckdbarm64\ -lduckdb" >> $env:GITHUB_ENV
66 | echo "GOARCH=arm64" >> $env:GITHUB_ENV
67 | go build -o dist/etlx-windows-arm64.exe ./cmd/main.go
68 |
69 | # Step 8: Upload Build Logs for Debugging (if Build Fails)
70 | - name: Upload Logs
71 | if: failure()
72 | uses: actions/upload-artifact@v3
73 | with:
74 | name: build-logs
75 | path: ${{ github.workspace }}
76 |
77 | # Step 9: Upload Built Binary for Testing (if Successful)
78 | - name: Upload Binary
79 | if: success()
80 | uses: actions/upload-artifact@v3
81 | with:
82 | name: windows-binary
83 | path: dist/etlx-windows-amd64.exe
84 |
--------------------------------------------------------------------------------
/examples/ducklake.md:
--------------------------------------------------------------------------------
1 | # GENERATE_SAMPLE_DATA
2 |
3 | Example source: [DuckLake w/ Hannes Mühleisen - Practical Data Lunch and Learn](https://www.youtube.com/watch?v=NbnEVFAtx9o&ab_channel=JoeReis)
4 |
5 | ```yaml metadata
6 | name: GENERATE_SAMPLE_DATA
7 | runs_as: SCRIPTS
8 | description: Here we are just going to generate a sample database for the exercise, mimicking a real traditional database
9 | connection: "duckdb:database/sample.duckdb"
10 | active: false
11 | ```
12 |
13 | ## SAMPLE_DB
14 |
15 | ```yaml metadata
16 | name: SAMPLE_DB
17 | description: Generate sample data
18 | connection: "duckdb:database/sample.duckdb"
19 | script_sql: CALL dbgen(sf = 1)
20 | active: true
21 | ```
22 |
23 | # DUCKLAKE
24 |
25 | ```yaml metadata
26 | name: GENERATE_SAMPLE_DATA
27 | runs_as: ETL
28 | description: Data lake example
29 | connection: "'ducklake:sqlite:database/dl_metadata.sqlite' AS dl (DATA_PATH 'database/dl/')"
30 | active: true
31 | ```
32 |
33 | ## lineitem
34 |
35 | ```yaml metadata
36 | name: lineitem
37 | description: lineitem
38 | table: lineitem
39 | database: "ATTACH 'ducklake:sqlite:database/dl_metadata.sqlite' AS dl (DATA_PATH 'database/dl/')"
40 | load_conn: "duckdb:"
41 | load_before_sql:
42 | - INSTALL ducklake -- OR FORCE INSTALL ducklake FROM core_nightly
43 | - INSTALL sqlite
44 | - "ATTACH 'ducklake:sqlite:database/dl_metadata.sqlite' AS dl (DATA_PATH 'database/dl/')"
45 | - ATTACH 'database/sample.duckdb' AS S
46 | load_sql: INSERT INTO dl."" BY NAME SELECT * FROM S.""
47 | load_on_err_match_patt: '(?i)table.+with.+name.+(\w+).+does.+not.+exist'
48 | load_on_err_match_sql: CREATE TABLE dl."" AS SELECT * FROM S.""
49 | load_after_sql:
50 | - DETACH S
51 | - DETACH dl
52 | drop_sql: DROP TABLE dl.""
53 | clean_sql: DELETE FROM dl.""
54 | rows_sql: SELECT COUNT(*) AS "nrows" FROM dl.""
55 | active: true
56 | ```
57 |
58 | # ETLX_LOGS
59 |
60 | ```yaml metadata
61 | name: ETLX_LOGS
62 | runs_as: LOGS
63 | description: Logging
64 | table: logs
65 | database: 'sqlite3:database/dl_etlx_logs.db'
66 | connection: "duckdb:"
67 | before_sql:
68 | - "LOAD Sqlite"
69 | - "ATTACH 'database/dl_etlx_logs.db' AS l (TYPE SQLITE)"
70 | - "USE l"
71 | - "LOAD json"
72 | - "get_dyn_queries[create_missing_columns](ATTACH 'database/dl_etlx_logs.db' AS l (TYPE SQLITE),DETACH l)"
73 | save_log_sql: |
74 | INSERT INTO "l"."" BY NAME
75 | SELECT *
76 | FROM READ_JSON('');
77 | save_on_err_patt: '(?i)table.+with.+name.+(\w+).+does.+not.+exist'
78 | save_on_err_sql: |
79 | CREATE TABLE "l"."" AS
80 | SELECT *
81 | FROM READ_JSON('');
82 | after_sql:
83 | - 'USE memory'
84 | - 'DETACH "l"'
85 | active: true
86 | ```
87 |
88 | ```sql
89 | -- create_missing_columns
90 | WITH source_columns AS (
91 | SELECT "column_name", "column_type"
92 | FROM (DESCRIBE SELECT * FROM READ_JSON(''))
93 | ),
94 | destination_columns AS (
95 | SELECT "column_name", "data_type" as "column_type"
96 | FROM "duckdb_columns"
97 | WHERE "table_name" = ''
98 | ),
99 | missing_columns AS (
100 | SELECT "s"."column_name", "s"."column_type"
101 | FROM source_columns "s"
102 | LEFT JOIN destination_columns "d" ON "s"."column_name" = "d"."column_name"
103 | WHERE "d"."column_name" IS NULL
104 | )
105 | SELECT 'ALTER TABLE "l"."" ADD COLUMN "' || "column_name" || '" ' || "column_type" || ';' AS "query"
106 | FROM missing_columns
107 | WHERE (SELECT COUNT(*) FROM destination_columns) > 0;
108 | ```
--------------------------------------------------------------------------------
/internal/etlx/run_logs.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "regexp"
7 | "time"
8 | )
9 |
10 | func (etlx *ETLX) RunLOGS(dateRef []time.Time, conf map[string]any, logs []map[string]any, keys ...string) ([]map[string]any, error) {
11 | key := "LOGS"
12 | if len(keys) > 0 && keys[0] != "" {
13 | key = keys[0]
14 | }
15 | // fmt.Println(key, dateRef)
16 | var processData []map[string]any
17 | // Check if the input conf is nil or empty
18 | if conf == nil {
19 | conf = etlx.Config
20 | }
21 | data, ok := conf[key].(map[string]any)
22 | if !ok {
23 | return nil, fmt.Errorf("missing or invalid %s section", key)
24 | }
25 | // Extract metadata
26 | metadata, ok := data["metadata"].(map[string]any)
27 | if !ok {
28 | return nil, fmt.Errorf("missing metadata in %s section", key)
29 | }
30 | if active, okActive := metadata["active"]; okActive {
31 | if !active.(bool) {
32 | return nil, fmt.Errorf("deactivated %s", key)
33 | }
34 | }
35 | beforeSQL, okBefore := metadata["before_sql"]
36 | afterSQL, okAfter := metadata["after_sql"]
37 | saveSQL, okSave := metadata["save_log_sql"]
38 | errPatt, okErrPatt := metadata["save_on_err_patt"]
39 | errSQL, okErrSQL := metadata["save_on_err_sql"]
40 | tmpDir := ""
41 | if _, ok := metadata["tmp_dir"].(string); ok {
42 | tmpDir = metadata["tmp_dir"].(string)
43 | }
44 | conn, okCon := metadata["connection"]
45 | if !okCon {
46 | return nil, fmt.Errorf("%s err no connection defined", key)
47 | }
48 | dbConn, err := etlx.GetDB(conn.(string))
49 | if err != nil {
50 | return nil, fmt.Errorf("%s ERR: connecting to %s in : %s", key, conn, err)
51 | }
52 | defer dbConn.Close()
53 | jsonData, err := json.MarshalIndent(logs, "", " ")
54 | if err != nil {
55 | return nil, fmt.Errorf("error converting logs to JSON: %v", err)
56 | }
57 | fname, err := etlx.TempFIle(tmpDir, string(jsonData), "logs.*.json")
58 | // println(fname, string(jsonData))
59 | if err != nil {
60 | return nil, fmt.Errorf("error saving logs to JSON: %v", err)
61 | }
62 | // QUERIES TO RUN AT beginning
63 | if okBefore {
64 | err = etlx.ExecuteQuery(dbConn, beforeSQL, data, fname, "", dateRef)
65 | if err != nil {
66 | return nil, fmt.Errorf("%s: Before error: %s", key, err)
67 | }
68 | }
69 | // fmt.Println(key, sql)
70 | if saveSQL != "" && okSave {
71 | // fmt.Println(data[saveSQL.(string)])
72 | err = etlx.ExecuteQuery(dbConn, saveSQL, data, fname, "", dateRef)
73 | if err != nil {
74 | _err_by_pass := false
75 | if okErrPatt && errPatt != nil && okErrSQL && errSQL != nil {
76 | //fmt.Println(onErrPatt.(string), onErrSQL.(string))
77 | re, regex_err := regexp.Compile(errPatt.(string))
78 | if regex_err != nil {
79 | return nil, fmt.Errorf("%s ERR: fallback regex matching the error failed to compile: %s", key, regex_err)
80 | } else if re.MatchString(string(err.Error())) {
81 | err = etlx.ExecuteQuery(dbConn, errSQL, data, fname, "", dateRef)
82 | if err != nil {
83 | return nil, fmt.Errorf("%s ERR: main: %s", key, err)
84 | } else {
85 | _err_by_pass = true
86 | }
87 | }
88 | }
89 | if !_err_by_pass {
90 | return nil, fmt.Errorf("%s ERR: main: %s", key, err)
91 | }
92 | }
93 | }
94 | // QUERIES TO RUN AT THE END
95 | if okAfter {
96 | err = etlx.ExecuteQuery(dbConn, afterSQL, data, fname, "", dateRef)
97 | if err != nil {
98 | return nil, fmt.Errorf("%s: After error: %s", key, err)
99 | }
100 | }
101 | return processData, nil
102 | }
103 |
--------------------------------------------------------------------------------
/internal/etlx/sftp.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "fmt"
5 | "io"
6 | "os"
7 | "time"
8 |
9 | "github.com/pkg/sftp"
10 | "golang.org/x/crypto/ssh"
11 | )
12 |
13 | // getHostKey loads and parses the host public key
14 | func getHostKey(path string) (ssh.PublicKey, error) {
15 | hostKeyBytes, err := os.ReadFile(path)
16 | if err != nil {
17 | return nil, fmt.Errorf("failed to read host key file: %w", err)
18 | }
19 | hostKey, _, _, _, err := ssh.ParseAuthorizedKey(hostKeyBytes)
20 | if err != nil {
21 | return nil, fmt.Errorf("failed to parse host key: %w", err)
22 | }
23 | return hostKey, nil
24 | }
25 |
26 | // runSFTPActionWithFixedHostKey uploads or downloads files via SFTP with host key validation
27 | func (etlx *ETLX) SFTPActionWithFixedHostKey(mode string, params map[string]any) error {
28 | // Extract and validate required params
29 | host, _ := params["host"].(string)
30 | user, _ := params["user"].(string)
31 | password, _ := params["password"].(string)
32 | source, _ := params["source"].(string)
33 | target, _ := params["target"].(string)
34 | hostKeyPath, _ := params["host_key"].(string)
35 | port := 22
36 | if p, ok := params["port"].(int); ok {
37 | port = p
38 | }
39 | if host == "" || user == "" || password == "" || source == "" || target == "" || hostKeyPath == "" {
40 | return fmt.Errorf("missing required SFTP parameters: host, user, password, source, target, host_key")
41 | }
42 | host = etlx.ReplaceEnvVariable(host)
43 | user = etlx.ReplaceEnvVariable(user)
44 | password = etlx.ReplaceEnvVariable(password)
45 | // Get host key for validation
46 | hostKey, err := getHostKey(hostKeyPath)
47 | if err != nil {
48 | return fmt.Errorf("could not load host key: %w", err)
49 | }
50 |
51 | // Create SSH config
52 | config := &ssh.ClientConfig{
53 | User: user,
54 | Auth: []ssh.AuthMethod{ssh.Password(password)},
55 | HostKeyCallback: ssh.FixedHostKey(hostKey),
56 | Timeout: 5 * time.Second,
57 | }
58 |
59 | // Connect
60 | addr := fmt.Sprintf("%s:%d", host, port)
61 | conn, err := ssh.Dial("tcp", addr, config)
62 | if err != nil {
63 | return fmt.Errorf("SSH dial failed: %w", err)
64 | }
65 | defer conn.Close()
66 |
67 | // Create SFTP client
68 | client, err := sftp.NewClient(conn)
69 | if err != nil {
70 | return fmt.Errorf("SFTP client creation failed: %w", err)
71 | }
72 | defer client.Close()
73 |
74 | switch mode {
75 | case "upload":
76 | srcFile, err := os.Open(source)
77 | if err != nil {
78 | return fmt.Errorf("could not open source file: %w", err)
79 | }
80 | defer srcFile.Close()
81 |
82 | dstFile, err := client.Create(target)
83 | if err != nil {
84 | return fmt.Errorf("could not create remote file: %w", err)
85 | }
86 | defer dstFile.Close()
87 |
88 | _, err = io.Copy(dstFile, srcFile)
89 | if err != nil {
90 | return fmt.Errorf("upload failed: %w", err)
91 | }
92 | case "download":
93 | srcFile, err := client.Open(source)
94 | if err != nil {
95 | return fmt.Errorf("could not open remote file: %w", err)
96 | }
97 | defer srcFile.Close()
98 |
99 | dstFile, err := os.Create(target)
100 | if err != nil {
101 | return fmt.Errorf("could not create local file: %w", err)
102 | }
103 | defer dstFile.Close()
104 |
105 | _, err = io.Copy(dstFile, srcFile)
106 | if err != nil {
107 | return fmt.Errorf("download failed: %w", err)
108 | }
109 | default:
110 | return fmt.Errorf("unsupported SFTP action: %s", mode)
111 | }
112 |
113 | return nil
114 | }
115 |
--------------------------------------------------------------------------------
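An illustrative upload call (host, credentials, and paths are placeholders; `@SFTP_PASS` assumes the same env-substitution style shown in the Markdown examples, since `ReplaceEnvVariable`'s implementation is not included here):

```go
package main

import (
	"log"

	"github.com/realdatadriven/etlx"
)

func main() {
	e := &etlx.ETLX{}
	params := map[string]any{
		"host":     "sftp.example.com", // placeholder
		"port":     22,
		"user":     "etl",
		"password": "@SFTP_PASS",       // assumed env-substitution style
		"source":   "out.zip",
		"target":   "/upload/out.zip",
		"host_key": "known_host.pub",   // public key file parsed by getHostKey
	}
	if err := e.SFTPActionWithFixedHostKey("upload", params); err != nil {
		log.Fatal(err)
	}
}
```
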
/internal/etlx/build_query.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "fmt"
5 | )
6 |
7 | // Builds query from the query doc MD config
8 | //
9 | // Input:
10 | //
11 | // -key: Representing the markdown Level 1 Heading where the query begins
12 | //
13 | // Output:
14 | //
15 | // -sql: The SQL query generated
16 | // -query_parts: The query parts parsed from the md config input
17 | // -field_orders: The order of the fields in the parts
18 | // -error: Error returned in case something goes wrong
19 | func (etlx *ETLX) QueryBuilder(conf map[string]any, keys ...string) (string, map[string]any, []string, error) { // dateRef []time.Time, extraConf map[string]any,
20 | key := "QUERY_DOC"
21 | if len(keys) > 0 && keys[0] != "" {
22 | key = keys[0]
23 | }
24 | // Check if the input conf is nil or empty
25 | if conf == nil {
26 | conf = etlx.Config
27 | }
28 | data, ok := conf[key].(map[string]any)
29 | if !ok {
30 | return "", nil, nil, fmt.Errorf("missing or invalid %s section", key)
31 | }
32 | // Extract metadata
33 | fields, ok := data["FIELDS"].(map[string]any)
34 | if !ok {
35 | fields = data
36 | }
37 | query_parts := map[string]interface{}{}
38 | _fields_order := []string{}
39 | for key2, value := range fields {
40 | if key2 == "metadata" || key2 == "__order" || key2 == "order" {
41 | continue
42 | }
43 | _field := value.(map[string]any)
44 | field_metadata, ok := _field["metadata"].(map[string]any)
45 | //fmt.Println(1, field_metadata, len(field_metadata))
46 | if !ok {
47 | // return "", nil, nil, fmt.Errorf("missing metadata in query %s and field %s", key, _field)
48 | field_metadata = map[string]any{
49 | "name": key2,
50 | "description": key2,
51 | }
52 | } else if len(field_metadata) == 0 {
53 | field_metadata = map[string]any{
54 | "name": key2,
55 | "description": key2,
56 | }
57 | }
58 | _fields_order = append(_fields_order, field_metadata["name"].(string))
59 | active, ok := field_metadata["active"].(bool)
60 | if !ok {
61 | active = true
62 | }
63 | query_parts[field_metadata["name"].(string)] = map[string]any{
64 | "name": field_metadata["name"],
65 | "desc": field_metadata["description"],
66 | "cte": _field["cte"],
67 | "select": _field["select"],
68 | "from": _field["from"],
69 | "join": _field["join"],
70 | "where": _field["where"],
71 | "group_by": _field["group_by"],
72 | "order_by": _field["order_by"],
73 | "having": _field["having"],
74 | "window": _field["window"],
75 | "active": active,
76 | "key": key,
77 | "metadata": field_metadata,
78 | }
79 | }
80 | __order, ok := data["__order"].([]any)
81 | //fmt.Printf("%s -> %v, %v, %t", key, ok, data["__order"], data["__order"])
82 | if ok {
83 | _fields_order = []string{}
84 | for _, o := range __order {
85 | if _, ok := o.(string); ok {
86 | _field_data, _ok := data[o.(string)].(map[string]any)
87 | if _ok {
88 | _metadata, _ok := _field_data["metadata"].(map[string]any)
89 | if _ok {
90 | _name, _ok := _metadata["name"].(string)
91 | if _ok {
92 | _fields_order = append(_fields_order, _name)
93 | } else {
94 | _fields_order = append(_fields_order, o.(string))
95 | }
96 | } else {
97 | _fields_order = append(_fields_order, o.(string))
98 | }
99 | } else {
100 | _fields_order = append(_fields_order, o.(string))
101 | }
102 | }
103 | }
104 | //fmt.Println("QD ORDER:", _fields_order)
105 | }
106 | qd := QueryDoc{
107 | QueryParts: make(map[string]Field),
108 | FieldOrders: _fields_order,
109 | }
110 | err := qd.SetQueryPartsFromMap(query_parts)
111 | if err != nil {
112 | return "", nil, nil, fmt.Errorf("error setting field: %s", err)
113 | }
114 | _sql := qd.GetQuerySQLFromMap()
115 | //_sql = app.setQueryDate(_sql, date_ref)
116 | //fmt.Println("SQL", _sql)
117 | return _sql, query_parts, _fields_order, nil
118 | }
119 |
--------------------------------------------------------------------------------
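To make the expected input shape concrete, a hedged sketch of a QUERY_DOC-style map fed to `QueryBuilder` (field names and SQL fragments are invented; the exact SQL assembled by `GetQuerySQLFromMap` depends on code not shown in this dump):

```go
package main

import (
	"fmt"
	"log"

	"github.com/realdatadriven/etlx"
)

func main() {
	e := &etlx.ETLX{}
	conf := map[string]any{
		"QUERY_DOC": map[string]any{
			"total_trips": map[string]any{
				"metadata": map[string]any{"name": "total_trips", "description": "trip count", "active": true},
				"select":   "COUNT(*) AS total_trips",
				"from":     `FROM DB."NYC_TAXI"`,
			},
		},
	}
	sql, parts, order, err := e.QueryBuilder(conf) // defaults to the "QUERY_DOC" key
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(sql, len(parts), order)
}
```
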
/internal/etlx/ftp.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "fmt"
5 | "io"
6 | "os"
7 | "path"
8 | "path/filepath"
9 | "strings"
10 | "time"
11 |
12 | "github.com/jlaffaye/ftp"
13 | )
14 |
15 | func (etlx *ETLX) FTPUpload(host, port, user, pass, localPath, remotePath string) error {
16 | if port == "" {
17 | port = "21"
18 | }
19 | address := host + ":" + port
20 | conn, err := ftp.Dial(address, ftp.DialWithTimeout(5*time.Second))
21 | if err != nil {
22 | return fmt.Errorf("failed to dial: %w", err)
23 | }
24 | defer conn.Quit()
25 | if user != "" && pass != "" {
26 | if err := conn.Login(user, pass); err != nil {
27 | return fmt.Errorf("failed to login: %w", err)
28 | }
29 | }
30 | file, err := os.Open(localPath)
31 | if err != nil {
32 | return fmt.Errorf("failed to open local file: %w", err)
33 | }
34 | defer file.Close()
35 | if err := conn.Stor(remotePath, file); err != nil {
36 | return fmt.Errorf("failed to upload: %w", err)
37 | }
38 | return nil
39 | }
40 |
41 | func (etlx *ETLX) FTPDownload(host, port, user, pass, remotePath, localPath string) error {
42 | if port == "" {
43 | port = "21"
44 | }
45 | address := host + ":" + port
46 | conn, err := ftp.Dial(address, ftp.DialWithTimeout(5*time.Second))
47 | if err != nil {
48 | return fmt.Errorf("failed to dial: %w", err)
49 | }
50 | defer conn.Quit()
51 | if user != "" && pass != "" {
52 | if err := conn.Login(user, pass); err != nil {
53 | return fmt.Errorf("failed to login: %w", err)
54 | }
55 | }
56 | response, err := conn.Retr(remotePath)
57 | if err != nil {
58 | return fmt.Errorf("failed to download file: %w", err)
59 | }
60 | defer response.Close()
61 | outFile, err := os.Create(localPath)
62 | if err != nil {
63 | return fmt.Errorf("failed to create local file: %w", err)
64 | }
65 | defer outFile.Close()
66 | _, err = io.Copy(outFile, response)
67 | if err != nil {
68 | return fmt.Errorf("failed to save file: %w", err)
69 | }
70 | return nil
71 | }
72 |
73 | func globToRegex(glob string) string {
74 | var sb strings.Builder
75 | sb.WriteString("^")
76 | for i := 0; i < len(glob); i++ {
77 | switch glob[i] {
78 | case '*':
79 | sb.WriteString(".*")
80 | case '?':
81 | sb.WriteString(".")
82 | case '.', '(', ')', '+', '|', '^', '$', '[', ']', '{', '}', '\\':
83 | sb.WriteString(`\`)
84 | sb.WriteByte(glob[i])
85 | default:
86 | sb.WriteByte(glob[i])
87 | }
88 | }
89 | sb.WriteString("$")
90 | return sb.String()
91 | }
92 |
93 | func (etlx *ETLX) FTPDownloadBatch(host, port, user, pass, remoteDir, pattern, localDir string) error {
94 | if port == "" {
95 | port = "21"
96 | }
97 | address := host + ":" + port
98 | conn, err := ftp.Dial(address, ftp.DialWithTimeout(5*time.Second))
99 | if err != nil {
100 | return fmt.Errorf("failed to dial: %w", err)
101 | }
102 | defer conn.Quit()
103 | if user != "" && pass != "" {
104 | if err := conn.Login(user, pass); err != nil {
105 | return fmt.Errorf("failed to login: %w", err)
106 | }
107 | }
108 | // List all files in the remote directory
109 | entries, err := conn.List(remoteDir)
110 | if err != nil {
111 | return fmt.Errorf("failed to list remote directory: %w", err)
112 | }
113 | // Ensure local directory exists
114 | if err := os.MkdirAll(localDir, 0755); err != nil {
115 | return fmt.Errorf("failed to create local directory: %w", err)
116 | }
117 | // Loop over files and download matching ones
118 | for _, entry := range entries {
119 | if entry.Type != ftp.EntryTypeFile {
120 | continue // skip non-files
121 | }
122 | 		matched, err := filepath.Match(pattern, entry.Name) // filepath.Match expects the glob itself, not a regex
123 | if err != nil {
124 | return fmt.Errorf("invalid pattern: %w", err)
125 | }
126 | if matched {
127 | remotePath := path.Join(remoteDir, entry.Name)
128 | localPath := filepath.Join(localDir, entry.Name)
129 | fmt.Printf("Downloading: %s → %s\n", remotePath, localPath)
130 | // Download each matching file using the single-file method
131 | err := etlx.FTPDownload(host, port, user, pass, remotePath, localPath)
132 | if err != nil {
133 | return fmt.Errorf("failed to download %s: %w", remotePath, err)
134 | }
135 | }
136 | }
137 |
138 | return nil
139 | }
140 |
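141 | // Illustrative usage (hypothetical host, credentials and paths): download every
142 | // CSV file from a remote directory in one call.
143 | //
144 | //	err := etlx.FTPDownloadBatch("ftp.example.com", "21", "user", "pass", "/data", "*.csv", "downloads")
145 | //	if err != nil {
146 | //		log.Println("batch download failed:", err)
147 | //	}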
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/realdatadriven/etlx
2 |
3 | go 1.25.0
4 |
5 | toolchain go1.25.5
6 |
7 | require (
8 | github.com/BurntSushi/toml v1.5.0
9 | github.com/Masterminds/sprig/v3 v3.3.0
10 | github.com/alexbrainman/odbc v0.0.0-20250601004241-49e6b2bc0cf0
11 | github.com/aws/aws-sdk-go-v2 v1.41.0
12 | github.com/aws/aws-sdk-go-v2/config v1.32.5
13 | github.com/aws/aws-sdk-go-v2/credentials v1.19.5
14 | github.com/aws/aws-sdk-go-v2/service/s3 v1.93.2
15 | github.com/duckdb/duckdb-go/v2 v2.5.4
16 | github.com/jlaffaye/ftp v0.2.0
17 | github.com/jmoiron/sqlx v1.4.0
18 | github.com/joho/godotenv v1.5.1
19 | github.com/lib/pq v1.10.9
20 | github.com/mattn/go-sqlite3 v1.14.32
21 | github.com/microsoft/go-mssqldb v1.9.5
22 | github.com/pkg/sftp v1.13.10
23 | github.com/xuri/excelize/v2 v2.10.0
24 | github.com/yuin/goldmark v1.7.13
25 | golang.org/x/crypto v0.46.0
26 | golang.org/x/text v0.32.0
27 | gopkg.in/yaml.v3 v3.0.1
28 | )
29 |
30 | require (
31 | github.com/aws/aws-sdk-go-v2/service/signin v1.0.4 // indirect
32 | github.com/duckdb/duckdb-go/arrowmapping v0.0.27 // indirect
33 | github.com/duckdb/duckdb-go/mapping v0.0.27 // indirect
34 | )
35 |
36 | require (
37 | dario.cat/mergo v1.0.2 // indirect
38 | github.com/Masterminds/goutils v1.1.1 // indirect
39 | github.com/Masterminds/semver/v3 v3.4.0 // indirect
40 | github.com/apache/arrow-go/v18 v18.5.0 // indirect
41 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 // indirect
42 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.16 // indirect
43 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.16 // indirect
44 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.16 // indirect
45 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
46 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16 // indirect
47 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 // indirect
48 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7 // indirect
49 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.16 // indirect
50 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16 // indirect
51 | github.com/aws/aws-sdk-go-v2/service/sso v1.30.7 // indirect
52 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.12 // indirect
53 | github.com/aws/aws-sdk-go-v2/service/sts v1.41.5 // indirect
54 | github.com/aws/smithy-go v1.24.0 // indirect
55 | github.com/duckdb/duckdb-go-bindings v0.1.24 // indirect
56 | github.com/duckdb/duckdb-go-bindings/darwin-amd64 v0.1.24 // indirect
57 | github.com/duckdb/duckdb-go-bindings/darwin-arm64 v0.1.24 // indirect
58 | github.com/duckdb/duckdb-go-bindings/linux-amd64 v0.1.24 // indirect
59 | github.com/duckdb/duckdb-go-bindings/linux-arm64 v0.1.24 // indirect
60 | github.com/duckdb/duckdb-go-bindings/windows-amd64 v0.1.24 // indirect
61 | github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
62 | github.com/goccy/go-json v0.10.5 // indirect
63 | github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 // indirect
64 | github.com/golang-sql/sqlexp v0.1.0 // indirect
65 | github.com/google/flatbuffers v25.9.23+incompatible // indirect
66 | github.com/google/uuid v1.6.0 // indirect
67 | github.com/hashicorp/errwrap v1.1.0 // indirect
68 | github.com/hashicorp/go-multierror v1.1.1 // indirect
69 | github.com/huandu/xstrings v1.5.0 // indirect
70 | github.com/klauspost/compress v1.18.2 // indirect
71 | github.com/klauspost/cpuid/v2 v2.3.0 // indirect
72 | github.com/kr/fs v0.1.0 // indirect
73 | // github.com/duckdb/duckdb-go/arrowmapping v0.0.21 // indirect
74 | // github.com/duckdb/duckdb-go/mapping v0.0.21 // indirect
75 | github.com/mitchellh/copystructure v1.2.0 // indirect
76 | github.com/mitchellh/reflectwalk v1.0.2 // indirect
77 | github.com/pierrec/lz4/v4 v4.1.22 // indirect
78 | github.com/richardlehane/mscfb v1.0.4 // indirect
79 | github.com/richardlehane/msoleps v1.0.4 // indirect
80 | github.com/shopspring/decimal v1.4.0 // indirect
81 | github.com/spf13/cast v1.10.0 // indirect
82 | github.com/tiendc/go-deepcopy v1.7.2 // indirect
83 | github.com/xuri/efp v0.0.1 // indirect
84 | github.com/xuri/nfp v0.0.2-0.20250530014748-2ddeb826f9a9 // indirect
85 | github.com/zeebo/xxh3 v1.0.2 // indirect
86 | golang.org/x/exp v0.0.0-20251209150349-8475f28825e9 // indirect
87 | golang.org/x/mod v0.31.0 // indirect
88 | golang.org/x/net v0.48.0 // indirect
89 | golang.org/x/sync v0.19.0 // indirect
90 | golang.org/x/sys v0.39.0 // indirect
91 | golang.org/x/telemetry v0.0.0-20251208220230-2638a1023523 // indirect
92 | golang.org/x/tools v0.40.0 // indirect
93 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
94 | )
95 |
--------------------------------------------------------------------------------
/internal/etlx/aws.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "fmt"
7 | "io"
8 | 	"os"
9 | 	"path/filepath"
9 |
10 | "github.com/aws/aws-sdk-go-v2/aws"
11 | "github.com/aws/aws-sdk-go-v2/config"
12 | "github.com/aws/aws-sdk-go-v2/credentials"
13 | "github.com/aws/aws-sdk-go-v2/service/s3"
14 |
15 | "github.com/realdatadriven/etlx/internal/env"
16 | )
17 |
18 | // awsConfig returns an AWS config for SDK v2
19 | func (etlx *ETLX) awsConfig(ctx context.Context, AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN string) (aws.Config, error) {
20 | opts := []func(*config.LoadOptions) error{
21 | config.WithRegion(AWS_REGION),
22 | config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(
23 | AWS_ACCESS_KEY_ID,
24 | AWS_SECRET_ACCESS_KEY,
25 | AWS_SESSION_TOKEN,
26 | )),
27 | }
28 | cfg, err := config.LoadDefaultConfig(ctx, opts...)
29 | if err != nil {
30 | return cfg, fmt.Errorf("failed to load AWS config: %v", err)
31 | }
32 | return cfg, nil
33 | }
34 |
35 | // fileExistsInS3 checks if a file exists in the given S3 bucket
36 | func (etlx *ETLX) FileExistsInS3(ctx context.Context, client *s3.Client, bucket, key string) bool {
37 | _, err := client.HeadObject(ctx, &s3.HeadObjectInput{
38 | Bucket: aws.String(bucket),
39 | Key: aws.String(key),
40 | })
41 | return err == nil
42 | }
43 |
44 | func (etlx *ETLX) S3(mode string, params map[string]any) (string, error) {
45 | // Create AWS session
46 | AWS_ACCESS_KEY_ID, ok := params["AWS_ACCESS_KEY_ID"].(string)
47 | if !ok {
48 | AWS_ACCESS_KEY_ID = os.Getenv("AWS_ACCESS_KEY_ID")
49 | }
50 | AWS_SECRET_ACCESS_KEY, ok := params["AWS_SECRET_ACCESS_KEY"].(string)
51 | if !ok {
52 | AWS_SECRET_ACCESS_KEY = os.Getenv("AWS_SECRET_ACCESS_KEY")
53 | }
54 | AWS_SESSION_TOKEN, ok := params["AWS_SESSION_TOKEN"].(string)
55 | if !ok {
56 | AWS_SESSION_TOKEN = os.Getenv("AWS_SESSION_TOKEN")
57 | }
58 | AWS_REGION, ok := params["AWS_REGION"].(string)
59 | if !ok {
60 | AWS_REGION = os.Getenv("AWS_REGION")
61 | }
62 | AWS_ENDPOINT, ok := params["AWS_ENDPOINT"].(string)
63 | if !ok {
64 | AWS_ENDPOINT = os.Getenv("AWS_ENDPOINT")
65 | }
66 | S3_FORCE_PATH_STYLE, ok := params["S3_FORCE_PATH_STYLE"].(bool)
67 | if !ok {
68 | S3_FORCE_PATH_STYLE = env.GetBool("S3_FORCE_PATH_STYLE", false)
69 | }
70 | /*S3_SKIP_SSL_VERIFY, ok := params["S3_SKIP_SSL_VERIFY"].(bool)
71 | if !ok {
72 | S3_SKIP_SSL_VERIFY = env.GetBool("S3_SKIP_SSL_VERIFY", false)
73 | }
74 | S3_DISABLE_SSL, ok := params["S3_DISABLE_SSL"].(bool)
75 | if !ok {
76 | S3_DISABLE_SSL = env.GetBool("S3_DISABLE_SSL", false)
77 | }*/
78 | ctx := context.Background()
79 | cfg, err := etlx.awsConfig(ctx, AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN)
80 | if err != nil {
81 | return "", fmt.Errorf("failed to create AWS config: %v", err)
82 | }
83 | client := s3.NewFromConfig(cfg, func(o *s3.Options) {
84 | if endpoint := AWS_ENDPOINT; endpoint != "" {
85 | o.BaseEndpoint = aws.String(endpoint)
86 | }
87 | o.UsePathStyle = S3_FORCE_PATH_STYLE
88 | })
89 | // Define the S3 bucket and key
90 | bucket := params["bucket"].(string)
91 | originalKey := params["key"].(string)
92 | 	ext := filepath.Ext(originalKey)
93 | 	baseName := originalKey[:len(originalKey)-len(ext)]
94 | 	// On upload, if the key already exists, append a numeric suffix so the existing object is not overwritten
95 | 	key := originalKey
96 | 	for i := 1; mode == "upload" && etlx.FileExistsInS3(ctx, client, bucket, key); i++ {
97 | 		key = fmt.Sprintf("%s_%d%s", baseName, i, ext)
98 | 	}
99 | if mode == "upload" {
100 | source, _ := params["source"].(string)
101 | file, err := os.Open(source)
102 | if err != nil {
103 | return "", fmt.Errorf("opening source file failed: %w", err)
104 | }
105 | defer file.Close()
106 | // Read file into a buffer to allow seeking
107 | var buffer bytes.Buffer
108 | if _, err := io.Copy(&buffer, file); err != nil {
109 | return "", fmt.Errorf("failed to read file into buffer: %v", err)
110 | }
111 | // Upload file to S3
112 | _, err = client.PutObject(ctx, &s3.PutObjectInput{
113 | Bucket: aws.String(bucket),
114 | Key: aws.String(key),
115 | Body: bytes.NewReader(buffer.Bytes()),
116 | //ACL: types.ObjectCannedACLPublicRead, // Optional: Set ACL for public access if needed
117 | })
118 | if err != nil {
119 | return "", fmt.Errorf("failed to upload to S3: %v", err)
120 | }
121 | return key, nil
122 | } else if mode == "download" {
123 | target, _ := params["target"].(string)
124 | resp, err := client.GetObject(ctx, &s3.GetObjectInput{
125 | Bucket: aws.String(bucket),
126 | Key: aws.String(key),
127 | })
128 | if err != nil {
129 | 			return "", fmt.Errorf("failed to get file from S3: %v", err)
130 | }
131 | defer resp.Body.Close()
132 | outFile, err := os.Create(target)
133 | if err != nil {
134 | return "", fmt.Errorf("creating target file failed: %w", err)
135 | }
136 | defer outFile.Close()
137 | _, err = io.Copy(outFile, resp.Body)
138 | if err != nil {
139 | return "", fmt.Errorf("writing to target file failed: %w", err)
140 | }
141 | return key, nil
142 | } else {
143 | 		return "", fmt.Errorf("mode %s not supported", mode)
144 | }
145 | }
146 |
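147 | // Illustrative call (hypothetical bucket, key and paths). The params keys mirror
148 | // the lookups above; credentials fall back to the corresponding environment
149 | // variables when a key is absent.
150 | //
151 | //	key, err := etlx.S3("upload", map[string]any{
152 | //		"bucket": "my-etlx-bucket",
153 | //		"key":    "exports/summary.xlsx",
154 | //		"source": "reports/summary.xlsx",
155 | //	})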
--------------------------------------------------------------------------------
/internal/etlx/mail.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "bytes"
5 | "encoding/base64"
6 | "fmt"
7 | "html/template"
8 | "mime/multipart"
9 | "mime/quotedprintable"
10 | "net/smtp"
11 | "net/textproto"
12 | "os"
13 | "path/filepath"
14 | "strings"
15 |
16 | "github.com/Masterminds/sprig/v3"
17 | )
18 |
19 | // parseSlice converts an interface{} into a []string safely
20 | func parseSlice(value any) []string {
21 | if value == nil {
22 | return nil
23 | }
24 | slice, ok := value.([]any)
25 | if !ok {
26 | return nil
27 | }
28 | var result []string
29 | for _, v := range slice {
30 | if str, ok := v.(string); ok {
31 | result = append(result, str)
32 | }
33 | }
34 | return result
35 | }
36 |
37 | // renderTemplate processes the HTML template with the provided data
38 | func (etlx *ETLX) RenderTemplate(tmplStr string, data map[string]any) (string, error) {
39 | // fmt.Println(tmplStr)
40 | // Create a FuncMap with some common functions
41 | // funcMap := sprig.FuncMap()
42 | tmpl, err := template.New("tmpl").Funcs(sprig.FuncMap()).Parse(tmplStr)
43 | //tmpl, err := template.New("email").Funcs(funcMap).Parse(tmplStr)
44 | if err != nil {
45 | return "", fmt.Errorf("failed to parse template: %v", err)
46 | }
47 | var buf bytes.Buffer
48 | if err := tmpl.Execute(&buf, data); err != nil {
49 | return "", fmt.Errorf("failed to execute template: %v", err)
50 | }
51 | //fmt.Println(buf.String())
52 | return buf.String(), nil
53 | }
54 |
55 | // sendEmail sends an email with dynamic HTML content, optional CC, BCC, and attachments
56 | func (etlx *ETLX) SendEmail(data map[string]any) error {
57 | // Load SMTP configuration from environment variables
58 | smtpHost := os.Getenv("SMTP_HOST")
59 | smtpPort := os.Getenv("SMTP_PORT")
60 | smtpUsername := os.Getenv("SMTP_USERNAME")
61 | smtpPassword := os.Getenv("SMTP_PASSWORD")
62 | smtpFrom := os.Getenv("SMTP_FROM")
63 | // Extract fields from data
64 | to := parseSlice(data["to"])
65 | cc := parseSlice(data["cc"])
66 | bcc := parseSlice(data["bcc"])
67 | subject, _ := data["subject"].(string)
68 | bodyTemplate, _ := data["body"].(string)
69 | templateData, _ := data["data"].(map[string]any)
70 | attachments := parseSlice(data["attachments"])
71 | if len(to) == 0 {
72 | return fmt.Errorf("recipient 'to' field is required")
73 | }
74 | // Render the HTML template with data
75 | body, err := etlx.RenderTemplate(bodyTemplate, templateData)
76 | if err != nil {
77 | return err
78 | }
79 | // SMTP authentication
80 | auth := smtp.PlainAuth("", smtpUsername, smtpPassword, smtpHost)
81 | // Create email buffer
82 | var email bytes.Buffer
83 | writer := multipart.NewWriter(&email)
84 | boundary := writer.Boundary()
85 | // Headers
86 | headers := map[string]string{
87 | "From": smtpFrom,
88 | "To": strings.Join(to, ", "),
89 | "Subject": subject,
90 | "MIME-Version": "1.0",
91 | "Content-Type": fmt.Sprintf("multipart/mixed; boundary=%s", boundary),
92 | }
93 | if len(cc) > 0 {
94 | headers["Cc"] = strings.Join(cc, ", ")
95 | }
96 | // Write headers
97 | for key, val := range headers {
98 | email.WriteString(fmt.Sprintf("%s: %s\r\n", key, val))
99 | }
100 | email.WriteString("\r\n")
101 | // Add HTML body
102 | htmlPart, _ := writer.CreatePart(textproto.MIMEHeader{
103 | "Content-Type": {"text/html; charset=UTF-8"},
104 | "Content-Transfer-Encoding": {"quoted-printable"},
105 | })
106 | qpWriter := quotedprintable.NewWriter(htmlPart)
107 | qpWriter.Write([]byte(body))
108 | qpWriter.Close()
109 | // Attach files
110 | if len(attachments) > 0 {
111 | for _, attachmentPath := range attachments {
112 | path := ""
113 | if _, okPath := data["path"].(string); okPath {
114 | path = data["path"].(string)
115 | }
116 | 			// Read the attachment content; skip attachments that cannot be read
117 | 			fileContent, err := os.ReadFile(fmt.Sprintf("%s/%s", path, attachmentPath))
118 | 			if err != nil {
119 | 				//return fmt.Errorf("failed to read attachment %s: %v", attachmentPath, err)
120 | 				continue
121 | 			}
127 | // Create attachment part
128 | fileName := filepath.Base(attachmentPath)
129 | attachmentHeader := textproto.MIMEHeader{
130 | "Content-Type": {"application/octet-stream"},
131 | "Content-Disposition": {fmt.Sprintf("attachment; filename=\"%s\"", fileName)},
132 | "Content-Transfer-Encoding": {"base64"},
133 | }
134 | attachmentPart, _ := writer.CreatePart(attachmentHeader)
135 | // Encode file content as base64
136 | encoded := base64.StdEncoding.EncodeToString(fileContent)
137 | attachmentPart.Write([]byte(encoded))
138 | }
139 | }
140 | // Close writer
141 | writer.Close()
142 | // Merge recipients
143 | recipients := append(to, append(cc, bcc...)...)
144 | // Send email
145 | serverAddr := smtpHost + ":" + smtpPort
146 | err = smtp.SendMail(serverAddr, auth, smtpUsername, recipients, email.Bytes())
147 | if err != nil {
148 | return fmt.Errorf("failed to send email: %v", err)
149 | }
150 | return nil
151 | }
152 |
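153 | // Illustrative payload (hypothetical addresses and files). The keys mirror the
154 | // lookups in SendEmail; "data" feeds the Sprig-enabled HTML template in "body",
155 | // and attachments are resolved relative to "path".
156 | //
157 | //	err := etlx.SendEmail(map[string]any{
158 | //		"to":          []any{"team@example.com"},
159 | //		"subject":     "Daily report",
160 | //		"body":        "<p>Hello {{ .name }}</p>",
161 | //		"data":        map[string]any{"name": "Ops"},
162 | //		"path":        "reports",
163 | //		"attachments": []any{"summary.xlsx"},
164 | //	})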
--------------------------------------------------------------------------------
/examples/hf.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |       "The [`httpfs`](https://duckdb.org/docs/extensions/httpfs/overview \"httpfs\") extension introduces support for the hf:// protocol to access data sets hosted in [Hugging Face](https://huggingface.co \"Hugging Face Homepage\") repositories. See the [announcement blog post](https://duckdb.org/2024/05/29/access-150k-plus-datasets-from-hugging-face-with-duckdb.html \"announcement blog post\") for details."
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# ETL"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "```yaml metadata\n",
22 | "name: HF_EXTRACT\n",
23 | "description: \"Example extracting from hf to a local sqlite3 file\"\n",
24 | "connection: \"duckdb:\"\n",
25 | "active: true\n",
26 | "```"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "## HF_EXTRACT"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "```yaml metadata\n",
41 | "name: HF_EXTRACT\n",
42 | "description: \"Example extracting from hf to a local sqlite3 file\"\n",
43 | "table: HF_EXTRACT\n",
44 | "load_conn: \"duckdb:\"\n",
45 | "load_before_sql:\n",
46 | " - load_extentions\n",
47 | " - attach_db\n",
48 | " - create_hf_token\n",
49 | "load_sql: load_query\n",
50 | "load_after_sql: detach_db\n",
51 | "drop_sql: drop_sql\n",
52 | "clean_sql: clean_sql\n",
53 | "rows_sql: nrows\n",
54 | "active: true\n",
55 | "```"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "```sql\n",
63 | "-- load_extentions\n",
64 | "INSTALL sqlite;\n",
65 | "LOAD sqlite;\n",
66 | "INSTALL httpfs;\n",
67 | "LOAD httpfs;\n",
68 | "```"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "```sql\n",
76 | "-- attach_db\n",
77 | "ATTACH 'examples/HF_EXTRACT.db' AS \"DB\" (TYPE SQLITE)\n",
78 | "```"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 |       "Configure your Hugging Face Token in the DuckDB Secrets Manager to access private or gated datasets. First, [visit Hugging Face Settings – Tokens](https://huggingface.co/settings/tokens) to obtain your access token. Second, set it in your DuckDB session using [DuckDB’s Secrets Manager](https://duckdb.org/docs/configuration/secrets_manager.html). DuckDB supports two providers for managing secrets: `CONFIG`, where the token is passed in explicitly (as in the next cell), and `CREDENTIAL_CHAIN`, which tries to fetch the token automatically (e.g. from the Hugging Face CLI cache)."
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "```sql\n",
93 | "-- create_hf_token\n",
94 | "CREATE SECRET hf_token (\n",
95 | " TYPE HUGGINGFACE,\n",
96 | " TOKEN '@HF_TOKEN'\n",
97 | ");\n",
98 | "```"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "```sql\n",
106 | "-- detach_db\n",
107 | "DETACH \"DB\";\n",
108 | "```"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "```sql\n",
116 | "-- load_query\n",
117 | "CREATE OR REPLACE TABLE \"DB\".\"\" AS\n",
118 | "SELECT *\n",
119 | "FROM 'hf://datasets/datasets-examples/doc-formats-csv-1/data.csv'\n",
120 | "LIMIT 10\n",
121 | "```"
122 | ]
123 | },
124 | {
125 | "cell_type": "markdown",
126 | "metadata": {},
127 | "source": [
128 | "```sql\n",
129 | "-- load_query2\n",
130 | "CREATE OR REPLACE TABLE \"DB\".\"\" AS\n",
131 | "SELECT *\n",
132 | "FROM 'hf://datasets/horus-ai-labs/WebInstructSub-150K/data/train-00000-of-00001.parquet'\n",
133 | "```"
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {},
139 | "source": [
140 | "```sql\n",
141 | "-- drop_sql\n",
142 | "DROP TABLE IF EXISTS \"DB\".\"\";\n",
143 | "```"
144 | ]
145 | },
146 | {
147 | "cell_type": "markdown",
148 | "metadata": {},
149 | "source": [
150 | "```sql\n",
151 | "-- clean_sql\n",
152 | "DELETE FROM \"DB\".\"\";\n",
153 | "```"
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "metadata": {},
159 | "source": [
160 | "```sql\n",
161 | "-- nrows\n",
162 | "SELECT COUNT(*) AS \"nrows\" FROM \"DB\".\"\"\n",
163 | "```"
164 | ]
165 | },
166 | {
167 | "cell_type": "markdown",
168 | "metadata": {},
169 | "source": [
170 | "```shell\n",
171 | "bin/etlx --config examples/hf.ipynb\n",
172 | "```"
173 | ]
174 | }
175 | ],
176 | "metadata": {
177 | "kernelspec": {
178 | "display_name": "Python 3",
179 | "language": "python",
180 | "name": "python3"
181 | },
182 | "language_info": {
183 | "codemirror_mode": {
184 | "name": "ipython",
185 | "version": 3
186 | },
187 | "file_extension": ".py",
188 | "mimetype": "text/x-python",
189 | "name": "python",
190 | "nbconvert_exporter": "python",
191 | "pygments_lexer": "ipython3",
192 | "version": "3.11.7"
193 | }
194 | },
195 | "nbformat": 4,
196 | "nbformat_minor": 2
197 | }
198 |
--------------------------------------------------------------------------------
/examples/s3.md:
--------------------------------------------------------------------------------
1 | # ETL
2 |
3 | The [`httpfs`](https://duckdb.org/docs/extensions/httpfs/s3api "httpfs") extension supports reading/writing/globbing files on object storage servers using the S3 API. S3 offers a standard API to read and write remote files (while regular HTTP servers, predating S3, do not offer a common write API). DuckDB conforms to the S3 API, which is now common among industry storage providers.
4 | The preferred way to configure and authenticate to S3 endpoints is to use secrets, and multiple secret providers are available (the default `CONFIG` provider is used in the `S3_EXTRACT` section below; a `CREDENTIAL_CHAIN` sketch is shown at the end of this file).
5 |
6 | ```yaml metadata
7 | name: S3_EXTRACT
8 | description: "Example extracting from S3 to a local sqlite3 file"
9 | connection: "duckdb:"
10 | active: true
11 | ```
12 |
13 | ## VERSION
14 |
15 | ```yaml metadata
16 | name: VERSION
17 | description: "DDB Version"
18 | table: VERSION
19 | load_conn: "duckdb:"
20 | load_before_sql: "ATTACH 'database/S3_EXTRACT.db' AS DB (TYPE SQLITE)"
21 | load_sql: 'CREATE OR REPLACE TABLE DB."" AS SELECT version() AS "VERSION";'
22 | load_after_sql: "DETACH DB;"
23 | rows_sql: 'SELECT COUNT(*) AS "nrows" FROM DB.""'
24 | active: true
25 | ```
26 |
27 | ## train_services
28 |
29 | ```yaml metadata
30 | name: train_services
31 | description: "train_services"
32 | table: train_services
33 | load_conn: "duckdb:"
34 | load_before_sql:
35 | - load_extentions
36 | - attach_db
37 | load_sql: load_query
38 | load_after_sql: detach_db
39 | drop_sql: drop_sql
40 | clean_sql: clean_sql
41 | rows_sql: nrows
42 | active: false
43 | ```
44 |
45 | ```sql
46 | -- load_extentions
47 | INSTALL sqlite;
48 | LOAD sqlite;
49 | INSTALL httpfs;
50 | LOAD httpfs;
51 | ```
52 |
53 | ```sql
54 | -- attach_db
55 | ATTACH 'database/S3_EXTRACT.db' AS "DB" (TYPE SQLITE)
56 | ```
57 |
58 | ```sql
59 | -- detach_db
60 | DETACH "DB";
61 | ```
62 |
63 | ```sql
64 | -- load_query
65 | CREATE OR REPLACE TABLE "DB"."" AS
66 | FROM 's3://duckdb-blobs/train_services.parquet';
67 | ```
68 |
69 | ```sql
70 | -- drop_sql
71 | DROP TABLE IF EXISTS "DB"."";
72 | ```
73 |
74 | ```sql
75 | -- clean_sql
76 | DELETE FROM "DB"."";
77 | ```
78 |
79 | ```sql
80 | -- nrows
81 | SELECT COUNT(*) AS "nrows" FROM "DB".""
82 | ```
83 |
84 | ## S3_EXTRACT
85 |
86 | ```yaml metadata
87 | name: S3_EXTRACT
88 | description: "Example extracting from S3 to a local sqlite3 file"
89 | table: S3_EXTRACT
90 | load_conn: "duckdb:"
91 | load_before_sql:
92 | - load_extentions
93 | - attach_db
94 | - create_S3_token
95 | load_sql: load_query
96 | load_after_sql: detach_db
97 | drop_sql: drop_sql
98 | clean_sql: clean_sql
99 | rows_sql: nrows
100 | active: false
101 | ```
102 |
103 | ```sql
104 | -- load_extentions
105 | INSTALL httpfs;
106 | LOAD httpfs;
107 | ```
108 |
109 | ```sql
110 | -- attach_db
111 | ATTACH 'database/S3_EXTRACT.db' AS "DB" (TYPE SQLITE)
112 | ```
113 |
114 | Example with a [Minio](https://min.io/) local instance
115 |
116 | ```sql
117 | -- create_S3_token
118 | CREATE SECRET S3_token (
119 | TYPE S3,
120 | KEY_ID '@S3_KEY_ID',
121 | SECRET '@S3_SECRET',
122 | ENDPOINT '127.0.0.1:3000',
123 | URL_STYLE 'path'
124 | );
125 | ```
126 |
127 | ```sql
128 | -- detach_db
129 | DETACH "DB";
130 | ```
131 |
132 | ```sql
133 | -- load_query
134 | CREATE OR REPLACE TABLE "DB"."" AS
135 | SELECT *
136 | FROM 's3://uploads/flights.csv';
137 | ```
138 |
139 | ```sql
140 | -- drop_sql
141 | DROP TABLE IF EXISTS "DB"."";
142 | ```
143 |
144 | ```sql
145 | -- clean_sql
146 | DELETE FROM "DB"."";
147 | ```
148 |
149 | ```sql
150 | -- nrows
151 | SELECT COUNT(*) AS "nrows" FROM "DB".""
152 | ```
153 |
154 | # LOGS
155 |
156 | ```yaml metadata
157 | name: LOGS
158 | description: "Example saving logs"
159 | table: etlx_logs
160 | connection: "duckdb:"
161 | before_sql:
162 | - load_extentions
163 | - attach_db
164 | - 'USE DB;'
165 | save_log_sql: load_logs
166 | save_on_err_patt: '(?i)table.+does.+not.+exist|does.+not.+have.+column.+with.+name'
167 | save_on_err_sql:
168 | - create_logs
169 | - get_dyn_queries[create_columns_missing]
170 | - load_logs
171 | after_sql:
172 | - 'USE memory;'
173 | - detach_db
174 | tmp_dir: database
175 | active: true
176 | ```
177 |
178 | ```sql
179 | -- load_extentions
180 | INSTALL Sqlite;
181 | LOAD Sqlite;
182 | INSTALL json;
183 | LOAD json;
184 | ```
185 |
186 | ```sql
187 | -- attach_db
188 | ATTACH 'database/S3_EXTRACT.db' AS "DB" (TYPE SQLITE)
189 | ```
190 |
191 | ```sql
192 | -- detach_db
193 | DETACH "DB";
194 | ```
195 |
196 | ```sql
197 | -- load_logs
198 | INSERT INTO "DB"."" BY NAME
199 | SELECT *
200 | FROM read_json('');
201 | ```
202 |
203 | ```sql
204 | -- create_logs
205 | CREATE TABLE IF NOT EXISTS "DB"."" AS
206 | SELECT *
207 | FROM read_json('');
208 | ```
209 |
210 | ```sql
211 | -- create_columns_missing
212 | WITH source_columns AS (
213 | SELECT column_name, column_type
214 | FROM (DESCRIBE SELECT * FROM read_json(''))
215 | ),
216 | destination_columns AS (
217 | SELECT column_name, data_type as column_type
218 | FROM duckdb_columns
219 | WHERE table_name = ''
220 | ),
221 | missing_columns AS (
222 | SELECT s.column_name, s.column_type
223 | FROM source_columns s
224 | LEFT JOIN destination_columns d ON s.column_name = d.column_name
225 | WHERE d.column_name IS NULL
226 | )
227 | SELECT 'ALTER TABLE "DB"."" ADD COLUMN "' || column_name || '" ' || column_type || ';' AS query
228 | FROM missing_columns;
229 | ```
230 |
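231 | The `create_S3_token` block above uses DuckDB's default `CONFIG` provider, with the key id and secret passed in explicitly. As a rough sketch (illustrative only, not referenced by any section above), the alternative `CREDENTIAL_CHAIN` provider lets DuckDB pick up credentials from the environment or an AWS profile instead:
232 | 
233 | ```sql
234 | -- create_S3_token_credential_chain
235 | CREATE SECRET S3_token_chain (
236 |     TYPE S3,
237 |     PROVIDER CREDENTIAL_CHAIN
238 | );
239 | ```
240 | 
241 | As with the other examples, the configuration can be run with the CLI (assuming the binary was built to `bin/etlx`):
242 | 
243 | ```shell
244 | bin/etlx --config examples/s3.md
245 | ```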
--------------------------------------------------------------------------------
/examples/actions.md:
--------------------------------------------------------------------------------
1 | # ACTIONS
2 |
3 | ```yaml metadata
4 | name: FileOperations
5 | description: "Transfer and organize generated reports"
6 | path: examples
7 | active: true
8 | ```
9 |
10 | ## FTP DOWNLOAD
11 |
12 | ```yaml metadata
13 | name: FetchRemoteReport
14 | description: "Download data file from external FTP"
15 | type: ftp_download
16 | params:
17 | host: "ftp.example.com"
18 | port: "21"
19 | user: "myuser"
20 | password: "@FTP_PASSWORD"
21 | source: "/data/daily_report.csv"
22 | target: "downloads/daily_report.csv"
23 | active: true
24 | ```
25 |
26 | ---
27 |
28 | ## COPY LOCAL FILE
29 |
30 | ```yaml metadata
31 | name: CopyReportToArchive
32 | description: "Copy final report to archive folder"
33 | type: copy_file
34 | params:
35 | source: "nyc_taxy_YYYYMMDD.xlsx"
36 | target: "copy_nyc_taxy_YYYYMMDD.xlsx"
37 | active: false
38 | ```
39 |
40 | ---
41 |
42 | ## Compress to ZIP
43 |
44 | ```yaml metadata
45 | name: CompressReports
46 | description: "Compress report files into a .zip archive"
47 | type: compress
48 | params:
49 | compression: zip
50 | files:
51 | - "nyc_taxy_YYYYMMDD.xlsx"
52 | - "copy_nyc_taxy_YYYYMMDD.xlsx"
53 | output: "nyc_taxy.zip"
54 | active: false
55 | ```
56 |
57 | ---
58 |
59 | ## Compress to GZ
60 |
61 | ```yaml metadata
62 | name: CompressToGZ
63 | description: "Compress a summary file to .gz"
64 | type: compress
65 | params:
66 | compression: gz
67 | files:
68 | - "nyc_taxy_YYYYMMDD.xlsx"
69 | output: "nyc_taxy_YYYYMMDD.xlsx.gz"
70 | active: false
71 | ```
72 |
73 | ---
74 |
75 | ## HTTP DOWNLOAD
76 |
77 | ```yaml metadata
78 | name: DownloadFromAPI
79 | description: "Download dataset from HTTP endpoint"
80 | type: http_download
81 | params:
82 | url: "https://api.example.com/data"
83 | target: "data/today.json"
84 | method: GET
85 | headers:
86 | Authorization: "Bearer @API_TOKEN"
87 | Accept: "application/json"
88 | params:
89 | date: "YYYYMMDD"
90 | limit: "1000"
91 | active: false
92 | ```
93 |
94 | ---
95 |
96 | ## HTTP UPLOAD
97 |
98 | ```yaml metadata
99 | name: PushReportToWebhook
100 | description: "Upload final report to an HTTP endpoint"
101 | type: http_upload
102 | params:
103 | url: "https://webhook.example.com/upload"
104 | method: POST
105 | source: "reports/final.csv"
106 | headers:
107 | Authorization: "Bearer @WEBHOOK_TOKEN"
108 | Content-Type: "multipart/form-data"
109 | params:
110 | type: "summary"
111 | date: "YYYYMMDD"
112 | active: false
113 | ```
114 |
115 | ---
116 |
117 | ## FTP DOWNLOAD
118 |
119 | ```yaml metadata
120 | name: FetchRemoteReport
121 | description: "Download data file from external FTP"
122 | type: ftp_download
123 | params:
124 | host: "ftp.example.com"
125 | username: "myuser"
126 | password: "@FTP_PASSWORD"
127 | source: "/data/daily_report.csv"
128 | target: "downloads/daily_report.csv"
129 | active: false
130 | ```
131 |
132 | ## SFTP DOWNLOAD
133 |
134 | ```yaml metadata
135 | name: FetchRemoteReport
136 | description: "Download data file from external SFTP"
137 | type: sftp_download
138 | params:
139 | host: "sftp.example.com"
140 | username: "myuser"
141 | password: "@FTP_PASSWORD"
142 | source: "/data/daily_report.csv"
143 | target: "downloads/daily_report.csv"
144 | active: false
145 | ```
146 |
147 | ---
148 |
149 | ## S3 UPLOAD
150 |
151 | ```yaml metadata
152 | name: ArchiveToS3
153 | description: "Send latest results to S3 bucket"
154 | type: s3_upload
155 | params:
156 | AWS_ACCESS_KEY_ID: '@AWS_ACCESS_KEY_ID'
157 | AWS_SECRET_ACCESS_KEY: '@AWS_SECRET_ACCESS_KEY'
158 | AWS_REGION: '@AWS_REGION'
159 | AWS_ENDPOINT: 127.0.0.1:3000
160 | S3_FORCE_PATH_STYLE: true
161 | S3_DISABLE_SSL: false
162 | S3_SKIP_SSL_VERIFY: true
163 | bucket: "my-etlx-bucket"
164 | key: "exports/summary_YYYYMMDD.xlsx"
165 | source: "reports/summary.xlsx"
166 | active: false
167 | ```
168 |
169 | ## S3 DOWNLOAD
170 |
171 | ```yaml metadata
172 | name: DownloadFromS3
173 | description: "Download a file from an S3 bucket"
174 | type: s3_download
175 | params:
176 | AWS_ACCESS_KEY_ID: '@AWS_ACCESS_KEY_ID'
177 | AWS_SECRET_ACCESS_KEY: '@AWS_SECRET_ACCESS_KEY'
178 | AWS_REGION: '@AWS_REGION'
179 | AWS_ENDPOINT: 127.0.0.1:3000
180 | S3_FORCE_PATH_STYLE: true
181 | S3_DISABLE_SSL: false
182 | S3_SKIP_SSL_VERIFY: true
183 | bucket: "my-etlx-bucket"
184 | key: "exports/summary_YYYYMMDD.xlsx"
185 | target: "reports/summary.xlsx"
186 | active: false
187 | ```
188 |
189 | ## DB 2 DB EX
190 |
191 | ```yaml metadata
192 | name: WRITE_RESULTS_MSSQL
193 | description: "MSSQL: as of this moment DDB does not have the same support for MSSQL as it has for SQLite, PG or MySQL, so this can be a way to put results into a DB like MSSQL or any other DB supported by sqlx"
194 | type: db_2_db
195 | params:
196 | source:
197 | conn: sqlite3:database/HTTP_EXTRACT.db
198 | before: null
199 | chunk_size: 3
200 | timeout: 30
201 | sql: origin_query
202 | after: null
203 | target:
204 | conn: mssql:sqlserver://sa:@MSSQL_PASSWORD@localhost?database=master
205 | timeout: 30
206 | before:
207 | - create_schema
208 | sql: mssql_sql
209 | after: null
210 | active: true
211 | ```
212 |
213 | ```sql
214 | -- origin_query
215 | SELECT "description", "duration", STRFTIME('%Y-%m-%d %H:%M:%S', "start_at") AS "start_at", "ref"
216 | FROM "etlx_logs"
217 | ORDER BY "start_at" DESC
218 | ```
219 |
220 | ```sql
221 | -- create_schema
222 | IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'etlx_logs' AND type = 'U')
223 | CREATE TABLE [dbo].[etlx_logs] (
224 | [description] NVARCHAR(MAX) NULL,
225 | [duration] BIGINT NULL,
226 | [start_at] DATETIME NULL,
227 | [ref] DATE NULL
228 | );
229 | ```
230 |
231 | ```sql
232 | -- mssql_sql
233 | INSERT INTO [dbo].[etlx_logs] ([:columns]) VALUES
234 | ```
235 |
236 | # ETL
237 |
238 | ```yaml metadata
239 | name: MSSQL_EXTRACT
240 | description: "Example extracting from MSSQL to a local sqlite3 file"
241 | connection: "duckdb:"
242 | database: MSSQL_EXTRACT.db
243 | active: false
244 | ```
245 |
246 | ## MSSQL_EXTRACT
247 |
248 | ```yaml metadata
249 | name: MSSQL_EXTRACT
250 | description: "Example extracting from MSSQL to a local sqlite3 file"
251 | table: logs
252 | to_csv: true
253 | extract_conn: mssql:sqlserver://sa:@MSSQL_PASSWORD@localhost?database=master
254 | extract_sql: SELECT * FROM [dbo].[etlx_logs]
255 | load_conn: "duckdb:"
256 | load_before_sql:
257 | - load_extentions
258 | - attach_db
259 | load_sql: load_query
260 | load_after_sql: detach_db
261 | active: true
262 | ```
263 |
264 | ```sql
265 | -- load_extentions
266 | INSTALL sqlite;
267 | LOAD sqlite;
268 | ```
269 |
270 | ```sql
271 | -- attach_db
272 | ATTACH 'database/MSSQL_EXTRACT.db' AS "DB" (TYPE SQLITE)
273 | ```
274 |
275 | ```sql
276 | -- detach_db
277 | DETACH "DB";
278 | ```
279 |
280 | ```sql
281 | -- load_query
282 | CREATE OR REPLACE TABLE "DB"."" AS
283 | SELECT *
284 | FROM '';
285 | ```
286 |
--------------------------------------------------------------------------------
/examples/pg.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # ETL
5 |
6 |
7 |
8 | ```yaml metadata
9 | name: HTTP_EXTRACT
10 | description: "Example extracting from the web to a local Postgres database"
11 | connection: "postgres:user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable"
12 | database: "postgres:user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable"
13 | active: true
14 | ```
15 |
16 | ## VERSION
17 |
18 | ```yaml metadata
19 | name: VERSION
20 | description: "DDB Version"
21 | table: VERSION
22 | load_conn: "duckdb:"
23 | load_before_sql: "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)"
24 | load_sql: 'CREATE OR REPLACE TABLE DB."" AS SELECT version() AS "VERSION";'
25 | load_after_sql: "DETACH DB;"
26 | rows_sql: 'SELECT COUNT(*) AS "nrows" FROM DB.""'
27 | active: true
28 | ```
29 |
30 | ## NYC_TAXI
31 |
32 | ```yaml metadata
33 | name: NYC_TAXI
34 | description: "Example extracting from the web to a local Postgres database"
35 | table: NYC_TAXI
36 | load_conn: "duckdb:"
37 | load_before_sql: "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)"
38 | load_sql: load_query
39 | load_after_sql: DETACH "DB"
40 | drop_sql: DROP TABLE IF EXISTS "DB".""
41 | clean_sql: DELETE FROM "DB".""
42 | rows_sql: SELECT COUNT(*) AS "nrows" FROM "DB".""
43 | active: false
44 | ```
45 |
46 | ```sql
47 | -- load_query
48 | CREATE OR REPLACE TABLE "DB"."" AS
49 | SELECT *
50 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2024-01.parquet';
51 | ```
52 |
53 | ## PeadkHours
54 |
55 | ```yaml metadata
56 | name: PeadkHours
57 | description: Peak Hours Analysis
58 | table: PeadkHours
59 | transform_conn: "duckdb:"
60 | transform_before_sql: "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)"
61 | transform_sql: preadk_hours_load_query
62 | transform_after_sql: DETACH "DB"
63 | drop_sql: DROP TABLE IF EXISTS "DB".""
64 | clean_sql: DELETE FROM "DB".""
65 | rows_sql: SELECT COUNT(*) AS "nrows" FROM "DB".""
66 | active: true
67 | ```
68 |
69 | ```sql
70 | -- preadk_hours_load_query
71 | CREATE OR REPLACE TABLE "DB"."" AS
72 | [[PeakHoursAnalysis]]
73 | ```
74 |
75 | ## DailyRevenueTripVolume
76 |
77 | ```yaml metadata
78 | name: DailyRevenueTripVolume
79 | description: Daily Revenue and Trip Volume
80 | has_placeholders: true
81 | schema: TRF
82 | database: "postgres:user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable search_path=TRF"
83 | table: DailyRevenueTripVolume
84 | transform_conn: "duckdb:"
85 | transform_before_sql: "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)"
86 | transform_sql:
87 | - CREATE SCHEMA IF NOT EXISTS "DB".""
88 | - DailyRevenueTripVolume
89 | transform_after_sql: DETACH "DB"
90 | drop_sql: DROP TABLE IF EXISTS "DB"."".""
91 | clean_sql: DELETE FROM "DB"."".""
92 | rows_sql: SELECT COUNT(*) AS "nrows" FROM "DB"."".""
93 | active: true
94 | ```
95 |
96 | ```sql
97 | -- DailyRevenueTripVolume
98 | CREATE OR REPLACE TABLE "DB"."TRF"."" AS
99 | SELECT CAST(tpep_pickup_datetime AS DATE) AS trip_date,
100 | COUNT(*) AS total_trips,
101 | ROUND(SUM(total_amount), 2) AS total_revenue,
102 | ROUND(AVG(total_amount), 2) AS avg_revenue_per_trip,
103 | ROUND(SUM(trip_distance), 2) AS total_miles,
104 | ROUND(AVG(trip_distance), 2) AS avg_trip_distance
105 | FROM DB.NYC_TAXI
106 | GROUP BY trip_date
107 | ORDER BY trip_date
108 | ```
109 |
110 | # EXPORTS
111 |
112 | Exports data to files.
113 |
114 | ```yaml metadata
115 | name: EXPORTS
116 | description: Exports Examples
117 | connection: "duckdb:"
118 | path: "static/uploads/tmp"
119 | active: true
120 | ```
121 |
122 | ## ExportDailyRevenueTripVolume
123 |
124 | ```yaml metadata
125 | name: ExportDailyRevenueTripVolume
126 | description: "Export data to a Parquet file"
127 | connection: "duckdb:"
128 | before_sql: "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)"
129 | export_sql: export
130 | after_sql: "DETACH DB"
131 | path: 'ExportDailyRevenueTripVolume_{YYYYMMDD}.{TSTAMP}.parquet'
132 | tmp_prefix: 'tmp'
133 | active: true
134 | ```
135 |
136 | ```sql
137 | -- export
138 | COPY (
139 | SELECT CAST(tpep_pickup_datetime AS DATE) AS trip_date,
140 | COUNT(*) AS total_trips,
141 | ROUND(SUM(total_amount), 2) AS total_revenue,
142 | ROUND(AVG(total_amount), 2) AS avg_revenue_per_trip,
143 | ROUND(SUM(trip_distance), 2) AS total_miles,
144 | ROUND(AVG(trip_distance), 2) AS avg_trip_distance
145 | FROM DB.NYC_TAXI
146 | GROUP BY trip_date
147 | ORDER BY trip_date
148 | ) TO ''
149 | ```
150 |
151 | ## hist_logs
152 |
153 | ```yaml metadata
154 | name: hist_logs
155 | description: "Export data to a Parquet file"
156 | connection: "duckdb:"
157 | before_sql: "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)"
158 | export_sql: export
159 | after_sql: "DETACH DB"
160 | path: 'hist_logs_{YYYYMMDD}.{TSTAMP}.parquet'
161 | tmp_prefix: 'tmp'
162 | active: true
163 | ```
164 |
165 | ```sql
166 | -- export
167 | COPY (
168 | SELECT *
169 | FROM DB.etlx_logs
170 | ) TO ''
171 | ```
172 |
173 | # LOGS
174 |
175 | ```yaml metadata
176 | name: LOGS
177 | description: "Example saving logs"
178 | table: etlx_logs
179 | connection: "duckdb:"
180 | before_sql:
181 | - "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)"
182 | - 'USE DB;'
183 | - LOAD json
184 | - "get_dyn_queries[create_columns_missing](ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES), DETACH DB)"
185 | save_log_sql: load_logs
186 | save_on_err_patt: '(?i)table.+does.+not.+exist'
187 | save_on_err_sql: create_logs
188 | after_sql:
189 | - 'USE memory;'
190 | - DETACH "DB"
191 | active: true
192 | ```
193 |
194 | ```sql
195 | -- load_logs
196 | INSERT INTO "DB"."" BY NAME
197 | SELECT *
198 | FROM read_json('');
199 | ```
200 |
201 | ```sql
202 | -- create_logs
203 | CREATE TABLE IF NOT EXISTS "DB"."" AS
204 | SELECT *
205 | FROM read_json('');
206 | ```
207 |
208 | ```sql
209 | -- create_columns_missing
210 | WITH source_columns AS (
211 | SELECT column_name, column_type
212 | FROM (DESCRIBE SELECT * FROM read_json(''))
213 | ),
214 | destination_columns AS (
215 | SELECT column_name, data_type as column_type
216 | FROM duckdb_columns
217 | WHERE table_name = ''
218 | ),
219 | missing_columns AS (
220 | SELECT s.column_name, s.column_type
221 | FROM source_columns s
222 | LEFT JOIN destination_columns d ON s.column_name = d.column_name
223 | WHERE d.column_name IS NULL
224 | )
225 | SELECT 'ALTER TABLE "DB"."" ADD COLUMN "' || column_name || '" ' || column_type || ';' AS query
226 | FROM missing_columns
227 | WHERE (SELECT COUNT(*) FROM destination_columns) > 0;
228 | ```
229 |
230 | # REQUIRES
231 |
232 | ```yaml metadata
233 | name: REQUIRES
234 | description: "Example requires"
235 | active: true
236 | ```
237 |
238 | ## PeakHoursAnalysis
239 |
240 | ```yaml metadata
241 | name: PeakHoursAnalysis
242 | description: "Analyze peak hours for NYC Yellow Taxi rides"
243 | path: examples/PeakHoursAnalysis.sql
244 | ```
245 |
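246 | Running this example requires a reachable Postgres instance matching the connection string used above (user `postgres`, database `ETLX_DATA` on `localhost:5432`). Assuming the binary was built to `bin/etlx` as in the other examples:
247 | 
248 | ```shell
249 | bin/etlx --config examples/pg.md
250 | ```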
--------------------------------------------------------------------------------
/internal/etlx/ducklake.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "regexp"
5 | "strings"
6 | )
7 |
8 | // DuckLakeParseResult represents the result of parsing a DuckLake string
9 | type DuckLakeParseResult struct {
10 | IsDuckLake bool `json:"is_ducklake"`
11 | HasAttach bool `json:"has_attach"`
12 | DSN string `json:"dsn"`
13 | DuckLakeName string `json:"ducklake_name"`
14 | DataPath string `json:"data_path"`
15 | }
16 |
17 | // DuckLakeOccurrence represents a single DuckLake occurrence found in text
18 | type DuckLakeOccurrence struct {
19 | DuckLakeString string `json:"ducklake_string"`
20 | HasAttach bool `json:"has_attach"`
21 | DSN string `json:"dsn"`
22 | DuckLakeName string `json:"ducklake_name"`
23 | DataPath string `json:"data_path"`
24 | }
25 |
26 | // DuckLakeParser handles parsing of DuckLake format strings
27 | type DuckLakeParser struct {
28 | mainPattern *regexp.Regexp
29 | dataPathPattern *regexp.Regexp
30 | scanPattern *regexp.Regexp
31 | }
32 |
33 | // NewDuckLakeParser creates a new DuckLakeParser instance
34 | func NewDuckLakeParser() *DuckLakeParser {
35 | // Main regex pattern to match ducklake format
36 | // This pattern handles:
37 | // 1. Optional ATTACH keyword at the beginning
38 | // 2. Required 'ducklake:' prefix
39 | // 3. DSN (data source name) - can contain various characters
40 | // 4. Optional AS clause with ducklake_name
41 | // 5. Optional parameters like DATA_PATH
42 | mainPattern := regexp.MustCompile(`(?i)^\s*(ATTACH\s+)?['"]?ducklake:([^'"\)\s]+)['"]?(?:\s+AS\s+([a-zA-Z_][a-zA-Z0-9_]*))?(?:\s*\(([^)]*)\))?\s*;?\s*$`)
43 |
44 | // Pattern to extract DATA_PATH from parameters
45 | dataPathPattern := regexp.MustCompile(`(?i)DATA_PATH\s+['"]([^'"]+)['"]`)
46 |
47 | // Pattern for finding potential ducklake occurrences in logs
48 | // This is more flexible and can find partial matches
49 | scanPattern := regexp.MustCompile(`(?i)(?:(?:^|\s)(ATTACH)\s+)?['"]?(ducklake:[^'"\)\s;]+)['"]?(?:\s+AS\s+([a-zA-Z_][a-zA-Z0-9_]*))?(?:\s*\([^)]*\))?\s*;?`)
50 |
51 | return &DuckLakeParser{
52 | mainPattern: mainPattern,
53 | dataPathPattern: dataPathPattern,
54 | scanPattern: scanPattern,
55 | }
56 | }
57 |
58 | // Parse parses a string to check if it's in ducklake format and extract components
59 | func (p *DuckLakeParser) Parse(input string) DuckLakeParseResult {
60 | result := DuckLakeParseResult{
61 | IsDuckLake: false,
62 | HasAttach: false,
63 | DSN: "",
64 | DuckLakeName: "",
65 | DataPath: "",
66 | }
67 |
68 | if input == "" {
69 | return result
70 | }
71 |
72 | matches := p.mainPattern.FindStringSubmatch(strings.TrimSpace(input))
73 |
74 | if len(matches) > 0 {
75 | result.IsDuckLake = true
76 |
77 | // Check if ATTACH keyword is present (group 1)
78 | if matches[1] != "" {
79 | result.HasAttach = true
80 | }
81 |
82 | // Extract DSN (group 2)
83 | if len(matches) > 2 && matches[2] != "" {
84 | result.DSN = matches[2]
85 | }
86 |
87 | // Extract ducklake name (group 3)
88 | if len(matches) > 3 && matches[3] != "" {
89 | result.DuckLakeName = matches[3]
90 | }
91 |
92 | // Extract DATA_PATH from parameters (group 4)
93 | if len(matches) > 4 && matches[4] != "" {
94 | dataPathMatches := p.dataPathPattern.FindStringSubmatch(matches[4])
95 | if len(dataPathMatches) > 1 {
96 | result.DataPath = dataPathMatches[1]
97 | }
98 | }
99 | }
100 |
101 | return result
102 | }
103 |
104 | // FindDuckLakeOccurrences finds all DuckLake occurrences in a multi-line string
105 | func (p *DuckLakeParser) FindDuckLakeOccurrences(text string) []DuckLakeOccurrence {
106 | if text == "" {
107 | return []DuckLakeOccurrence{}
108 | }
109 |
110 | var occurrences []DuckLakeOccurrence
111 | matches := p.scanPattern.FindAllStringSubmatch(text, -1)
112 |
113 | for _, match := range matches {
114 | if len(match) > 2 {
115 | // Extract components
116 | hasAttach := match[1] != ""
117 | fullDSN := match[2]
118 | duckLakeName := ""
119 | if len(match) > 3 {
120 | duckLakeName = match[3]
121 | }
122 | parameters := ""
123 | if len(match) > 4 {
124 | parameters = match[4]
125 | }
126 |
127 | // Extract DSN (remove ducklake: prefix)
128 | dsn := fullDSN
129 | if strings.HasPrefix(fullDSN, "ducklake:") {
130 | dsn = fullDSN[9:]
131 | }
132 |
133 | // Extract DATA_PATH if present
134 | dataPath := ""
135 | if parameters != "" {
136 | dataPathMatch := p.dataPathPattern.FindStringSubmatch(parameters)
137 | if len(dataPathMatch) > 1 {
138 | dataPath = dataPathMatch[1]
139 | }
140 | }
141 |
142 | // Create occurrence record
143 | occurrence := DuckLakeOccurrence{
144 | DuckLakeString: strings.TrimSpace(match[0]),
145 | HasAttach: hasAttach,
146 | DSN: dsn,
147 | DuckLakeName: duckLakeName,
148 | DataPath: dataPath,
149 | }
150 |
151 | occurrences = append(occurrences, occurrence)
152 | }
153 | }
154 |
155 | return occurrences
156 | }
157 |
158 | // FindDuckLakeStrings finds all DuckLake strings in a multi-line text (simple version)
159 | func (p *DuckLakeParser) FindDuckLakeStrings(text string) []string {
160 | occurrences := p.FindDuckLakeOccurrences(text)
161 | 	var results []string
162 | 
163 | 	for _, occ := range occurrences {
164 | 		results = append(results, occ.DuckLakeString)
165 | 	}
166 | 
167 | 	return results
168 | }
169 |
170 | // FindDuckLakeDSNs finds all unique DSNs in a multi-line text
171 | func (p *DuckLakeParser) FindDuckLakeDSNs(text string) []string {
172 | occurrences := p.FindDuckLakeOccurrences(text)
173 | dsnMap := make(map[string]bool)
174 | var dsns []string
175 |
176 | for _, occ := range occurrences {
177 | if occ.DSN != "" && !dsnMap[occ.DSN] {
178 | dsnMap[occ.DSN] = true
179 | dsns = append(dsns, occ.DSN)
180 | }
181 | }
182 |
183 | return dsns
184 | }
185 |
186 | // IsDuckLakeFormat quickly checks if a string is in ducklake format
187 | func (p *DuckLakeParser) IsDuckLakeFormat(input string) bool {
188 | return p.Parse(input).IsDuckLake
189 | }
190 |
191 | // ExtractDSN extracts just the DSN from a ducklake format string
192 | func (p *DuckLakeParser) ExtractDSN(input string) string {
193 | return p.Parse(input).DSN
194 | }
195 |
196 | // ExtractDuckLakeName extracts just the ducklake name from a ducklake format string
197 | func (p *DuckLakeParser) ExtractDuckLakeName(input string) string {
198 | return p.Parse(input).DuckLakeName
199 | }
200 |
201 | // ExtractDataPath extracts just the DATA_PATH value from a ducklake format string
202 | func (p *DuckLakeParser) ExtractDataPath(input string) string {
203 | return p.Parse(input).DataPath
204 | }
205 |
206 | // HasAttachKeyword checks if the string contains the ATTACH keyword
207 | func (p *DuckLakeParser) HasAttachKeyword(input string) bool {
208 | return p.Parse(input).HasAttach
209 | }
210 |
211 | // ParseDuckLakeString is a convenience function to parse a ducklake string
212 | func ParseDuckLakeString(input string) DuckLakeParseResult {
213 | parser := NewDuckLakeParser()
214 | return parser.Parse(input)
215 | }
216 |
217 | // FindDuckLakeOccurrences is a convenience function to find all DuckLake occurrences in a multi-line string
218 | func FindDuckLakeOccurrences(text string) []DuckLakeOccurrence {
219 | parser := NewDuckLakeParser()
220 | return parser.FindDuckLakeOccurrences(text)
221 | }
222 |
223 | // FindDuckLakeStrings is a convenience function to find all DuckLake strings in a multi-line text
224 | func FindDuckLakeStrings(text string) []string {
225 | parser := NewDuckLakeParser()
226 | return parser.FindDuckLakeStrings(text)
227 | }
228 |
229 | // FindDuckLakeDSNs is a convenience function to find all unique DSNs in a multi-line text
230 | func FindDuckLakeDSNs(text string) []string {
231 | parser := NewDuckLakeParser()
232 | return parser.FindDuckLakeDSNs(text)
233 | }
234 |
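235 | // Illustrative parse (the values are made up); the package-level helpers wrap the
236 | // same parser used by the methods above.
237 | //
238 | //	res := ParseDuckLakeString(`ATTACH 'ducklake:metadata.ducklake' AS my_lake (DATA_PATH 'data/')`)
239 | //	// res.HasAttach == true, res.DSN == "metadata.ducklake",
240 | //	// res.DuckLakeName == "my_lake", res.DataPath == "data/"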
--------------------------------------------------------------------------------
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
1 | name: Build and Release
2 |
3 | on:
4 | push:
5 | tags:
6 | - 'v*'
7 |
8 | jobs:
9 | build-linux:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: Install Dependencies
13 | run: |
14 | sudo apt-get update
15 | sudo apt-get install -y build-essential gcc g++ unixodbc unixodbc-dev
16 | - name: Checkout Code
17 | uses: actions/checkout@v3
18 | - name: Setup Go
19 | uses: actions/setup-go@v4
20 | with:
21 | go-version: '1.25'
22 | - name: Build Linux Binaries
23 | run: |
24 | mkdir -p dist
25 | CGO_ENABLED=1 go build -o dist/etlx-linux-amd64 ./cmd/main.go
26 | - name: Upload Artifacts
27 | uses: actions/upload-artifact@v4
28 | with:
29 | name: linux-binary
30 | path: dist/*linux*
31 |
32 | build-windows-linking:
33 | runs-on: windows-latest
34 | steps:
35 | # Step 1: Checkout the code
36 | - name: Checkout Code
37 | uses: actions/checkout@v3
38 |
39 | # Step 2: Set up Go environment
40 | - name: Setup Go
41 | uses: actions/setup-go@v4
42 | with:
43 | go-version: '1.25'
44 |
45 | - name: Setup MSBuild (for Visual Studio environment)
46 | uses: microsoft/setup-msbuild@v2
47 |
48 | # Step 3: Download DuckDB Precompiled Library
49 | - name: Download DuckDB Library
50 | run: |
51 | $version = "v1.4.3"
52 | $duckdb_url = "https://github.com/duckdb/duckdb/releases/download/$version/libduckdb-windows-amd64.zip"
53 | $destinationPath = "$(Get-Location)\duckdb"
54 | Invoke-WebRequest -Uri $duckdb_url -OutFile "duckdb.zip"
55 | Expand-Archive -Path "duckdb.zip" -DestinationPath $destinationPath
56 | Write-Host "DuckDB library extracted to $destinationPath"
57 | $duckdb_url = "https://github.com/duckdb/duckdb/releases/download/$version/libduckdb-windows-arm64.zip"
58 | $destinationPath = "$(Get-Location)\duckdbarm64"
59 | Invoke-WebRequest -Uri $duckdb_url -OutFile "duckdb.zip"
60 | Expand-Archive -Path "duckdb.zip" -DestinationPath $destinationPath
61 | Write-Host "DuckDB library extracted to $destinationPath"
62 |
63 |       # Step 4: Set Environment Variables
64 | - name: Set Environment Variables
65 | run: |
66 | echo "CGO_ENABLED=1" >> $env:GITHUB_ENV
67 | echo "CGO_CFLAGS=-I$(Get-Location)\duckdb\" >> $env:GITHUB_ENV
68 | echo "CGO_LDFLAGS=-L$(Get-Location)\duckdb\ -lduckdb" >> $env:GITHUB_ENV
69 |
70 |       # Step 5: Verify DuckDB Library
71 | - name: Verify DuckDB Library
72 | run: |
73 | $libPath = "$(Get-Location)\duckdb\"
74 | if (!(Test-Path "$libPath\duckdb.lib")) {
75 | Write-Error "duckdb.lib not found in $libPath"
76 | }
77 | Write-Host "duckdb.lib found in $libPath"
78 |
79 |       # Step 6: Build the Application
80 | - name: Build Windows Binary
81 | run: |
82 | mkdir dist
83 | go build -o dist/etlx-windows-linking-amd64.exe ./cmd/main.go
84 | echo "CGO_ENABLED=1" >> $env:GITHUB_ENV
85 | echo "CGO_CFLAGS=-I$(Get-Location)\duckdbarm64\" >> $env:GITHUB_ENV
86 | echo "CGO_LDFLAGS=-L$(Get-Location)\duckdbarm64\ -lduckdb" >> $env:GITHUB_ENV
87 | echo "GOARCH=arm64" >> $env:GITHUB_ENV
88 | go build -o dist/etlx-windows-linking-arm64.exe ./cmd/main.go
89 | #go build -tags=duckdb_use_lib -o dist/etlx-windows-amd64.exe ./cmd/main.go
90 |
91 | - name: Upload Artifacts
92 | uses: actions/upload-artifact@v4
93 | with:
94 | name: windows-binary-linking
95 | path: dist/*windows*
96 | build-windows:
97 | runs-on: windows-latest
98 | steps:
99 | - name: Checkout Code
100 | uses: actions/checkout@v3
101 |
102 | - name: Setup Go
103 | uses: actions/setup-go@v4
104 | with:
105 | go-version: '1.25'
106 |
107 | - name: Setup MSBuild (for Visual Studio environment)
108 | uses: microsoft/setup-msbuild@v2
109 |
110 | - name: Set Environment Variables for CGO
111 | run: |
112 | echo "CGO_ENABLED=1" >> $env:GITHUB_ENV
113 | # echo "CC=cl.exe" >> $env:GITHUB_ENV
114 |
115 | - name: Build with MSVC (Visual Studio compiler)
116 | run: |
117 | mkdir dist
118 | go build -o dist/etlx-windows-amd64.exe ./cmd/main.go
119 | echo "GOARCH=arm64" >> $env:GITHUB_ENV
120 | go build -o dist/etlx-windows-arm64.exe ./cmd/main.go
121 |
122 | - name: Upload MSVC Artifacts
123 | uses: actions/upload-artifact@v4
124 | with:
125 | name: windows-msvc-binary
126 | path: dist/*windows*
127 | build-darwin:
128 | runs-on: macos-latest
129 | steps:
130 | # Step 1: Checkout the code
131 | - name: Checkout Code
132 | uses: actions/checkout@v3
133 |
134 | # Step 2: Setup Go
135 | - name: Setup Go
136 | uses: actions/setup-go@v4
137 | with:
138 | go-version: '1.25'
139 |
140 | # Step 3: Install UnixODBC
141 | - name: Install UnixODBC
142 | run: |
143 | brew install unixodbc
144 | brew --prefix unixodbc
145 |
146 | # Step 4: Set Environment Variables
147 | - name: Set Environment Variables
148 | run: |
149 | ODBC_PREFIX=$(brew --prefix unixodbc)
150 | echo "CGO_ENABLED=1" >> $GITHUB_ENV
151 | echo "CGO_CFLAGS=-I$ODBC_PREFIX/include" >> $GITHUB_ENV
152 | echo "CGO_LDFLAGS=-L$ODBC_PREFIX/lib -lodbc" >> $GITHUB_ENV
153 |
154 | # Step 5: Build the Application
155 | - name: Build MacOS Binary
156 | run: |
157 | mkdir dist
158 | go build -o dist/etlx-macos-amd64 ./cmd/main.go
159 | GOARCH=arm64 go build -o dist/etlx-macos-arm64 ./cmd/main.go
160 |
161 | - name: Upload Artifacts
162 | uses: actions/upload-artifact@v4
163 | with:
164 | name: macos-binary
165 | path: dist/*macos*
166 |
167 | release:
168 | permissions: write-all
169 | runs-on: ubuntu-latest
170 | needs: [build-linux, build-windows, build-windows-linking, build-darwin]
171 | steps:
172 | - name: Download Windows Binary with MSVC
173 | uses: actions/download-artifact@v4
174 | with:
175 | name: windows-msvc-binary
176 | path: dist
177 | - name: Download MacOS Binary
178 | uses: actions/download-artifact@v4
179 | with:
180 | name: macos-binary
181 | path: dist
182 | - name: Download Linux Binary
183 | uses: actions/download-artifact@v4
184 | with:
185 | name: linux-binary
186 | path: dist
187 | - name: Download Windows Binary With Linking
188 | uses: actions/download-artifact@v4
189 | with:
190 | name: windows-binary-linking
191 | path: dist
192 | - name: Changelog
193 | uses: scottbrenner/generate-changelog-action@master
194 | id: Changelog
195 | env:
196 | REPO: ${{ github.repository }}
197 | - name: Zip Binaries
198 | run: |
199 | zip -j dist/etlx-linux-amd64.zip dist/etlx-linux-amd64
200 | zip -j dist/etlx-macos-amd64.zip dist/etlx-macos-amd64
201 | zip -j dist/etlx-windows-amd64.zip dist/etlx-windows-amd64.exe
202 | zip -j dist/etlx-windows-linking-amd64.zip dist/etlx-windows-linking-amd64.exe
203 | zip -j dist/etlx-macos-arm64.zip dist/etlx-macos-arm64
204 | zip -j dist/etlx-windows-arm64.zip dist/etlx-windows-arm64.exe
205 | zip -j dist/etlx-windows-linking-arm64.zip dist/etlx-windows-linking-arm64.exe
206 | #zip -j dist/etlx-linux-arm64.zip dist/etlx-linux-arm64
207 | - name: Create Release
208 | uses: softprops/action-gh-release@v1
209 | with:
210 | tag_name: ${{ github.ref_name }}
211 | draft: false
212 | prerelease: false
213 | files: |
214 | dist/etlx-linux-amd64.zip
215 | dist/etlx-macos-amd64.zip
216 | dist/etlx-windows-amd64.zip
217 | dist/etlx-windows-arm64.zip
218 | dist/etlx-windows-linking-amd64.zip
219 | dist/etlx-macos-arm64.zip
220 | dist/etlx-windows-linking-arm64.zip
221 | # dist/etlx-linux-arm64.zip
222 | env:
223 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
224 |
--------------------------------------------------------------------------------
/internal/etlx/action_db2db.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "context"
5 | "database/sql"
6 | "fmt"
7 | "os"
8 | "regexp"
9 | "strconv"
10 | "strings"
11 | "time"
12 |
13 | "github.com/realdatadriven/etlx/internal/db"
14 | )
15 |
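// ScanRowToMap scans the current row of rows into a map keyed by column name.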
16 | func ScanRowToMap(rows *sql.Rows) (map[string]interface{}, error) {
17 | columns, err := rows.Columns()
18 | if err != nil {
19 | return nil, fmt.Errorf("failed to get columns: %w", err)
20 | }
21 | values := make([]interface{}, len(columns))
22 | valuePointers := make([]interface{}, len(columns))
23 | for i := range values {
24 | valuePointers[i] = &values[i]
25 | }
26 | if err := rows.Scan(valuePointers...); err != nil {
27 | return nil, fmt.Errorf("failed to scan row: %w", err)
28 | }
29 | rowMap := make(map[string]interface{})
30 | for i, colName := range columns {
31 | rowMap[colName] = values[i]
32 | }
33 | return rowMap, nil
34 | }
35 |
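// DB2DB streams rows from the source connection to the target connection in chunks (1000 rows by default),
// running any optional "before"/"after" queries on each side.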
36 | func (etlx *ETLX) DB2DB(params map[string]any, item map[string]any, dateRef []time.Time) error {
37 | // Extract and validate required params
38 | source, _ := params["source"].(map[string]any)
39 | target, _ := params["target"].(map[string]any)
40 | source_conn, ok := source["conn"].(string)
41 | if !ok {
42 | return fmt.Errorf("no source conn string detected %s", source_conn)
43 | }
44 | target_conn, ok := target["conn"].(string)
45 | if !ok {
46 | return fmt.Errorf("no target conn string detected %s", target_conn)
47 | }
48 | source_sql, ok := source["sql"].(string)
49 | if !ok {
50 | return fmt.Errorf("no source conn string detected %s", source_sql)
51 | }
52 | target_sql, ok := target["sql"].(string)
53 | if !ok {
54 | return fmt.Errorf("no target conn string detected %s", target_sql)
55 | }
56 | dbSourceConn, err := etlx.GetDB(source_conn)
57 | if err != nil {
58 | return fmt.Errorf("error connecting to source: %v %s", err, source_conn)
59 | }
60 | defer dbSourceConn.Close()
61 | dbTargetConn, err := etlx.GetDB(target_conn)
62 | if err != nil {
63 | return fmt.Errorf("error connecting to target: %v %s", err, target_conn)
64 | }
65 | defer dbTargetConn.Close()
66 | // BEGIN / STARTING QUERIES
67 | before_source, ok := source["before"]
68 | if ok {
69 | err = etlx.ExecuteQuery(dbSourceConn, before_source, item, "", "", dateRef)
70 | if err != nil {
71 | return fmt.Errorf("error executing source preparation queries: %s", err)
72 | }
73 | }
74 | //fmt.Println(target_sql, item)
75 | sql_target := target_sql
76 | if _, ok := item[target_sql]; ok {
77 | sql_target = item[target_sql].(string)
78 | }
79 | sql := source_sql
80 | if _, ok := item[source_sql]; ok {
81 | 		sql = item[source_sql].(string)
82 | }
83 | chunk_size := 1_000
84 | if _, ok := source["chunk_size"]; ok {
85 | j, err := strconv.Atoi(fmt.Sprintf("%v", source["chunk_size"]))
86 | if err == nil {
87 | chunk_size = j
88 | }
89 | }
90 | //fmt.Printf("%T->%d", chunk_size, chunk_size)
91 | timeout := 1200
92 | if _, ok := source["timeout"]; ok {
93 | j, err := strconv.Atoi(fmt.Sprintf("%v", source["timeout"]))
94 | if err == nil {
95 | timeout = j
96 | }
97 | }
98 | sql = etlx.SetQueryPlaceholders(sql, "", "", dateRef)
99 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
100 | defer cancel()
101 | rows, err := dbSourceConn.QueryRows(ctx, sql, []any{}...)
102 | if err != nil {
103 | return fmt.Errorf("failed to execute source query %s", err)
104 | }
105 | defer rows.Close()
106 | /*columns, err := rows.Columns()
107 | if err != nil {
108 | fmt.Printf("failed to get columns: %s", err)
109 | }*/
110 | // BEGIN / STARTING QUERIES
111 | before_target, ok := target["before"]
112 | if ok {
113 | err = etlx.ExecuteQuery(dbTargetConn, before_target, item, "", "", dateRef)
114 | if err != nil {
115 | return fmt.Errorf("error executing target preparation queries: %s", err)
116 | }
117 | }
118 | i := 0
119 | var result []map[string]any
120 | for rows.Next() {
121 | i += 1
122 | row, err := ScanRowToMap(rows)
123 | if err != nil {
124 | return fmt.Errorf("failed to scan row to map: %w", err)
125 | }
126 | result = append(result, row)
127 | // send to target
128 | if i >= chunk_size {
129 | i = 0
130 | _, err = etlx.UpdateTarget(dbTargetConn, sql_target, result)
131 | if err != nil {
132 | // fmt.Printf("failed update the target: %s", err)
133 | return fmt.Errorf("main target query faild: %w", err)
134 | }
135 | result = []map[string]any{} //result[:0]
136 | }
137 | }
138 | if err := rows.Err(); err != nil {
139 | fmt.Printf("row iteration error: %s", err)
140 | return fmt.Errorf("row iteration error: %w", err)
141 | }
142 | if len(result) > 0 {
143 | _, err = etlx.UpdateTarget(dbTargetConn, sql_target, result)
144 | if err != nil {
145 | return fmt.Errorf("main target query faild: %w", err)
146 | }
147 | }
148 | // END / CLOSING QUERIES
149 | after_source, ok := source["after"]
150 | if ok {
151 | err = etlx.ExecuteQuery(dbSourceConn, after_source, item, "", "", dateRef)
152 | if err != nil {
153 | return fmt.Errorf("error executing source closing queries: %s", err)
154 | }
155 | }
156 | after_target, ok := target["after"]
157 | if ok {
158 | err = etlx.ExecuteQuery(dbTargetConn, after_target, item, "", "", dateRef)
159 | if err != nil {
160 | return fmt.Errorf("error executing source closing queries: %s", err)
161 | }
162 | }
163 | return nil
164 | }
165 |
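// BuildInsertSQL appends a VALUES list built from data to the given INSERT header, substituting the
// :columns placeholder with the column names taken from the first row.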
166 | func (etlx *ETLX) BuildInsertSQL(sql_header string, data []map[string]any) (string, error) {
167 | if len(data) == 0 {
168 | return "", fmt.Errorf("no data to insert")
169 | }
170 | // Use keys from the first map as column names
171 | columns := make([]string, 0, len(data[0]))
172 | for k := range data[0] {
173 | columns = append(columns, k)
174 | }
175 | var valueRows []string
176 | for _, row := range data {
177 | var values []string
178 | for _, col := range columns {
179 | val := row[col]
180 | values = append(values, formatValue(val))
181 | }
182 | valueRows = append(valueRows, "("+strings.Join(values, ", ")+")")
183 | }
184 | sql, err := ReplaceColumnsWithDetectedIdentifier(sql_header, columns)
185 | if err == nil {
186 | sql = fmt.Sprintf("%s %s;", sql, strings.Join(valueRows, ",\n"))
187 | } else {
188 | //fmt.Println(err)
189 | // Escape column names (basic, you might need to adapt for SQL Server specifics)
190 | colList := strings.Join(columns, ", ")
191 | /*sql := fmt.Sprintf("INSERT INTO %s (%s) VALUES %s;",
192 | table,
193 | colList,
194 | strings.Join(valueRows, ",\n"),
195 | )*/
196 | re := regexp.MustCompile(`:columns\b`)
197 | sql_header = re.ReplaceAllString(sql_header, colList)
198 | sql = fmt.Sprintf("%s %s;", sql_header, strings.Join(valueRows, ",\n"))
199 | }
200 | if os.Getenv("ETLX_DEBUG_QUERY") == "true" {
201 | _file, err := etlx.TempFIle("", sql, fmt.Sprintf("query.%s.*.sql", "db2db"))
202 | if err != nil {
203 | fmt.Println(err)
204 | }
205 | fmt.Println(_file)
206 | }
207 | return sql, nil
208 | }
209 |
210 | // Detects the quote character around :columns and replaces it with the appropriate formatted column list.
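// For example (illustrative), with columns ["id", "name"]:
//   INSERT INTO [T] ([:columns]) VALUES  becomes  INSERT INTO [T] ([id], [name]) VALUES
//   INSERT INTO "T" (":columns") VALUES  becomes  INSERT INTO "T" ("id", "name") VALUES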
211 | func ReplaceColumnsWithDetectedIdentifier(query string, columns []string) (string, error) {
212 | // Regex to capture optional identifier wrapping
213 | re := regexp.MustCompile("([[\"`]?):columns([]\"`]?)")
214 | matches := re.FindStringSubmatch(query)
215 | var open, close string
216 | if len(matches) == 3 {
217 | open, close = matches[1], matches[2]
218 | }
219 | // Default if nothing matched
220 | if open == "" && close == "" {
221 | open, close = "", ""
222 | } else if open == "[" && close != "]" {
223 | close = "]"
224 | } else if open == `"` && close != `"` {
225 | close = `"`
226 | } else if open == "`" && close != "`" {
227 | close = "`"
228 | } else if open == "(" && close == ")" {
229 | open, close = "", "" // treat as no identifier
230 | }
231 | // Escape square brackets inside column names for MSSQL
232 | formatIdentifier := func(col string) string {
233 | if open == "[" && close == "]" {
234 | col = strings.ReplaceAll(col, "]", "]]")
235 | }
236 | return open + col + close
237 | }
238 | // Apply identifier
239 | var escapedCols []string
240 | for _, col := range columns {
241 | escapedCols = append(escapedCols, formatIdentifier(col))
242 | }
243 | finalCols := strings.Join(escapedCols, ", ")
244 | // Replace the whole match with column list
245 | finalQuery := re.ReplaceAllString(query, finalCols)
246 | return finalQuery, nil
247 | }
248 |
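// formatValue renders a Go value as a SQL literal (NULL, number, 0/1 for booleans, or a
// single-quoted string/timestamp) for the generated VALUES list.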
249 | func formatValue(v any) string {
250 | switch val := v.(type) {
251 | case nil:
252 | return "NULL"
253 | case int, int32, int64:
254 | return fmt.Sprintf("%d", val)
255 | case float32, float64:
256 | return fmt.Sprintf("%f", val)
257 | case bool:
258 | if val {
259 | return "1"
260 | }
261 | return "0"
262 | case time.Time:
263 | return "'" + val.Format("2006-01-02 15:04:05") + "'"
264 | case []byte:
265 | return "'" + strings.ReplaceAll(string(val), "'", "''") + "'"
266 | case string:
267 | return "'" + strings.ReplaceAll(val, "'", "''") + "'"
268 | default:
269 | return "'" + strings.ReplaceAll(fmt.Sprintf("%v", val), "'", "''") + "'"
270 | }
271 | }
272 |
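// UpdateTarget builds an INSERT statement from data using sql_target as the header and executes it
// against the target connection.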
273 | func (etlx *ETLX) UpdateTarget(dbTargetConn db.DBInterface, sql_target string, data []map[string]any) (int, error) {
274 | sql, err := etlx.BuildInsertSQL(sql_target, data)
275 | if err != nil {
276 | return 0, err
277 | }
278 | //fmt.Println(sql)
279 | return dbTargetConn.ExecuteQuery(sql, []any{}...)
280 | }
281 |
--------------------------------------------------------------------------------
/internal/db/odbc.go:
--------------------------------------------------------------------------------
1 | package db
2 |
3 | import (
4 | "context"
5 | "database/sql"
6 | "encoding/csv"
7 | "fmt"
8 | "io"
9 | "os"
10 | "reflect"
11 | "strings"
12 | "time"
13 | "unicode/utf8"
14 |
15 | "github.com/realdatadriven/etlx/internal/env"
16 | "golang.org/x/text/encoding/charmap"
17 | "golang.org/x/text/transform"
18 |
19 | _ "github.com/alexbrainman/odbc"
20 | )
21 |
22 | type ODBC struct {
23 | *sql.DB
24 | }
25 |
26 | func NewODBC(dsn string) (*ODBC, error) {
27 | //fmt.Printf("DSN: %s\n", dsn)
28 | db, err := sql.Open("odbc", dsn)
29 | if err != nil {
30 | return nil, err
31 | }
32 |
33 | defaultTimeoutODBC = time.Duration(env.GetInt("ODBC_DFLT_TIMEOUT", 15)) * time.Minute
34 | //fmt.Println(driverName, dsn)
35 | db.SetMaxOpenConns(25)
36 | db.SetMaxIdleConns(25)
37 | db.SetConnMaxIdleTime(defaultTimeoutODBC)
38 | db.SetConnMaxLifetime(2 * time.Hour)
39 | return &ODBC{db}, nil
40 | }
41 |
42 | func (db *ODBC) New(dsn string) (*ODBC, error) {
43 | //fmt.Printf("DSN: %s\n", dsn)
44 | _db, err := sql.Open("odbc", dsn)
45 | if err != nil {
46 | return nil, err
47 | }
48 | //fmt.Println(driverName, dsn)
49 | _db.SetMaxOpenConns(25)
50 | _db.SetMaxIdleConns(25)
51 | _db.SetConnMaxIdleTime(5 * time.Minute)
52 | _db.SetConnMaxLifetime(2 * time.Hour)
53 | return &ODBC{_db}, nil
54 | }
55 |
56 | func (db *ODBC) ExecuteQuery(query string, data ...interface{}) (int, error) {
57 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutODBC)
58 | defer cancel()
59 | result, err := db.ExecContext(ctx, query, data...)
60 | if err != nil {
61 | return 0, err
62 | }
63 | id, err := result.LastInsertId()
64 | if err != nil {
65 | id = 0
66 | }
67 | return int(id), err
68 | }
69 |
70 | func (db *ODBC) ExecuteQueryRowsAffected(query string, data ...interface{}) (int64, error) {
71 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutODBC)
72 | defer cancel()
73 | result, err := db.ExecContext(ctx, query, data...)
74 | if err != nil {
75 | return 0, err
76 | }
77 | id, err := result.RowsAffected()
78 | if err != nil {
79 | return 0, err
80 | }
81 | return id, err
82 | }
83 |
84 | func (db *ODBC) QueryMultiRows(query string, params ...interface{}) (*[]map[string]interface{}, bool, error) {
85 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutODBC)
86 | defer cancel()
87 | var result []map[string]interface{}
88 | rows, err := db.QueryContext(ctx, query, params...)
89 | if err != nil {
90 | return nil, false, err
91 | }
92 | defer rows.Close()
93 | for rows.Next() {
94 | row, err := ScanRowToMap(rows)
95 | if err != nil {
96 | return nil, false, fmt.Errorf("failed to scan row to map: %w", err)
97 | }
98 | result = append(result, row)
99 | }
100 | return &result, true, err
101 | }
102 |
103 | func (db *ODBC) QueryRows(ctx context.Context, query string, params ...interface{}) (*sql.Rows, error) {
104 | return db.QueryContext(ctx, query, params...)
105 | }
106 |
107 | func (db *ODBC) QueryMultiRowsWithCols(query string, params ...interface{}) (*[]map[string]interface{}, []string, bool, error) {
108 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutODBC)
109 | defer cancel()
110 | var result []map[string]interface{}
111 | rows, err := db.QueryContext(ctx, query, params...)
112 | if err != nil {
113 | return nil, nil, false, err
114 | }
115 | defer rows.Close()
116 | columns, err := rows.Columns()
117 | if err != nil {
118 | fmt.Printf("failed to get columns: %s", err)
119 | }
120 | for rows.Next() {
121 | row, err := ScanRowToMap(rows)
122 | if err != nil {
123 | return nil, nil, false, fmt.Errorf("failed to scan row to map: %w", err)
124 | }
125 | result = append(result, row)
126 | }
127 | return &result, columns, true, err
128 | }
129 |
130 | func (db *ODBC) AllTables(params map[string]interface{}, extra_conf map[string]interface{}) (*[]map[string]interface{}, bool, error) {
131 | 	// Logic to get all tables for ODBC (not implemented yet)
132 | return nil, false, nil
133 | }
134 |
135 | func (db *ODBC) TableSchema(params map[string]interface{}, table string, dbName string, extra_conf map[string]interface{}) (*[]map[string]interface{}, bool, error) {
136 | return nil, false, nil
137 | }
138 |
139 | func (db *ODBC) ExecuteNamedQuery(query string, data map[string]interface{}) (int, error) {
140 | return 0, fmt.Errorf("not suported yet %s", "_")
141 | }
142 |
143 | func (db *ODBC) ExecuteQueryPGInsertWithLastInsertId(query string, data ...interface{}) (int, error) {
144 | return 0, fmt.Errorf("not suported %s", "_")
145 | }
146 |
147 | func isUTF8(s string) bool {
148 | return utf8.ValidString(s)
149 | }
150 |
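// convertToUTF8 returns the input unchanged when it is already valid UTF-8, otherwise decodes it from ISO-8859-1.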
151 | func convertToUTF8(isoStr string) (string, error) {
152 | if isUTF8(isoStr) {
153 | return isoStr, nil
154 | }
155 | reader := strings.NewReader(isoStr)
156 | transformer := charmap.ISO8859_1.NewDecoder()
157 | utf8Bytes, err := io.ReadAll(transform.NewReader(reader, transformer))
158 | if err != nil {
159 | return "", err
160 | }
161 | return string(utf8Bytes), nil
162 | }
163 |
164 | func hasDecimalPlace(v interface{}) (bool, error) {
165 | // Try to cast v to float64
166 | floatVal, ok := v.(float64)
167 | if !ok {
168 | return false, fmt.Errorf("value is not a float64, it is %v", reflect.TypeOf(v))
169 | }
170 |
171 | // Check if the float has a decimal part
172 | if floatVal != float64(int(floatVal)) {
173 | return true, nil
174 | }
175 | return false, nil
176 | }
177 |
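// Query2CSV runs the query and writes the result set to a CSV file at csv_path, starting with a header row of column names.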
178 | func (db *ODBC) Query2CSV(query string, csv_path string, params ...interface{}) (bool, error) {
179 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutODBC)
180 | defer cancel()
181 | rows, err := db.QueryContext(ctx, query, params...)
182 | if err != nil {
183 | //fmt.Println(1, err)
184 | return false, err
185 | }
186 | defer rows.Close()
187 | csvFile, err := os.Create(csv_path)
188 | if err != nil {
189 | return false, fmt.Errorf("error creating CSV file: %w", err)
190 | }
191 | defer csvFile.Close()
192 | // CSV
193 | csvWriter := csv.NewWriter(csvFile)
194 | defer csvWriter.Flush()
195 | // Get column names
196 | columns, err := rows.Columns()
197 | if err != nil {
198 | return false, fmt.Errorf("error getting column names: %w", err)
199 | }
200 | // Write column names to CSV header
201 | csvWriter.Write(columns)
202 | for rows.Next() {
203 | row, err := ScanRowToMap(rows)
204 | if err != nil {
205 | return false, fmt.Errorf("failed to scan row to map: %w", err)
206 | }
207 | var rowData []string
208 | //for _, value := range row {
209 | for _, col := range columns {
210 | value := row[col]
211 | //rowData = append(rowData, fmt.Sprintf("%v", value))
212 | switch v := value.(type) {
213 | case nil:
214 | // Format integer types
215 | rowData = append(rowData, "")
216 | case int, int8, int16, int32, int64:
217 | // Format integer types
218 | rowData = append(rowData, fmt.Sprintf("%d", v))
219 | case float64, float32:
220 | //fmt.Println(col, v)
221 | // Format large numbers without scientific notation
222 | hasDec, err := hasDecimalPlace(v)
223 | if err != nil {
224 | fmt.Println(err)
225 | rowData = append(rowData, fmt.Sprintf("%v", value))
226 | } else if hasDec {
227 | rowData = append(rowData, fmt.Sprintf("%f", v))
228 | } else {
229 | rowData = append(rowData, fmt.Sprintf("%.f", v))
230 | }
231 | case []byte:
232 | // Convert byte slice (UTF-8 data) to a string
233 | utf8Str, err := convertToUTF8(string(v))
234 | if err != nil {
235 | fmt.Println("Failed to convert to UTF-8:", v, err)
236 | }
237 | rowData = append(rowData, strings.TrimSpace(string(utf8Str)))
238 | default:
239 | // Default formatting for other types
240 | rowData = append(rowData, fmt.Sprintf("%v", value))
241 | }
242 | }
243 | csvWriter.Write(rowData)
244 | }
245 | if err := rows.Err(); err != nil {
246 | return false, fmt.Errorf("error iterating rows: %w", err)
247 | }
248 | return true, nil
249 | }
250 |
251 | func (db *ODBC) QuerySingleRow(query string, params ...interface{}) (*map[string]interface{}, bool, error) {
252 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutODBC)
253 | defer cancel()
254 | result := map[string]interface{}{}
255 | rows, err := db.QueryContext(ctx, query, params...)
256 | if err != nil {
257 | return nil, false, err
258 | }
259 | defer rows.Close()
260 | if rows.Next() {
261 | /*if err := rows.Scan(result); err != nil {
262 | return nil, false, err
263 | }*/
264 | result, err = ScanRowToMap(rows)
265 | if err != nil {
266 | return nil, false, fmt.Errorf("failed to scan row to map: %w", err)
267 | }
268 | }
269 | return &result, true, err
270 | }
271 |
272 | func (db *ODBC) FromParams(params map[string]interface{}, extra_conf map[string]interface{}) (*DB, string, string, error) {
273 | return nil, "", "", fmt.Errorf("not implemented yet %s", "_")
274 | }
275 |
276 | func (db *ODBC) GetDriverName() string {
277 | return "odbc"
278 | }
279 |
280 | func (db *ODBC) GetUserByNameOrEmail(email string) (map[string]interface{}, bool, error) {
281 | return nil, false, fmt.Errorf("not implemented yet %s", "_")
282 | }
283 |
284 | func (db *ODBC) IsEmpty(value interface{}) bool {
285 | switch v := value.(type) {
286 | case nil:
287 | return true
288 | case string:
289 | return len(v) == 0
290 | case []interface{}:
291 | return len(v) == 0
292 | case map[interface{}]interface{}:
293 | return len(v) == 0
294 | default:
295 | return false
296 | }
297 | }
298 |
--------------------------------------------------------------------------------
/internal/etlx/load_requirements.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "time"
7 | )
8 |
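// LoadREQUIRES processes the REQUIRES key (or an alternative key passed in keys): for each item it loads
// additional ETLX configuration, either from a database query or from a file path, and merges it into the
// current config, returning the process logs.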
9 | func (etlx *ETLX) LoadREQUIRES(conf map[string]any, keys ...string) ([]map[string]any, error) {
10 | key := "REQUIRES"
11 | if len(keys) > 0 && keys[0] != "" {
12 | key = keys[0]
13 | }
14 | //fmt.Println(key, dateRef)
15 | var processLogs []map[string]any
16 | start := time.Now()
17 | processLogs = append(processLogs, map[string]any{
18 | "name": key,
19 | "start_at": start,
20 | })
21 | mainDescription := ""
22 | // Define the runner as a simple function
23 | REQUIRESRunner := func(metadata map[string]any, itemKey string, item map[string]any) error {
24 | //fmt.Println(metadata, itemKey, item)
25 | // ACTIVE
26 | if active, okActive := metadata["active"]; okActive {
27 | if !active.(bool) {
28 | processLogs = append(processLogs, map[string]any{
29 | "name": fmt.Sprintf("KEY %s", key),
30 | "description": metadata["description"].(string),
31 | "start_at": time.Now(),
32 | "end_at": time.Now(),
33 | "success": true,
34 | "msg": "Deactivated",
35 | })
36 | return fmt.Errorf("dectivated %s", "")
37 | }
38 | }
39 | mainConn, _ := metadata["connection"].(string)
40 | mainDescription = metadata["description"].(string)
41 | itemMetadata, ok := item["metadata"].(map[string]any)
42 | if !ok {
43 | processLogs = append(processLogs, map[string]any{
44 | "name": fmt.Sprintf("%s->%s", key, itemKey),
45 | "description": itemMetadata["description"].(string),
46 | "start_at": time.Now(),
47 | "end_at": time.Now(),
48 | "success": true,
49 | "msg": "Missing metadata in item",
50 | })
51 | return nil
52 | }
53 | // ACTIVE
54 | if active, okActive := itemMetadata["active"]; okActive {
55 | if !active.(bool) {
56 | processLogs = append(processLogs, map[string]any{
57 | "name": fmt.Sprintf("%s->%s", key, itemKey),
58 | "description": itemMetadata["description"].(string),
59 | "start_at": time.Now(),
60 | "end_at": time.Now(),
61 | "success": true,
62 | "msg": "Deactivated",
63 | })
64 | return nil
65 | }
66 | }
67 | start3 := time.Now()
68 | _log2 := map[string]any{
69 | "name": fmt.Sprintf("%s->%s", key, itemKey),
70 | "description": itemMetadata["description"].(string),
71 | "start_at": start3,
72 | }
73 | path, okPath := itemMetadata["path"]
74 | beforeSQL, okBefore := itemMetadata["before_sql"]
75 | query, okQuery := itemMetadata["query"]
76 | column, okColumn := itemMetadata["column"]
77 | afterSQL, okAfter := itemMetadata["after_sql"]
78 | config := make(map[string]any)
79 | etl := &ETLX{Config: config, autoLogsDisabled: true}
80 | var mdConf any
81 | if okQuery && query != "" {
82 | conn, okCon := itemMetadata["connection"]
83 | if !okCon {
84 | conn = mainConn
85 | }
86 | dbConn, err := etlx.GetDB(conn.(string))
87 | if err != nil {
88 | _log2["success"] = false
89 | _log2["msg"] = fmt.Sprintf("%s -> %s ERR: connecting to %s in : %s", key, itemKey, conn, err)
90 | _log2["end_at"] = time.Now()
91 | _log2["duration"] = time.Since(start3).Seconds()
92 | processLogs = append(processLogs, _log2)
93 | return nil
94 | }
95 | defer dbConn.Close()
96 | _log2["success"] = true
97 | _log2["msg"] = fmt.Sprintf("%s -> %s CONN: Connectinon to %s successfull", key, itemKey, conn)
98 | _log2["end_at"] = time.Now()
99 | _log2["duration"] = time.Since(start3).Seconds()
100 | processLogs = append(processLogs, _log2)
101 | // QUERIES TO RUN AT BEGINING
102 | if okBefore {
103 | start3 := time.Now()
104 | _log2 := map[string]any{
105 | "name": fmt.Sprintf("%s->%s", key, itemKey),
106 | "description": itemMetadata["description"].(string),
107 | "start_at": start3,
108 | }
109 | err = etlx.ExecuteQuery(dbConn, beforeSQL, item, "", "", nil)
110 | if err != nil {
111 | _log2["success"] = false
112 | _log2["msg"] = fmt.Sprintf("%s -> %s Before error: %s", key, itemKey, err)
113 | _log2["end_at"] = time.Now()
114 | _log2["duration"] = time.Since(start3).Seconds()
115 | } else {
116 | _log2["success"] = true
117 | _log2["msg"] = fmt.Sprintf("%s -> %s Before ", key, itemKey)
118 | _log2["end_at"] = time.Now()
119 | _log2["duration"] = time.Since(start3).Seconds()
120 | }
121 | processLogs = append(processLogs, _log2)
122 | }
123 | // MAIN QUERY
124 | rows, _, err := etlx.Query(dbConn, query.(string), item, "", "", nil)
125 | // Fetch data from the database using the provided SQL query
126 | if err != nil {
127 | _log2["success"] = false
128 | _log2["msg"] = fmt.Sprintf("%s -> %s -> failed to execute get md conf query: %s", key, itemKey, err)
129 | _log2["end_at"] = time.Now()
130 | _log2["duration"] = time.Since(start3).Seconds()
131 | processLogs = append(processLogs, _log2)
132 | return nil
133 | }
134 | if len(*rows) > 0 {
135 | okConf := false
136 | if column != nil && okColumn {
137 | mdConf, okConf = (*rows)[0][column.(string)]
138 | } else {
139 | mdConf, okConf = (*rows)[0]["conf"]
140 | }
141 | if okConf && mdConf != nil {
142 | err := etl.ConfigFromMDText(mdConf.(string))
143 | if err != nil {
144 | _log2["success"] = false
145 | _log2["msg"] = fmt.Sprintf("Error parsing config string: %s", err)
146 | _log2["end_at"] = time.Now()
147 | _log2["duration"] = time.Since(start3).Seconds()
148 | processLogs = append(processLogs, _log2)
149 | return nil
150 | }
151 | } else {
152 | _log2["success"] = false
153 | _log2["msg"] = fmt.Sprintf("%s -> %s -> failed to get md conf string query: %s column %s", key, itemKey, query, column)
154 | _log2["end_at"] = time.Now()
155 | _log2["duration"] = time.Since(start3).Seconds()
156 | processLogs = append(processLogs, _log2)
157 | return nil
158 | }
159 | } else {
160 | _log2["success"] = false
161 | _log2["msg"] = fmt.Sprintf("%s -> %s -> failed to execute get md conf query: %s", key, itemKey, err)
162 | _log2["end_at"] = time.Now()
163 | _log2["duration"] = time.Since(start3).Seconds()
164 | processLogs = append(processLogs, _log2)
165 | return nil
166 | }
167 | // QUERIES TO RUN AT THE END
168 | if okAfter {
169 | start3 := time.Now()
170 | _log2 := map[string]any{
171 | "name": fmt.Sprintf("%s->%s", key, itemKey),
172 | "description": itemMetadata["description"].(string),
173 | "start_at": start3,
174 | }
175 | err = etlx.ExecuteQuery(dbConn, afterSQL, item, "", "", nil)
176 | if err != nil {
177 | _log2["success"] = false
178 | _log2["msg"] = fmt.Sprintf("%s -> %s After error: %s", key, itemKey, err)
179 | _log2["end_at"] = time.Now()
180 | _log2["duration"] = time.Since(start3).Seconds()
181 | } else {
182 | _log2["success"] = true
183 | _log2["msg"] = fmt.Sprintf("%s -> %s After ", key, itemKey)
184 | _log2["end_at"] = time.Now()
185 | _log2["duration"] = time.Since(start3).Seconds()
186 | }
187 | processLogs = append(processLogs, _log2)
188 | }
189 | } else if path != nil && okPath {
190 | if ok, _ := fileExists(path.(string)); ok {
191 | err := etl.ConfigFromFile(path.(string))
192 | if err != nil {
193 | _log2["success"] = false
194 | _log2["msg"] = fmt.Sprintf("Error parsing config: %s -> %s", path, err)
195 | _log2["end_at"] = time.Now()
196 | _log2["duration"] = time.Since(start3).Seconds()
197 | processLogs = append(processLogs, _log2)
198 | }
199 | } else {
200 | _log2["success"] = false
201 | _log2["msg"] = fmt.Sprintf("file doesn't exists: %s", path)
202 | _log2["end_at"] = time.Now()
203 | _log2["duration"] = time.Since(start3).Seconds()
204 | processLogs = append(processLogs, _log2)
205 | return nil
206 | }
207 | }
208 | //fmt.Println("LOADED ETLX CONF:", etl.Config)
209 | if len(etl.Config) == 1 && etl.Config["__order"] != nil {
210 | etlx.Config[itemKey] = map[string]any{}
211 | if okQuery && query != "" && mdConf != nil {
212 | //etlx.Config[itemKey].(map[string]any)[itemKey] = mdConf.(string)
213 | etlx.Config[itemKey] = mdConf.(string)
214 | } else if path != nil && okPath {
215 | data, err := os.ReadFile(path.(string))
216 | if err != nil {
217 | fmt.Printf("LOAD RAW FILE: failed to read file: %s", err)
218 | } else {
219 | etlx.Config[itemKey] = string(data)
220 | }
221 | }
222 | } else {
223 | for newConfKey, value := range etl.Config {
224 | if newConfKey == "metadata" || newConfKey == "__order" || newConfKey == "order" {
225 | continue
226 | }
227 | if _, ok := etlx.Config[newConfKey]; !ok {
228 | etlx.Config[newConfKey] = value
229 | } else {
230 | fmt.Println(newConfKey, "Already exists!")
231 | }
232 | }
233 | }
234 | _log2["success"] = true
235 | _log2["msg"] = "Successfully loaded!"
236 | _log2["end_at"] = time.Now()
237 | _log2["duration"] = time.Since(start3).Seconds()
238 | processLogs = append(processLogs, _log2)
239 | return nil
240 | }
241 | // Check if the input conf is nil or empty
242 | if conf == nil {
243 | conf = etlx.Config
244 | }
245 | // Process the MD KEY
246 | err := etlx.ProcessMDKey(key, conf, REQUIRESRunner)
247 | if err != nil {
248 | return processLogs, fmt.Errorf("%s failed: %v", key, err)
249 | }
250 | processLogs[0] = map[string]any{
251 | "name": key,
252 | "description": mainDescription,
253 | "start_at": processLogs[0]["start_at"],
254 | "end_at": time.Now(),
255 | "duration": time.Since(start).Seconds(),
256 | }
257 | return processLogs, nil
258 | }
259 |
--------------------------------------------------------------------------------
/examples/s3.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "The [`httpfs`](https://duckdb.org/docs/extensions/httpfs/s3api, \"httpfs\") extension supports reading/writing/globbing files on object storage servers using the S3 API. S3 offers a standard API to read and write to remote files (while regular http servers, predating S3, do not offer a common write API). DuckDB conforms to the S3 API, that is now common among industry storage providers."
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "The preferred way to configure and authenticate to S3 endpoints is to use secrets. Multiple secret providers are available"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# ETL"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "```yaml metadata\n",
29 | "name: S3_EXTRACT\n",
30 | "description: \"Example extrating from S3 to a local sqlite3 file\"\n",
31 | "connection: \"duckdb:\"\n",
32 | "active: true\n",
33 | "```"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "## train_services"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "```yaml metadata\n",
48 | "name: train_services\n",
49 | "description: \"train_services\"\n",
50 | "table: train_services\n",
51 | "load_conn: \"duckdb:\"\n",
52 | "load_before_sql:\n",
53 | " - load_extentions\n",
54 | " - attach_db\n",
55 | "load_sql: load_query\n",
56 | "load_after_sql: detach_db\n",
57 | "drop_sql: drop_sql\n",
58 | "clean_sql: clean_sql\n",
59 | "rows_sql: nrows\n",
60 | "active: true\n",
61 | "```"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "```sql\n",
69 | "-- load_extentions\n",
70 | "INSTALL Sqlite;\n",
71 | "LOAD Sqlite;\n",
72 | "INSTALL httpfs;\n",
73 | "LOAD httpfs;\n",
74 | "```"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "```sql\n",
82 | "-- attach_db\n",
83 | "ATTACH 'examples/S3_EXTRACT.db' AS \"DB\" (TYPE SQLITE)\n",
84 | "```"
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "```sql\n",
92 | "-- detach_db\n",
93 | "DETACH \"DB\";\n",
94 | "```"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "metadata": {},
100 | "source": [
101 | "```sql\n",
102 | "-- load_query\n",
103 | "CREATE OR REPLACE TABLE \"DB\".\"\" AS\n",
104 | "FROM 's3://duckdb-blobs/train_services.parquet';\n",
105 | "```"
106 | ]
107 | },
108 | {
109 | "cell_type": "markdown",
110 | "metadata": {},
111 | "source": [
112 | "```sql\n",
113 | "-- drop_sql\n",
114 | "DROP TABLE IF EXISTS \"DB\".\"\";\n",
115 | "```"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {},
121 | "source": [
122 | "```sql\n",
123 | "-- clean_sql\n",
124 | "DELETE FROM \"DB\".\"\";\n",
125 | "```"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "```sql\n",
133 | "-- nrows\n",
134 | "SELECT COUNT(*) AS \"nrows\" FROM \"DB\".\"\"\n",
135 | "```"
136 | ]
137 | },
138 | {
139 | "cell_type": "markdown",
140 | "metadata": {},
141 | "source": [
142 | "## S3_EXTRACT"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "```yaml metadata\n",
150 | "name: S3_EXTRACT\n",
151 | "description: \"Example extrating from S3 to a local sqlite3 file\"\n",
152 | "table: S3_EXTRACT\n",
153 | "load_conn: \"duckdb:\"\n",
154 | "load_before_sql:\n",
155 | " - load_extentions\n",
156 | " - attach_db\n",
157 | " - create_S3_token\n",
158 | "load_sql: load_query\n",
159 | "load_after_sql: detach_db\n",
160 | "drop_sql: drop_sql\n",
161 | "clean_sql: clean_sql\n",
162 | "rows_sql: nrows\n",
163 | "active: true\n",
164 | "```"
165 | ]
166 | },
167 | {
168 | "cell_type": "markdown",
169 | "metadata": {},
170 | "source": [
171 | "```sql\n",
172 | "-- load_extentions\n",
173 | "INSTALL httpfs;\n",
174 | "LOAD httpfs;\n",
175 | "```"
176 | ]
177 | },
178 | {
179 | "cell_type": "markdown",
180 | "metadata": {},
181 | "source": [
182 | "```sql\n",
183 | "-- attach_db\n",
184 | "ATTACH 'examples/S3_EXTRACT.db' AS \"DB\" (TYPE SQLITE)\n",
185 | "```"
186 | ]
187 | },
188 | {
189 | "cell_type": "markdown",
190 | "metadata": {},
191 | "source": [
192 | "Example with a [Minio](https://min.io/) local instance"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "```sql\n",
200 | "-- create_S3_token\n",
201 | "CREATE SECRET S3_token (\n",
202 | " TYPE S3,\n",
203 | " KEY_ID '@S3_KEY_ID',\n",
204 | " SECRET '@S3_SECRET',\n",
205 | " ENDPOINT '127.0.0.1:3000',\n",
206 | " URL_STYLE 'path'\n",
207 | ");\n",
208 | "```"
209 | ]
210 | },
211 | {
212 | "cell_type": "markdown",
213 | "metadata": {},
214 | "source": [
215 | "```sql\n",
216 | "-- detach_db\n",
217 | "DETACH \"DB\";\n",
218 | "```"
219 | ]
220 | },
221 | {
222 | "cell_type": "markdown",
223 | "metadata": {},
224 | "source": [
225 | "```sql\n",
226 | "-- load_query\n",
227 | "CREATE OR REPLACE TABLE \"DB\".\"\" AS\n",
228 | "SELECT * \n",
229 | "FROM 's3://uploads/flights.csv';\n",
230 | "```"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {},
236 | "source": [
237 | "```sql\n",
238 | "-- drop_sql\n",
239 | "DROP TABLE IF EXISTS \"DB\".\"\";\n",
240 | "```"
241 | ]
242 | },
243 | {
244 | "cell_type": "markdown",
245 | "metadata": {},
246 | "source": [
247 | "```sql\n",
248 | "-- clean_sql\n",
249 | "DELETE FROM \"DB\".\"\";\n",
250 | "```"
251 | ]
252 | },
253 | {
254 | "cell_type": "markdown",
255 | "metadata": {},
256 | "source": [
257 | "```sql\n",
258 | "-- nrows\n",
259 | "SELECT COUNT(*) AS \"nrows\" FROM \"DB\".\"\"\n",
260 | "```"
261 | ]
262 | },
263 | {
264 | "cell_type": "markdown",
265 | "metadata": {},
266 | "source": [
267 | "```shell\n",
268 | "bin/etlx --config examples/s3.ipynb\n",
269 | "```"
270 | ]
271 | },
281 | {
282 | "cell_type": "markdown",
283 | "metadata": {},
284 | "source": [
285 | "# LOGS"
286 | ]
287 | },
288 | {
289 | "cell_type": "markdown",
290 | "metadata": {},
291 | "source": [
292 | "```yaml metadata\n",
293 | "name: LOGS\n",
294 | "description: \"Example saving logs\"\n",
295 | "table: logs\n",
296 | "connection: \"duckdb:\"\n",
297 | "before_sql:\n",
298 | " - load_extentions\n",
299 | " - attach_db\n",
300 | " - get_dyn_queries[create_columns_missing]\n",
301 | "save_log_sql: load_query\n",
302 | "after_sql: detach_db\n",
303 | "active: true\n",
304 | "```"
305 | ]
306 | },
307 | {
308 | "cell_type": "markdown",
309 | "metadata": {},
310 | "source": [
311 | "```sql\n",
312 | "-- load_extentions\n",
313 | "INSTALL Sqlite;\n",
314 | "LOAD Sqlite;\n",
315 | "```"
316 | ]
317 | },
318 | {
319 | "cell_type": "markdown",
320 | "metadata": {},
321 | "source": [
322 | "```sql\n",
323 | "-- attach_db\n",
324 | "ATTACH 'examples/S3_EXTRACT.db' AS \"DB\" (TYPE SQLITE)\n",
325 | "```"
326 | ]
327 | },
328 | {
329 | "cell_type": "markdown",
330 | "metadata": {},
331 | "source": [
332 | "```sql\n",
333 | "-- detach_db\n",
334 | "DETACH \"DB\";\n",
335 | "```"
336 | ]
337 | },
338 | {
339 | "cell_type": "markdown",
340 | "metadata": {},
341 | "source": [
342 | "```sql\n",
343 | "-- load_query\n",
344 | "CREATE OR REPLACE TABLE \"DB\".\"\" AS\n",
345 | "SELECT * \n",
346 | "FROM '';\n",
347 | "```"
348 | ]
349 | },
350 | {
351 | "cell_type": "markdown",
352 | "metadata": {},
353 | "source": [
354 | "```sql\n",
355 | "-- create_columns_missing\n",
356 | "WITH source_columns AS (\n",
357 | " SELECT column_name, column_type \n",
358 | " FROM (DESCRIBE SELECT * FROM read_json(''))\n",
359 | "),\n",
360 | "destination_columns AS (\n",
361 | " SELECT column_name, data_type as column_type\n",
362 | " FROM duckdb_columns \n",
363 | " WHERE table_name = ''\n",
364 | "),\n",
365 | "missing_columns AS (\n",
366 | " SELECT s.column_name, s.column_type\n",
367 | " FROM source_columns s\n",
368 | " LEFT JOIN destination_columns d ON s.column_name = d.column_name\n",
369 | " WHERE d.column_name IS NULL\n",
370 | ")\n",
371 | "SELECT 'ALTER TABLE \"DB\".\"\" ADD COLUMN \"' || column_name || '\" ' || column_type || ';' AS query\n",
372 | "FROM missing_columns"
373 | ]
374 | },
375 | {
376 | "cell_type": "markdown",
377 | "metadata": {},
378 | "source": []
379 | }
380 | ],
381 | "metadata": {
382 | "kernelspec": {
383 | "display_name": "Python 3",
384 | "language": "python",
385 | "name": "python3"
386 | },
387 | "language_info": {
388 | "codemirror_mode": {
389 | "name": "ipython",
390 | "version": 3
391 | },
392 | "file_extension": ".py",
393 | "mimetype": "text/x-python",
394 | "name": "python",
395 | "nbconvert_exporter": "python",
396 | "pygments_lexer": "ipython3",
397 | "version": "3.11.7"
398 | }
399 | },
400 | "nbformat": 4,
401 | "nbformat_minor": 2
402 | }
403 |
--------------------------------------------------------------------------------
/internal/db/duckdb.go:
--------------------------------------------------------------------------------
1 | package db
2 |
3 | import (
4 | "context"
5 | "database/sql"
6 | "fmt"
7 | "time"
8 |
9 | "github.com/realdatadriven/etlx/internal/env"
10 |
11 | _ "github.com/duckdb/duckdb-go/v2"
12 | )
13 |
14 | type DuckDB struct {
15 | *sql.DB
16 | }
17 |
18 | // ScanRowToMap converts a single row into a map[string]interface{}.
19 | func ScanRowToMap(rows *sql.Rows) (map[string]interface{}, error) {
20 | columns, err := rows.Columns()
21 | if err != nil {
22 | return nil, fmt.Errorf("failed to get columns: %w", err)
23 | }
24 | values := make([]interface{}, len(columns))
25 | valuePointers := make([]interface{}, len(columns))
26 | for i := range values {
27 | valuePointers[i] = &values[i]
28 | }
29 | if err := rows.Scan(valuePointers...); err != nil {
30 | return nil, fmt.Errorf("failed to scan row: %w", err)
31 | }
32 | rowMap := make(map[string]interface{})
33 | for i, colName := range columns {
34 | rowMap[colName] = values[i]
35 | }
36 | return rowMap, nil
37 | }
38 |
39 | func NewDuckDB(dsn string) (*DuckDB, error) {
40 | //fmt.Printf("db DRIVER: %s DSN: %s\n", driverName, dsn)
41 | db, err := sql.Open("duckdb", dsn)
42 | if err != nil {
43 | return nil, err
44 | }
45 | defaultTimeoutDuckDB = time.Duration(env.GetInt("DUCKDB_DFLT_TIMEOUT", 15)) * time.Minute
46 | //fmt.Println(driverName, dsn)
47 | db.SetMaxOpenConns(25)
48 | db.SetMaxIdleConns(25)
49 | db.SetConnMaxIdleTime(defaultTimeoutDuckDB)
50 | db.SetConnMaxLifetime(2 * time.Hour)
51 | return &DuckDB{db}, nil
52 | }
53 |
54 | func (db *DuckDB) New(dsn string) (*DuckDB, error) {
55 | //fmt.Printf("db DRIVER: %s DSN: %s\n", driverName, dsn)
56 | _db, err := sql.Open("duckdb", dsn)
57 | if err != nil {
58 | return nil, err
59 | }
60 | //fmt.Println(driverName, dsn)
61 | _db.SetMaxOpenConns(25)
62 | _db.SetMaxIdleConns(25)
63 | _db.SetConnMaxIdleTime(defaultTimeoutDuckDB)
64 | _db.SetConnMaxLifetime(2 * time.Hour)
65 | return &DuckDB{_db}, nil
66 | }
67 |
68 | func (db *DuckDB) ExecuteQuery(query string, data ...interface{}) (int, error) {
69 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutDuckDB)
70 | defer cancel()
71 | result, err := db.ExecContext(ctx, query, data...)
72 | if err != nil {
73 | return 0, err
74 | }
75 | id, err := result.LastInsertId()
76 | if err != nil {
77 | return 0, err
78 | }
79 | return int(id), err
80 | }
81 |
82 | func (db *DuckDB) ExecuteQueryRowsAffected(query string, data ...interface{}) (int64, error) {
83 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutDuckDB)
84 | defer cancel()
85 | result, err := db.ExecContext(ctx, query, data...)
86 | if err != nil {
87 | return 0, err
88 | }
89 | id, err := result.RowsAffected()
90 | if err != nil {
91 | return 0, err
92 | }
93 | return id, err
94 | }
95 |
96 | func (db *DuckDB) AllTables(params map[string]interface{}, extra_conf map[string]interface{}) (*[]map[string]interface{}, bool, error) {
97 | _query := `SELECT table_name as name FROM information_schema.tables`
98 | _query = `SHOW TABLES`
99 | // fmt.Println(_query)
100 | return db.QueryMultiRows(_query, []interface{}{}...)
101 | }
102 |
103 | func (db *DuckDB) TableSchema(params map[string]interface{}, table string, dbName string, extra_conf map[string]interface{}) (*[]map[string]interface{}, bool, error) {
104 | user_id := int(params["user"].(map[string]interface{})["user_id"].(float64))
105 | /*_query := fmt.Sprintf(`SELECT ROW_NUMBER() OVER () - 1 AS cid
106 | , column_name AS name
107 | , data_type AS type
108 | , CASE is_nullable WHEN 'NO' THEN 1 ELSE 0 END AS notnull
109 | , column_default AS dflt_value
110 | , CASE
111 | WHEN column_name IN (
112 | SELECT kcu.column_name
113 | FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
114 | JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
115 | ON kcu.constraint_name = tc.constraint_name
116 | WHERE tc.constraint_type = 'PRIMARY KEY' AND kcu.table_name = '%s'
117 | ) THEN 1
118 | ELSE 0
119 | END AS pk
120 | FROM information_schema.tables
121 | WHERE table_schema = 'public'
122 | AND table_name = '%s';`, table, table)*/
123 | _query := fmt.Sprintf(`PRAGMA table_info("%s")`, table)
124 | //fmt.Println(table, _query)
125 | _aux_data := []map[string]interface{}{}
126 | _aux_data_fk := map[string]interface{}{}
127 | res, _, err := db.QueryMultiRows(_query, []interface{}{}...)
128 | if err != nil {
129 | return nil, false, err
130 | }
131 | _query = fmt.Sprintf(`WITH foreign_keys AS (
132 | SELECT rc.constraint_name AS fk_name,
133 | rc.unique_constraint_name AS unique_name,
134 | kcu.table_name AS table,
135 | kcu.column_name AS "from",
136 | kcu.ordinal_position AS seq,
137 | kcu.table_name AS "to",
138 | kcu.column_name AS to_column,
139 | 'tc.delete_rule' AS on_delete,
140 | 'tc.update_rule' AS on_update
141 | FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS rc
142 | JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu ON rc.constraint_name = kcu.constraint_name
143 | JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc ON rc.constraint_name = tc.constraint_name
144 | WHERE kcu.table_name = '%s'
145 | )
146 | SELECT ROW_NUMBER() OVER () - 1 AS id,
147 | seq,
148 | "table" AS parent_table,
149 | "from",
150 | "to",
151 | on_update,
152 | on_delete,
153 | 'NONE' AS match
154 | FROM foreign_keys;`, table)
155 | res_fk, _, err := db.QueryMultiRows(_query, []interface{}{}...)
156 | if err != nil {
157 | return nil, false, err
158 | }
159 | for _, row := range *res_fk {
160 | // fmt.Println(row)
161 | _aux_data_fk[row["from"].(string)] = map[string]interface{}{
162 | "referred_table": row["table"].(string),
163 | "referred_column": row["to"].(string),
164 | }
165 | }
166 | for _, row := range *res {
167 | fk := false
168 | var referred_table string
169 | var referred_column string
170 | if _, exists := _aux_data_fk[row["name"].(string)]; exists {
171 | fk = true
172 | referred_table = _aux_data_fk[row["name"].(string)].(map[string]interface{})["referred_table"].(string)
173 | referred_column = _aux_data_fk[row["name"].(string)].(map[string]interface{})["referred_column"].(string)
174 | }
175 | pk := false
176 | if _pk, ok := row["pk"].(bool); ok {
177 | pk = _pk
178 | } else if _pk, ok := row["pk"].(int); ok {
179 | if _pk == 1 {
180 | pk = true
181 | }
182 | }
183 | nullable := false
184 | if notnull, ok := row["notnull"].(bool); ok {
185 | nullable = notnull
186 | } else if notnull, ok := row["notnull"].(int); ok {
187 | if notnull == 0 {
188 | nullable = true
189 | }
190 | }
191 | _aux_row := map[string]interface{}{
192 | "db": dbName,
193 | "table": table,
194 | "field": row["name"].(string),
195 | "type": row["type"].(string),
196 | "comment": nil,
197 | "pk": pk,
198 | "autoincrement": nil,
199 | "nullable": nullable,
200 | "computed": nil,
201 | "default": nil,
202 | "fk": fk,
203 | "referred_table": referred_table,
204 | "referred_column": referred_column,
205 | "user_id": user_id,
206 | "created_at": time.Now(),
207 | "updated_at": time.Now(),
208 | "excluded": false,
209 | }
210 | // fmt.Println(1, row["name"].(string), _aux_row)
211 | _aux_data = append(_aux_data, _aux_row)
212 | }
213 | return &_aux_data, true, nil
214 | }
215 |
216 | func (db *DuckDB) QueryMultiRowsWithCols(query string, params ...interface{}) (*[]map[string]interface{}, []string, bool, error) {
217 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutDuckDB)
218 | defer cancel()
219 | var result []map[string]interface{}
220 | rows, err := db.QueryContext(ctx, query, params...)
221 | if err != nil {
222 | return nil, nil, false, err
223 | }
224 | defer rows.Close()
225 | columns, err := rows.Columns()
226 | if err != nil {
227 | fmt.Printf("failed to get columns: %s", err)
228 | }
229 | for rows.Next() {
230 | row, err := ScanRowToMap(rows)
231 | if err != nil {
232 | return nil, nil, false, fmt.Errorf("failed to scan row to map: %w", err)
233 | }
234 | result = append(result, row)
235 | }
236 | return &result, columns, true, err
237 | }
238 |
239 | func (db *DuckDB) QueryMultiRows(query string, params ...interface{}) (*[]map[string]interface{}, bool, error) {
240 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutDuckDB)
241 | defer cancel()
242 | var result []map[string]interface{}
243 | rows, err := db.QueryContext(ctx, query, params...)
244 | if err != nil {
245 | //fmt.Println(1, err)
246 | return nil, false, err
247 | }
248 | defer rows.Close()
249 | //for rows.Next() {
250 | // row := map[string]interface{}{}
251 | for rows.Next() {
252 | row, err := ScanRowToMap(rows)
253 | if err != nil {
254 | return nil, false, fmt.Errorf("failed to scan row to map: %w", err)
255 | }
256 | result = append(result, row)
257 | }
258 | /*if err := rows.Scan(row); err != nil {
259 | return nil, false, err
260 | }*/
261 | // result = append(result, row)
262 | //}
263 | return &result, true, err
264 | }
265 |
266 | func (db *DuckDB) QueryRows(ctx context.Context, query string, params ...interface{}) (*sql.Rows, error) {
267 | return db.QueryContext(ctx, query, params...)
268 | }
269 |
270 | func (db *DuckDB) QuerySingleRow(query string, params ...interface{}) (*map[string]interface{}, bool, error) {
271 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutDuckDB)
272 | defer cancel()
273 | result := map[string]interface{}{}
274 | rows, err := db.QueryContext(ctx, query, params...)
275 | if err != nil {
276 | return nil, false, err
277 | }
278 | defer rows.Close()
279 | if rows.Next() {
280 | result, err = ScanRowToMap(rows)
281 | if err != nil {
282 | return nil, false, fmt.Errorf("failed to scan row to map: %w", err)
283 | }
284 | }
285 | return &result, true, err
286 | }
287 |
288 | func (db *DuckDB) ExecuteNamedQuery(query string, data map[string]interface{}) (int, error) {
289 | return 0, fmt.Errorf("not implemented yet %s", "_")
290 | }
291 |
292 | func (db *DuckDB) ExecuteQueryPGInsertWithLastInsertId(query string, data ...interface{}) (int, error) {
293 | return 0, fmt.Errorf("not implemented yet %s", "_")
294 | }
295 |
296 | func (db *DuckDB) FromParams(params map[string]interface{}, extra_conf map[string]interface{}) (*DB, string, string, error) {
297 | return nil, "", "", fmt.Errorf("not implemented yet %s", "_")
298 | }
299 |
300 | func (db *DuckDB) GetDriverName() string {
301 | return "duckdb"
302 | }
303 |
304 | func (db *DuckDB) GetUserByNameOrEmail(email string) (map[string]interface{}, bool, error) {
305 | return nil, false, fmt.Errorf("not implemented yet %s", "_")
306 | }
307 |
308 | func (db *DuckDB) Query2CSV(query string, csv_path string, params ...interface{}) (bool, error) {
309 | return false, fmt.Errorf("not implemented yet %s", "_")
310 | }
311 |
312 | func (db *DuckDB) IsEmpty(value interface{}) bool {
313 | switch v := value.(type) {
314 | case nil:
315 | return true
316 | case string:
317 | return len(v) == 0
318 | case []interface{}:
319 | return len(v) == 0
320 | case map[interface{}]interface{}:
321 | return len(v) == 0
322 | default:
323 | return false
324 | }
325 | }
326 |
--------------------------------------------------------------------------------
/examples/http.cs.md:
--------------------------------------------------------------------------------
1 | # ETL
2 |
3 |
4 |
5 | ```yaml metadata
6 | name: HTTP_EXTRACT
7 | description: "Example extrating from web to a local sqlite3 file"
8 | connection: "duckdb:"
9 | database: HTTP_EXTRACT.db
10 | active: true
11 | ```
12 |
13 | ## VERSION
14 |
15 | ```yaml metadata
16 | name: VERSION
17 | description: "DDB Version"
18 | table: VERSION
19 | load_conn: "duckdb:"
20 | load_before_sql: "ATTACH 'database/HTTP_EXTRACT.db' AS DB (TYPE SQLITE)"
21 | load_sql: 'CREATE OR REPLACE TABLE DB."" AS SELECT version() AS "VERSION";'
22 | load_after_sql: "DETACH DB;"
23 | rows_sql: 'SELECT COUNT(*) AS "nrows" FROM DB.""'
24 | active: true
25 | ```
26 |
27 | ## NYC_TAXI
28 |
29 | ```yaml metadata
30 | name: NYC_TAXI
31 | description: "Example extrating from web to a local sqlite3 file"
32 | table: NYC_TAXI
33 | load_conn: "duckdb:"
34 | load_before_sql:
35 | - load_extentions
36 | - attach_db
37 | load_sql: load_query
38 | load_after_sql: detach_db
39 | drop_sql: drop_sql
40 | clean_sql: clean_sql
41 | rows_sql: nrows
42 | active: false
43 | ```
44 |
45 | ```sql
46 | -- load_extentions
47 | INSTALL sqlite;
48 | LOAD sqlite;
49 | ```
50 |
51 | ```sql
52 | -- attach_db
53 | ATTACH 'database/HTTP_EXTRACT.db' AS "DB" (TYPE SQLITE)
54 | ```
55 |
56 | ```sql
57 | -- detach_db
58 | DETACH "DB";
59 | ```
60 |
61 | ```sql
62 | -- load_query
63 | CREATE OR REPLACE TABLE "DB"."" AS
64 | SELECT *
65 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2024-01.parquet';
66 | ```
67 |
68 | ```sql
69 | -- drop_sql
70 | DROP TABLE IF EXISTS "DB"."";
71 | ```
72 |
73 | ```sql
74 | -- clean_sql
75 | DELETE FROM "DB"."";
76 | ```
77 |
78 | ```sql
79 | -- nrows
80 | SELECT COUNT(*) AS "nrows" FROM "DB".""
81 | ```
82 |
83 | # DATA_QUALITY
84 |
85 | ```yaml
86 | description: "Runs some queries to check quality / validate."
87 | active: false
88 | ```
89 |
90 | ## Rule0001
91 |
92 | ```yaml
93 | name: Rule0001
94 | description: "Check if the field trip_distance from the NYC_TAXI is missing or zero"
95 | connection: "duckdb:"
96 | before_sql:
97 | - "LOAD sqlite"
98 | - "ATTACH 'database/HTTP_EXTRACT.db' AS \"DB\" (TYPE SQLITE)"
99 | query: quality_check_query
100 | fix_quality_err: fix_quality_err_query
101 | column: total_reg_with_err # Defaults to 'total'.
102 | check_only: true
103 | fix_only: false
104 | after_sql: "DETACH DB"
105 | active: true
106 | ```
107 |
108 | ```sql
109 | -- quality_check_query
110 | SELECT COUNT(*) AS "total_reg_with_err"
111 | FROM "DB"."NYC_TAXI"
112 | WHERE "trip_distance" IS NULL
113 | OR "trip_distance" = 0;
114 | ```
115 |
116 | ```sql
117 | -- fix_quality_err_query
118 | UPDATE "DB"."NYC_TAXI"
119 | SET "trip_distance" = "trip_distance"
120 | WHERE "trip_distance" IS NULL
121 | OR "trip_distance" = 0;
122 | ```
123 |
124 | # MULTI_QUERIES
125 |
126 | ```yaml
127 | description: "Define multiple structured queries combined with UNION."
128 | connection: "duckdb:"
129 | before_sql:
130 | - "LOAD sqlite"
131 | - "ATTACH 'database/HTTP_EXTRACT.db' AS \"DB\" (TYPE SQLITE)"
132 | save_sql: save_mult_query_res
133 | save_on_err_patt: '(?i)table.+with.+name.+(\w+).+does.+not.+exist'
134 | save_on_err_sql: create_mult_query_res
135 | after_sql: "DETACH DB"
136 | union_key: "UNION ALL\n" # Defaults to UNION.
137 | active: false
138 | ```
139 |
140 | ```sql
141 | -- save_mult_query_res
142 | INSERT INTO "DB"."MULTI_QUERY" BY NAME
143 | [[final_query]]
144 | ```
145 |
146 | ```sql
147 | -- create_mult_query_res
148 | CREATE OR REPLACE TABLE "DB"."MULTI_QUERY" AS
149 | [[final_query]]
150 | ```
151 |
152 | ## Row1
153 |
154 | ```yaml
155 | name: Row1
156 | description: "Row 1"
157 | query: row_query
158 | active: true
159 | ```
160 |
161 | ```sql
162 | -- row_query
163 | SELECT '# number of rows' AS "variable", COUNT(*) AS "value"
164 | FROM "DB"."NYC_TAXI"
165 | ```
166 |
167 | ## Row2
168 |
169 | ```yaml
170 | name: Row2
171 | description: "Row 2"
172 | query: row_query
173 | active: true
174 | ```
175 |
176 | ```sql
177 | -- row_query
178 | SELECT 'total revenue' AS "variable", SUM("total_amount") AS "value"
179 | FROM "DB"."NYC_TAXI"
180 | ```
181 |
182 | ## Row3
183 |
184 | ```yaml
185 | name: Row3
186 | description: "Row 3"
187 | query: row_query
188 | active: true
189 | ```
190 |
191 | ```sql
192 | -- row_query
193 | SELECT *
194 | FROM (
195 | SELECT "DOLocationID" AS "variable", SUM("total_amount") AS "value"
196 | FROM "DB"."NYC_TAXI"
197 | GROUP BY "DOLocationID"
198 | ORDER BY "DOLocationID"
199 | ) AS "T"
200 | ```
201 |
202 | # EXPORTS
203 |
204 | Exports data to files.
205 |
206 | ```yaml metadata
207 | name: DailyReports
208 | description: "Daily reports"
209 | connection: "duckdb:"
210 | path: "static/uploads/tmp"
211 | active: true
212 | ```
213 |
214 | ## CSV_EXPORT
215 |
216 | ```yaml metadata
217 | name: CSV_EXPORT
218 | description: "Export data to CSV"
219 | connection: "duckdb:"
220 | before_sql:
221 | - "INSTALL sqlite"
222 | - "LOAD sqlite"
223 | - "ATTACH 'database/HTTP_EXTRACT.db' AS DB (TYPE SQLITE)"
224 | export_sql: export
225 | after_sql: "DETACH DB"
226 | path: 'nyc_taxy_YYYYMMDD.csv'
227 | tmp_prefix: 'tmp'
228 | active: false
229 | ```
230 |
231 | ```sql
232 | -- export
233 | COPY (
234 | SELECT *
235 | FROM "DB"."NYC_TAXI"
236 | WHERE "tpep_pickup_datetime"::DATETIME <= '{YYYY-MM-DD}'
237 | LIMIT 100
238 | ) TO '' (FORMAT 'csv', HEADER TRUE);
239 | ```
240 |
241 | ## XLSX_EXPORT
242 |
243 | ```yaml metadata
244 | name: XLSX_EXPORT
245 | description: "Export data to Excel file"
246 | connection: "duckdb:"
247 | before_sql:
248 | - "INSTALL sqlite"
249 | - "LOAD sqlite"
250 | - "INSTALL excel"
251 | - "LOAD excel"
252 | - "ATTACH 'database/HTTP_EXTRACT.db' AS DB (TYPE SQLITE)"
253 | export_sql: xl_export
254 | after_sql: "DETACH DB"
255 | path: 'nyc_taxy_YYYYMMDD.xlsx'
256 | tmp_prefix: 'tmp'
257 | active: false
258 | ```
259 |
260 | ```sql
261 | -- xl_export
262 | COPY (
263 | SELECT *
264 | FROM "DB"."NYC_TAXI"
265 | WHERE "tpep_pickup_datetime"::DATETIME <= '{YYYY-MM-DD}'
266 | LIMIT 100
267 | ) TO '' (FORMAT XLSX, HEADER TRUE, SHEET 'NYC');
268 | ```
269 |
270 | ## XLSX_TMPL
271 |
272 | ```yaml metadata
273 | name: XLSX_TMPL
274 | description: "Export data to Excel template"
275 | connection: "duckdb:"
276 | before_sql:
277 | - "INSTALL sqlite"
278 | - "LOAD sqlite"
279 | - "ATTACH 'database/HTTP_EXTRACT.db' AS DB (TYPE SQLITE)"
280 | after_sql: "DETACH DB"
281 | tmp_prefix: 'tmp'
282 | template: "../nyc_taxy_YYYYMMDD.xlsx"
283 | path: "nyc_taxy_YYYYMMDD.xlsx"
284 | mapping:
285 | - sheet: resume
286 | range: A2
287 | sql: resume
288 | type: value
289 | key: total
290 | - sheet: detail
291 | range: A1
292 | sql: detail
293 | type: range
294 | header: true
295 | active: true
296 | ```
297 |
298 | ```sql
299 | -- resume
300 | SELECT COUNT(*) AS "total"
301 | FROM "DB"."NYC_TAXI"
302 | WHERE "tpep_pickup_datetime"::DATETIME <= '{YYYY-MM-DD}'
303 | ```
304 |
305 | ```sql
306 | -- detail2
307 | SELECT *
308 | FROM "DB"."NYC_TAXI"
309 | WHERE "tpep_pickup_datetime"::DATETIME <= '{YYYY-MM-DD}'
310 | LIMIT 100
311 | ```
312 |
313 | ```sql
314 | -- detail
315 | pivot (select * from "DB"."NYC_TAXI") as t
316 | on strftime("tpep_pickup_datetime"::datetime, '%d')
317 | using sum(total_amount) AS total, count(*) AS total_trips
318 | group by PULocationID
319 | ```
320 |
321 |
322 | ```sql
323 | -- data_to_export
324 | SELECT *
325 | FROM "DB"."NYC_TAXI"
326 | WHERE "tpep_pickup_datetime"::DATETIME <= '{YYYY-MM-DD}'
327 | LIMIT 100
328 | ```
329 |
330 | # LOGS
331 |
332 | ```yaml metadata
333 | name: LOGS
334 | description: "Example saving logs"
335 | table: etlx_logs
336 | connection: "duckdb:"
337 | before_sql:
338 |   - load_extensions
339 | - attach_db
340 | - 'USE DB;'
341 | save_log_sql: load_logs
342 | save_on_err_patt: '(?i)table.+does.+not.+exist|does.+not.+have.+column.+with.+name'
343 | save_on_err_sql:
344 | - create_logs
345 | - get_dyn_queries[create_columns_missing]
346 | - load_logs
347 | after_sql:
348 | - 'USE memory;'
349 | - detach_db
350 | tmp_dir: /tmp
351 | active: true
352 | ```
353 |
354 | ```sql
355 | -- load_extensions
356 | INSTALL Sqlite;
357 | LOAD Sqlite;
358 | INSTALL json;
359 | LOAD json;
360 | ```
361 |
362 | ```sql
363 | -- attach_db
364 | ATTACH 'database/HTTP_EXTRACT.db' AS "DB" (TYPE SQLITE)
365 | ```
366 |
367 | ```sql
368 | -- detach_db
369 | DETACH "DB";
370 | ```
371 |
372 | ```sql
373 | -- load_logs
374 | INSERT INTO "DB"."" BY NAME
375 | SELECT *
376 | FROM read_json('');
377 | ```
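
The empty quoted identifier and the empty `read_json('')` path above are best read as placeholders the LOGS runner fills in when it executes: the `table` from the metadata (`etlx_logs`) and a temporary JSON dump of the run logs written under `tmp_dir`. Under that assumption, `load_logs` would resolve to something along the lines of:

```sql
-- load_logs_resolved_example (illustrative only, not referenced by any metadata)
INSERT INTO "DB"."etlx_logs" BY NAME
SELECT *
FROM read_json('/tmp/etlx_logs_20240131.json'); -- hypothetical temp file name
```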
378 |
379 | ```sql
380 | -- create_logs
381 | CREATE TABLE IF NOT EXISTS "DB"."" AS
382 | SELECT *
383 | FROM read_json('');
384 | ```
385 |
386 | ```sql
387 | -- create_columns_missing
388 | WITH source_columns AS (
389 | SELECT column_name, column_type
390 | FROM (DESCRIBE SELECT * FROM read_json(''))
391 | ),
392 | destination_columns AS (
393 | SELECT column_name, data_type as column_type
394 | FROM duckdb_columns
395 | WHERE table_name = ''
396 | ),
397 | missing_columns AS (
398 | SELECT s.column_name, s.column_type
399 | FROM source_columns s
400 | LEFT JOIN destination_columns d ON s.column_name = d.column_name
401 | WHERE d.column_name IS NULL
402 | )
403 | SELECT 'ALTER TABLE "DB"."" ADD COLUMN "' || column_name || '" ' || column_type || ';' AS query
404 | FROM missing_columns;
405 | ```
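
Because `save_on_err_sql` lists `get_dyn_queries[create_columns_missing]`, the rows this query returns are themselves treated as statements to run: each missing column yields one `ALTER TABLE ... ADD COLUMN`. For a log field that does not yet exist in the destination table, the generated statement would look roughly like the following (column name and type are illustrative):

```sql
-- create_columns_missing_output_example (illustrative only, not referenced by any metadata)
ALTER TABLE "DB"."etlx_logs" ADD COLUMN "mem_alloc_end" BIGINT;
```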
406 |
407 | # NOTIFY
408 |
409 | ```yaml metadata
410 | name: Notification
411 | description: "Notification"
412 | connection: "duckdb:"
413 | path: "examples"
414 | active: false
415 | ```
416 |
417 | ## ETL_STATUS
418 |
419 | ```yaml metadata
420 | name: ETL_STATUS
421 | description: "ETL Status"
422 | connection: "duckdb:"
423 | before_sql:
424 | - "INSTALL sqlite"
425 | - "LOAD sqlite"
426 | - "ATTACH 'database/HTTP_EXTRACT.db' AS DB (TYPE SQLITE)"
427 | data_sql:
428 | - logs
429 | after_sql: "DETACH DB"
430 | to:
431 | - real.datadriven@gmail.com
432 | cc: null
433 | bcc: null
434 | subject: 'ETLX YYYYMMDD'
435 | body: body_tml
436 | attachments_:
437 | - hf.md
438 | - http.md
439 | active: true
440 | ```
441 |
442 | ```html body_tml
443 | Good Morning
444 | This email is generated by ETLX automatically!
445 | {{ with .logs }}
446 | {{ if eq .success true }}
447 | <table>
448 |   <tr>
449 |     <th>Name</th>
450 |     <th>Ref</th>
451 |     <th>Start</th>
452 |     <th>End</th>
453 |     <th>Duration</th>
454 |     <th>Success</th>
455 |     <th>Message</th>
456 |   </tr>
457 |   {{ range .data }}
458 |   <tr>
459 |     <td>{{ .name }}</td>
460 |     <td>{{ .ref }}</td>
461 |     <td>{{ .start_at }}</td>
462 |     <td>{{ .end_at }}</td>
463 |     <td>{{ .duration }}</td>
464 |     <td>{{ .success }}</td>
465 |     <td>{{ .msg }}</td>
466 |   </tr>
467 |   {{ else }}
468 |   <tr>
469 |     <td>No items available</td>
470 |   </tr>
471 |   {{ end }}
472 | </table>
473 | {{ else }}
474 | {{.msg}}
475 | {{ end }}
476 | {{ else }}
477 | Logs information missing.
478 | {{ end }}
479 | ```
480 |
481 | ```sql
482 | -- logs
483 | SELECT *
484 | FROM "DB"."etlx_logs"
485 | WHERE "ref" = '{YYYY-MM-DD}'
486 | ```
487 |
--------------------------------------------------------------------------------
/internal/etlx/run_notify.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "path/filepath"
7 | "time"
8 | )
9 |
10 | func (etlx *ETLX) RunNOTIFY(dateRef []time.Time, conf map[string]any, extraConf map[string]any, keys ...string) ([]map[string]any, error) {
11 | key := "NOTIFY"
12 | if len(keys) > 0 && keys[0] != "" {
13 | key = keys[0]
14 | }
15 | //fmt.Println(key, dateRef)
16 | var processLogs []map[string]any
17 | start := time.Now()
18 | processLogs = append(processLogs, map[string]any{
19 | "name": key,
20 | "key": key, "start_at": start,
21 | })
22 | mainDescription := ""
23 | // Define the runner as a simple function
24 | NOTIFYRunner := func(metadata map[string]any, itemKey string, item map[string]any) error {
25 | //fmt.Println(metadata, itemKey, item)
26 | // ACTIVE
27 | if active, okActive := metadata["active"]; okActive {
28 | if !active.(bool) {
29 | processLogs = append(processLogs, map[string]any{
30 | "name": fmt.Sprintf("KEY %s", key),
31 | "description": metadata["description"].(string),
32 | "key": key, "item_key": itemKey, "start_at": time.Now(),
33 | "end_at": time.Now(),
34 | "success": true,
35 | "msg": "Deactivated",
36 | })
37 | return fmt.Errorf("deactivated %s", "")
38 | }
39 | }
40 | // MAIN PATH
41 | mainPath, okMainPath := metadata["path"].(string)
42 | mainConn, _ := metadata["connection"].(string)
43 | mainDescription = metadata["description"].(string)
44 | itemMetadata, ok := item["metadata"].(map[string]any)
45 | if !ok {
46 | processLogs = append(processLogs, map[string]any{
47 | "name": fmt.Sprintf("%s->%s", key, itemKey),
48 | 				"description": "", // item metadata missing; avoid asserting on a nil map
49 | "key": key, "item_key": itemKey, "start_at": time.Now(),
50 | "end_at": time.Now(),
51 | "success": true,
52 | "msg": "Missing metadata in item",
53 | })
54 | return nil
55 | }
56 | // ACTIVE
57 | if active, okActive := itemMetadata["active"]; okActive {
58 | if !active.(bool) {
59 | processLogs = append(processLogs, map[string]any{
60 | "name": fmt.Sprintf("%s->%s", key, itemKey),
61 | "description": itemMetadata["description"].(string),
62 | "key": key, "item_key": itemKey, "start_at": time.Now(),
63 | "end_at": time.Now(),
64 | "success": true,
65 | "msg": "Deactivated",
66 | })
67 | return nil
68 | }
69 | }
70 | beforeSQL, okBefore := itemMetadata["before_sql"]
71 | dataSQL, okData := itemMetadata["data_sql"]
72 | afterSQL, okAfter := itemMetadata["after_sql"]
73 | conn, okCon := itemMetadata["connection"]
74 | if !okCon {
75 | conn = mainConn
76 | }
77 | dtRef, okDtRef := itemMetadata["date_ref"]
78 | if okDtRef && dtRef != "" {
79 | _dt, err := time.Parse("2006-01-02", dtRef.(string))
80 | if err == nil {
81 | dateRef = append([]time.Time{}, _dt)
82 | }
83 | } else {
84 | if len(dateRef) > 0 {
85 | dtRef = dateRef[0].Format("2006-01-02")
86 | }
87 | }
88 | start3 := time.Now()
89 | _log2 := map[string]any{
90 | "name": fmt.Sprintf("%s->%s", key, itemKey),
91 | "description": itemMetadata["description"].(string),
92 | "key": key, "item_key": itemKey, "start_at": start3,
93 | "ref": dtRef,
94 | }
95 | dbConn, err := etlx.GetDB(conn.(string))
96 | if err != nil {
97 | _log2["success"] = false
98 | _log2["msg"] = fmt.Sprintf("%s -> %s ERR: connecting to %s in : %s", key, itemKey, conn, err)
99 | _log2["end_at"] = time.Now()
100 | _log2["duration"] = time.Since(start3).Seconds()
101 | processLogs = append(processLogs, _log2)
102 | return nil
103 | }
104 | defer dbConn.Close()
105 | _log2["success"] = true
106 | 		_log2["msg"] = fmt.Sprintf("%s -> %s CONN: connection to %s successful", key, itemKey, conn)
107 | _log2["end_at"] = time.Now()
108 | _log2["duration"] = time.Since(start3).Seconds()
109 | processLogs = append(processLogs, _log2)
110 | // FILE
111 | table := itemMetadata["name"].(string)
112 | path, okPath := itemMetadata["path"].(string)
113 | if !okPath {
114 | if okMainPath {
115 | var pth any = mainPath
116 | itemMetadata["path"] = pth
117 | }
118 | }
119 | fname := fmt.Sprintf(`%s/%s_{YYYYMMDD}.csv`, os.TempDir(), table)
120 | if okPath && path != "" {
121 | fname = path
122 | if filepath.IsAbs(fname) {
123 | } else if filepath.IsLocal(fname) {
124 | fname = fmt.Sprintf(`%s/%s`, mainPath, fname)
125 | } else if filepath.Dir(fname) != "" && okMainPath && mainPath != "" {
126 | fname = fmt.Sprintf(`%s/%s`, mainPath, fname)
127 | }
128 | } else if okMainPath && mainPath != "" {
129 | fname = fmt.Sprintf(`%s/%s_{YYYYMMDD}.csv`, mainPath, table)
130 | }
131 | // QUERIES TO RUN AT beginning
132 | if okBefore {
133 | start3 := time.Now()
134 | _log2 := map[string]any{
135 | "name": fmt.Sprintf("%s->%s", key, itemKey),
136 | "description": itemMetadata["description"].(string),
137 | "key": key, "item_key": itemKey, "start_at": start3,
138 | }
139 | err = etlx.ExecuteQuery(dbConn, beforeSQL, item, fname, "", dateRef)
140 | if err != nil {
141 | _log2["success"] = false
142 | _log2["msg"] = fmt.Sprintf("%s -> %s Before error: %s", key, itemKey, err)
143 | _log2["end_at"] = time.Now()
144 | _log2["duration"] = time.Since(start3).Seconds()
145 | } else {
146 | _log2["success"] = true
147 | _log2["msg"] = fmt.Sprintf("%s -> %s Before ", key, itemKey)
148 | _log2["end_at"] = time.Now()
149 | _log2["duration"] = time.Since(start3).Seconds()
150 | }
151 | processLogs = append(processLogs, _log2)
152 | }
153 | // CHECK CONDITION
154 | condition, okCondition := itemMetadata["condition"].(string)
155 | condMsg, okCondMsg := itemMetadata["condition_msg"].(string)
156 | failedCondition := false
157 | if okCondition && condition != "" {
158 | cond, err := etlx.ExecuteCondition(dbConn, condition, itemMetadata, fname, "", dateRef)
159 | if err != nil {
160 | _log2["success"] = false
161 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed %s", key, itemKey, err)
162 | _log2["end_at"] = time.Now()
163 | _log2["duration"] = time.Since(start3).Seconds()
164 | processLogs = append(processLogs, _log2)
165 | //return fmt.Errorf("%s", _log2["msg"])
166 | failedCondition = true
167 | } else if !cond {
168 | _log2["success"] = false
169 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed the condition %s was not met!", key, itemKey, condition)
170 | _log2["end_at"] = time.Now()
171 | _log2["duration"] = time.Since(start3).Seconds()
172 | if okCondMsg && condMsg != "" {
173 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed %s", key, itemKey, etlx.SetQueryPlaceholders(condMsg, table, fname, dateRef))
174 | }
175 | processLogs = append(processLogs, _log2)
176 | // return fmt.Errorf("%s", _log2["msg"])
177 | failedCondition = true
178 | }
179 | }
180 | data := map[string]any{}
181 | // MAIN QUERIES
182 | if okData && !failedCondition {
183 | start3 := time.Now()
184 | _log2 := map[string]any{
185 | "name": fmt.Sprintf("%s->%s", key, itemKey),
186 | "description": itemMetadata["description"].(string),
187 | "key": key, "item_key": itemKey, "start_at": start3,
188 | }
189 | switch _map := dataSQL.(type) {
190 | case string:
191 | sql := _map
192 | if _, ok := item[_map]; ok {
193 | sql = item[sql].(string)
194 | }
195 | sql = etlx.SetQueryPlaceholders(sql, table, fname, dateRef)
196 | rows, _, err := etlx.Query(dbConn, sql, item, fname, "", dateRef)
197 | if err != nil {
198 | data[_map] = map[string]any{
199 | "success": false,
200 | "msg": fmt.Sprintf("failed to execute map query %s %s", _map, err),
201 | "data": []map[string]any{},
202 | }
203 | } else {
204 | data[_map] = map[string]any{
205 | "success": true,
206 | "data": *rows,
207 | }
208 | }
209 | case []any:
210 | for _, _sql := range dataSQL.([]any) {
211 | sql := _sql.(string)
212 | if _, ok := item[_sql.(string)]; ok {
213 | sql = item[_sql.(string)].(string)
214 | }
215 | sql = etlx.SetQueryPlaceholders(sql, table, fname, dateRef)
216 | rows, _, err := etlx.Query(dbConn, sql, item, fname, "", dateRef)
217 | if err != nil {
218 | data[_sql.(string)] = map[string]any{
219 | "success": false,
220 | 						"msg":     fmt.Sprintf("failed to execute map query %s %s", _sql, err),
221 | "data": []map[string]any{},
222 | }
223 | } else {
224 | data[_sql.(string)] = map[string]any{
225 | "success": true,
226 | "data": *rows,
227 | }
228 | }
229 | }
230 | default:
231 | _log2["success"] = false
232 | _log2["msg"] = fmt.Sprintf("%s -> %s invalid queries data type: %T", key, itemKey, _map)
233 | _log2["end_at"] = time.Now()
234 | _log2["duration"] = time.Since(start3).Seconds()
235 | }
236 | if _, ok := itemMetadata["data"].(map[string]any); ok {
237 | for key, d := range data {
238 | itemMetadata["data"].(map[string]any)[key] = d
239 | }
240 | } else {
241 | itemMetadata["data"] = data
242 | }
243 | itemMetadata["subject"] = etlx.SetQueryPlaceholders(itemMetadata["subject"].(string), table, fname, dateRef)
244 | body, ok := item[itemMetadata["body"].(string)].(string)
245 | if ok {
246 | itemMetadata["body"] = body
247 | }
248 | //itemMetadata["body"] = etlx.SetQueryPlaceholders(itemMetadata["body"].(string), table, fname, dateRef)
249 | attachments, okAtt := itemMetadata["attachments"].([]any)
250 | atts := []any{}
251 | var aux_att any
252 | if okAtt {
253 | for _, att := range attachments {
254 | aux_att = etlx.SetQueryPlaceholders(att.(string), table, fname, dateRef)
255 | // fmt.Println("ATT:", aux_att)
256 | atts = append(atts, aux_att)
257 | }
258 | itemMetadata["attachments"] = atts
259 | }
260 | err := etlx.SendEmail(itemMetadata)
261 | if err != nil {
262 | _log2["success"] = false
263 | _log2["msg"] = fmt.Sprintf("%s -> %s err sending email: %s", key, itemKey, err)
264 | _log2["end_at"] = time.Now()
265 | _log2["duration"] = time.Since(start3).Seconds()
266 | } else {
267 | _log2["success"] = true
268 | 			_log2["msg"] = fmt.Sprintf("%s -> %s Notification sent!", key, itemKey)
269 | _log2["end_at"] = time.Now()
270 | _log2["duration"] = time.Since(start3).Seconds()
271 | }
272 | //fmt.Println(key, _log2["msg"])
273 | processLogs = append(processLogs, _log2)
274 | }
275 | // QUERIES TO RUN AT THE END
276 | if okAfter {
277 | start3 := time.Now()
278 | _log2 := map[string]any{
279 | "name": fmt.Sprintf("%s->%s", key, itemKey),
280 | "description": itemMetadata["description"].(string),
281 | "key": key, "item_key": itemKey, "start_at": start3,
282 | }
283 | err = etlx.ExecuteQuery(dbConn, afterSQL, item, fname, "", dateRef)
284 | if err != nil {
285 | _log2["success"] = false
286 | _log2["msg"] = fmt.Sprintf("%s -> %s After error: %s", key, itemKey, err)
287 | _log2["end_at"] = time.Now()
288 | _log2["duration"] = time.Since(start3).Seconds()
289 | } else {
290 | _log2["success"] = true
291 | _log2["msg"] = fmt.Sprintf("%s -> %s After ", key, itemKey)
292 | _log2["end_at"] = time.Now()
293 | _log2["duration"] = time.Since(start3).Seconds()
294 | }
295 | processLogs = append(processLogs, _log2)
296 | }
297 | // fmt.Println(processLogs)
298 | return nil
299 | }
300 | // Check if the input conf is nil or empty
301 | if conf == nil {
302 | conf = etlx.Config
303 | }
304 | // Process the MD KEY
305 | err := etlx.ProcessMDKey(key, conf, NOTIFYRunner)
306 | if err != nil {
307 | return processLogs, fmt.Errorf("%s failed: %v", key, err)
308 | }
309 | processLogs[0] = map[string]any{
310 | "name": key,
311 | "description": mainDescription,
312 | "key": key, "start_at": processLogs[0]["start_at"],
313 | "end_at": time.Now(),
314 | "duration": time.Since(start).Seconds(),
315 | }
316 | return processLogs, nil
317 | }
318 |
--------------------------------------------------------------------------------
/cmd/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "flag"
5 | "fmt"
6 | "log"
7 | "os"
8 | "strings"
9 | "time"
10 |
11 | "github.com/realdatadriven/etlx"
12 | )
13 |
14 | func main() {
15 | etlx.LoadDotEnv()
16 | // Config file path
17 | filePath := flag.String("config", "config.md", "Config File")
18 | // date of reference
19 | date_ref := flag.String("date", time.Now().AddDate(0, 0, -1).Format("2006-01-02"), "Date Reference format YYYY-MM-DD")
20 | // to skip
21 | skip := flag.String("skip", "", "The keys to skip")
22 | 	// the only keys to run
23 | 	only := flag.String("only", "", "The only keys to run")
24 | 	// the steps to run
25 | 	steps := flag.String("steps", "", "The steps to run")
26 | 	// extract from a file
27 | 	file := flag.String("file", "", "The file to extract data from; should be used together with -only, pointing to the ETL key the data is meant for")
28 | // To clean / delete data (execute clean_sql on every item)
29 | clean := flag.Bool("clean", false, "To clean data (execute clean_sql on every item, conditioned by only and skip)")
30 | // To drop the table (execute drop_sql on every item condition by only and skip)
31 | drop := flag.Bool("drop", false, "To drop the table (execute drop_sql on every item, conditioned by only and skip)")
32 | // To get number of rows in the table (execute rows_sql on every item, conditioned by only and skip)
33 | rows := flag.Bool("rows", false, "To get number of rows in the table (execute rows_sql on every item, conditioned by only and skip)")
34 | flag.Parse()
35 | config := make(map[string]any)
36 | // Parse the file content
37 | etlxlib := &etlx.ETLX{Config: config}
38 | err := etlxlib.ConfigFromFile(*filePath)
39 | if err != nil {
40 | log.Fatalf("Error parsing Markdown: %v", err)
41 | }
42 | if _, ok := etlxlib.Config["REQUIRES"]; ok {
43 | _logs, err := etlxlib.LoadREQUIRES(nil)
44 | if err != nil {
45 | fmt.Printf("REQUIRES ERR: %v\n", err)
46 | }
47 | for _, _log := range _logs {
48 | fmt.Println(_log["start_at"], _log["end_at"], _log["duration"], _log["name"], _log["success"], _log["msg"], _log["rows"])
49 | }
50 | }
51 | // Print the parsed configuration
52 | if os.Getenv("ETLX_DEBUG_QUERY") == "true" {
53 | etlxlib.PrintConfigAsJSON(etlxlib.Config)
54 | }
55 | /*/ Walk through the data and process each key-value pair
56 | etlxlib.Walk(etlxlib.Config, "", func(keyPath string, value any) {
57 | fmt.Printf("Key: %s, Value: %v\n", keyPath, value)
58 | if reflect.TypeOf(value).Kind() != reflect.Map {
59 | fmt.Printf("Key: %s, Value: %v\n", keyPath, value)
60 | } else {
61 | fmt.Printf("Entering: %s\n", keyPath)
62 | }
63 | })*/
64 | var dateRef []time.Time
65 | _dt, _ := time.Parse("2006-01-02", *date_ref)
66 | dateRef = append(dateRef, _dt)
67 | // fmt.Println("date_ref:", *date_ref, dateRef)
68 | extraConf := map[string]any{
69 | "clean": *clean,
70 | "drop": *drop,
71 | "rows": *rows,
72 | "file": *file,
73 | }
74 | if *only != "" {
75 | extraConf["only"] = strings.Split(*only, ",")
76 | }
77 | if *skip != "" {
78 | extraConf["skip"] = strings.Split(*skip, ",")
79 | }
80 | if *steps != "" {
81 | extraConf["steps"] = strings.Split(*steps, ",")
82 | }
83 | logs := []map[string]any{}
84 | // RUN ETL
85 | if _, ok := etlxlib.Config["ETL"]; ok {
86 | _logs, err := etlxlib.RunETL(dateRef, nil, extraConf)
87 | if err != nil {
88 | fmt.Printf("ETL ERR: %v\n", err)
89 | } else {
90 | // LOGS
91 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
92 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
93 | if err != nil {
94 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
95 | }
96 | }
97 | logs = append(logs, _logs...)
98 | }
99 | }
100 | // DATA_QUALITY
101 | if _, ok := etlxlib.Config["DATA_QUALITY"]; ok {
102 | _logs, err := etlxlib.RunDATA_QUALITY(dateRef, nil, extraConf)
103 | if err != nil {
104 | fmt.Printf("DATA_QUALITY ERR: %v\n", err)
105 | } else {
106 | // LOGS
107 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
108 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
109 | if err != nil {
110 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
111 | }
112 | }
113 | logs = append(logs, _logs...)
114 | }
115 | }
116 | // EXPORTS
117 | if _, ok := etlxlib.Config["EXPORTS"]; ok {
118 | _logs, err := etlxlib.RunEXPORTS(dateRef, nil, extraConf)
119 | if err != nil {
120 | fmt.Printf("EXPORTS ERR: %v\n", err)
121 | } else {
122 | // LOGS
123 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
124 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
125 | if err != nil {
126 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
127 | }
128 | }
129 | logs = append(logs, _logs...)
130 | }
131 | }
132 | // SCRIPTS
133 | if _, ok := etlxlib.Config["SCRIPTS"]; ok {
134 | _logs, err := etlxlib.RunSCRIPTS(dateRef, nil, extraConf)
135 | if err != nil {
136 | fmt.Printf("SCRIPTS ERR: %v\n", err)
137 | } else {
138 | // LOGS
139 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
140 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
141 | if err != nil {
142 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
143 | }
144 | }
145 | logs = append(logs, _logs...)
146 | }
147 | }
148 | // MULTI_QUERIES
149 | if _, ok := etlxlib.Config["MULTI_QUERIES"]; ok {
150 | _logs, _, err := etlxlib.RunMULTI_QUERIES(dateRef, nil, extraConf)
151 | if err != nil {
152 | fmt.Printf("MULTI_QUERIES ERR: %v\n", err)
153 | } else {
154 | // LOGS
155 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
156 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
157 | if err != nil {
158 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
159 | }
160 | }
161 | logs = append(logs, _logs...)
162 | }
163 | }
164 | // ACTIONS
165 | if _, ok := etlxlib.Config["ACTIONS"]; ok {
166 | _logs, err := etlxlib.RunACTIONS(dateRef, nil, extraConf)
167 | if err != nil {
168 | fmt.Printf("ACTIONS ERR: %v\n", err)
169 | } else {
170 | // LOGS
171 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
172 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
173 | if err != nil {
174 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
175 | }
176 | }
177 | logs = append(logs, _logs...)
178 | }
179 | }
180 | // LOGS
181 | if _, ok := etlxlib.Config["LOGS"]; ok {
182 | _logs, err := etlxlib.RunLOGS(dateRef, nil, logs)
183 | if err != nil {
184 | fmt.Printf("LOGS ERR: %v\n", err)
185 | } else {
186 | // LOGS
187 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
188 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
189 | if err != nil {
190 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
191 | }
192 | }
193 | }
194 | }
195 | // NOTIFY
196 | if _, ok := etlxlib.Config["NOTIFY"]; ok {
197 | _logs, err := etlxlib.RunNOTIFY(dateRef, nil, extraConf)
198 | if err != nil {
199 | 			fmt.Printf("NOTIFY ERR: %v\n", err)
200 | } else {
201 | // LOGS
202 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
203 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
204 | if err != nil {
205 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
206 | }
207 | }
208 | }
209 | }
210 |
211 | _keys := []string{"NOTIFY", "LOGS", "SCRIPTS", "MULTI_QUERIES", "EXPORTS", "DATA_QUALITY", "ETL", "ACTIONS", "AUTO_LOGS", "REQUIRES"}
212 | __order, ok := etlxlib.Config["__order"].([]string)
213 | hasOrderedKeys := false
214 | if !ok {
215 | __order2, ok := etlxlib.Config["__order"].([]any)
216 | if ok {
217 | hasOrderedKeys = true
218 | __order = []string{}
219 | for _, key := range __order2 {
220 | __order = append(__order, key.(string))
221 | }
222 | }
223 | } else {
224 | hasOrderedKeys = true
225 | }
226 | // fmt.Println("LEVEL 1 H:", __order, len(__order))
227 | if !hasOrderedKeys {
228 | } else if len(__order) > 0 {
229 | //fmt.Print("LEVEL 1 H:", __order)
230 | for _, key := range __order {
231 | if !etlxlib.Contains(_keys, any(key)) {
232 | _key_conf, ok := etlxlib.Config[key].(map[string]any)
233 | if !ok {
234 | continue
235 | }
236 | _key_conf_metadata, ok := _key_conf["metadata"].(map[string]any)
237 | if !ok {
238 | continue
239 | }
240 | if runs_as, ok := _key_conf_metadata["runs_as"]; ok {
241 | fmt.Printf("%s RUN AS %s:\n", key, runs_as)
242 | if etlxlib.Contains(_keys, runs_as) {
243 | switch runs_as {
244 | case "ETL":
245 | _logs, err := etlxlib.RunETL(dateRef, nil, extraConf, key)
246 | if err != nil {
247 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err)
248 | } else {
249 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
250 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
251 | if err != nil {
252 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
253 | }
254 | }
255 | logs = append(logs, _logs...)
256 | }
257 | case "DATA_QUALITY":
258 | _logs, err := etlxlib.RunDATA_QUALITY(dateRef, nil, extraConf, key)
259 | if err != nil {
260 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err)
261 | } else {
262 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
263 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
264 | if err != nil {
265 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
266 | }
267 | }
268 | logs = append(logs, _logs...)
269 | }
270 | case "MULTI_QUERIES":
271 | _logs, _, err := etlxlib.RunMULTI_QUERIES(dateRef, nil, extraConf, key)
272 | if err != nil {
273 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err)
274 | } else {
275 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
276 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
277 | if err != nil {
278 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
279 | }
280 | }
281 | logs = append(logs, _logs...)
282 | }
283 | case "EXPORTS":
284 | _logs, err := etlxlib.RunEXPORTS(dateRef, nil, extraConf, key)
285 | if err != nil {
286 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err)
287 | } else {
288 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
289 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
290 | if err != nil {
291 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
292 | }
293 | }
294 | logs = append(logs, _logs...)
295 | }
296 | case "NOTIFY":
297 | _logs, err := etlxlib.RunNOTIFY(dateRef, nil, extraConf, key)
298 | if err != nil {
299 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err)
300 | } else {
301 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
302 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
303 | if err != nil {
304 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
305 | }
306 | }
307 | logs = append(logs, _logs...)
308 | }
309 | case "ACTIONS":
310 | _logs, err := etlxlib.RunACTIONS(dateRef, nil, extraConf, key)
311 | if err != nil {
312 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err)
313 | } else {
314 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
315 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
316 | if err != nil {
317 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
318 | }
319 | }
320 | logs = append(logs, _logs...)
321 | }
322 | case "SCRIPTS":
323 | _logs, err := etlxlib.RunSCRIPTS(dateRef, nil, extraConf, key)
324 | if err != nil {
325 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err)
326 | } else {
327 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
328 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
329 | if err != nil {
330 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
331 | }
332 | }
333 | logs = append(logs, _logs...)
334 | }
335 | case "LOGS":
336 | _logs, err := etlxlib.RunLOGS(dateRef, nil, logs, key)
337 | if err != nil {
338 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err)
339 | } else {
340 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
341 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
342 | if err != nil {
343 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
344 | }
345 | }
346 | logs = append(logs, _logs...)
347 | }
348 | case "REQUIRES":
349 | _logs, err := etlxlib.LoadREQUIRES(nil, key)
350 | if err != nil {
351 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err)
352 | } else {
353 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 {
354 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS")
355 | if err != nil {
356 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err)
357 | }
358 | }
359 | logs = append(logs, _logs...)
360 | }
361 | default:
362 | //
363 | }
364 | }
365 | }
366 | }
367 | }
368 | }
369 | }
370 |
--------------------------------------------------------------------------------
/internal/etlx/run_multiples_queries.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "fmt"
5 | "regexp"
6 | "strings"
7 | "time"
8 | )
9 |
10 | func (etlx *ETLX) RunMULTI_QUERIES(dateRef []time.Time, conf map[string]any, extraConf map[string]any, keys ...string) ([]map[string]any, []map[string]any, error) {
11 | key := "MULTI_QUERIES"
12 | if len(keys) > 0 && keys[0] != "" {
13 | key = keys[0]
14 | }
15 | //fmt.Println(key, dateRef)
16 | var processData []map[string]any
17 | var processLogs []map[string]any
18 | start := time.Now()
19 | mem_alloc, mem_total_alloc, mem_sys, num_gc := etlx.RuntimeMemStats()
20 | processLogs = append(processLogs, map[string]any{
21 | "name": key,
22 | "key": key, "start_at": start,
23 | "ref": nil,
24 | "mem_alloc_start": mem_alloc,
25 | "mem_total_alloc_start": mem_total_alloc,
26 | "mem_sys_start": mem_sys,
27 | "num_gc_start": num_gc,
28 | })
29 | // Check if the input conf is nil or empty
30 | if conf == nil {
31 | conf = etlx.Config
32 | }
33 | data, ok := conf[key].(map[string]any)
34 | if !ok {
35 | return nil, nil, fmt.Errorf("missing or invalid %s section", key)
36 | }
37 | // Extract metadata
38 | metadata, ok := data["metadata"].(map[string]any)
39 | if !ok {
40 | return nil, nil, fmt.Errorf("missing metadata in %s section", key)
41 | }
42 | // ACTIVE
43 | if active, okActive := metadata["active"]; okActive {
44 | if !active.(bool) {
45 | processLogs = append(processLogs, map[string]any{
46 | "name": fmt.Sprintf("KEY %s", key),
47 | "description": metadata["description"].(string),
48 | "key": key,
49 | "start_at": time.Now(),
50 | "end_at": time.Now(),
51 | "success": true,
52 | "msg": "Deactivated",
53 | })
54 | return nil, nil, fmt.Errorf("%s deactivated", key)
55 | }
56 | }
57 | beforeSQL, okBefore := metadata["before_sql"]
58 | afterSQL, okAfter := metadata["after_sql"]
59 | saveSQL, okSave := metadata["save_sql"]
60 | errPatt, okErrPatt := metadata["save_on_err_patt"]
61 | errSQL, okErrSQL := metadata["save_on_err_sql"]
62 | dtRef, okDtRef := metadata["date_ref"]
63 | if okDtRef && dtRef != "" {
64 | _dt, err := time.Parse("2006-01-02", dtRef.(string))
65 | if err == nil {
66 | dateRef = append([]time.Time{}, _dt)
67 | }
68 | } else {
69 | if len(dateRef) > 0 {
70 | dtRef = dateRef[0].Format("2006-01-02")
71 | }
72 | }
73 | if processLogs[0]["ref"] == nil {
74 | processLogs[0]["ref"] = dtRef
75 | }
76 | queries := []string{}
77 | order := []string{}
78 | __order, okOrder := data["__order"].([]any)
79 | if !okOrder {
80 | 		for key := range data {
81 | order = append(order, key)
82 | }
83 | } else {
84 | for _, itemKey := range __order {
85 | order = append(order, itemKey.(string))
86 | }
87 | }
88 | for _, itemKey := range order {
89 | if itemKey == "metadata" || itemKey == "__order" || itemKey == "order" {
90 | continue
91 | }
92 | item := data[itemKey]
93 | if _, isMap := item.(map[string]any); !isMap {
94 | //fmt.Println(itemKey, "NOT A MAP:", item)
95 | continue
96 | }
97 | /*if only, okOnly := extraConf["only"]; okOnly {
98 | if len(only.([]string)) == 0 {
99 | } else if !etlx.Contains(only.([]string), itemKey) {
100 | continue
101 | }
102 | }*/
103 | if skip, okSkip := extraConf["skip"]; okSkip {
104 | if len(skip.([]string)) == 0 {
105 | } else if etlx.Contains(skip.([]string), itemKey) {
106 | continue
107 | }
108 | }
109 | itemMetadata, ok := item.(map[string]any)["metadata"]
110 | if !ok {
111 | continue
112 | }
113 | // ACTIVE
114 | if active, okActive := itemMetadata.(map[string]any)["active"]; okActive {
115 | if !active.(bool) {
116 | continue
117 | }
118 | }
119 | query, okQuery := itemMetadata.(map[string]any)["query"]
120 | if query != nil && okQuery {
121 | sql := query.(string)
122 | query, ok := item.(map[string]any)[sql].(string)
123 | _, queryDoc := etlx.Config[sql]
124 | if !ok && queryDoc {
125 | query = sql
126 | _sql, _, _, err := etlx.QueryBuilder(nil, sql)
127 | if err != nil {
128 | 				fmt.Printf("QUERY DOC ERR ON KEY %s: %v\n", sql, err)
129 | _q, _e := etlx.Config[sql].(string)
130 | //fmt.Println(sql, "IS A LOADED SQL STR QUERY?", _q, _e)
131 | if _e {
132 | query = _q
133 | }
134 | } else {
135 | query = _sql
136 | }
137 | }
138 | sql = etlx.SetQueryPlaceholders(query, "", "", dateRef)
139 | queries = append(queries, sql)
140 | }
141 | }
142 | conn, okCon := metadata["connection"]
143 | if !okCon {
144 | return nil, nil, fmt.Errorf("%s err no connection defined", key)
145 | }
146 | start3 := time.Now()
147 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
148 | _log2 := map[string]any{
149 | "name": key,
150 | "description": metadata["description"].(string),
151 | "key": key, "start_at": start3,
152 | "ref": dtRef,
153 | "mem_alloc_start": mem_alloc,
154 | "mem_total_alloc_start": mem_total_alloc,
155 | "mem_sys_start": mem_sys,
156 | "num_gc_start": num_gc,
157 | }
158 | dbConn, err := etlx.GetDB(conn.(string))
159 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
160 | _log2["mem_alloc_end"] = mem_alloc
161 | _log2["mem_total_alloc_end"] = mem_total_alloc
162 | _log2["mem_sys_end"] = mem_sys
163 | _log2["num_gc_end"] = num_gc
164 | if err != nil {
165 | _log2["success"] = false
166 | _log2["msg"] = fmt.Sprintf("%s ERR: connecting to %s in : %s", key, conn, err)
167 | _log2["end_at"] = time.Now()
168 | _log2["duration"] = time.Since(start3).Seconds()
169 | processLogs = append(processLogs, _log2)
170 | return nil, nil, fmt.Errorf("%s ERR: connecting to %s in : %s", key, conn, err)
171 | }
172 | defer dbConn.Close()
173 | _log2["success"] = true
174 | 	_log2["msg"] = fmt.Sprintf("%s CONN: connection to %s successful", key, conn)
175 | _log2["end_at"] = time.Now()
176 | _log2["duration"] = time.Since(start3).Seconds()
177 | processLogs = append(processLogs, _log2)
178 | // QUERIES TO RUN AT beginning
179 | if okBefore {
180 | start3 := time.Now()
181 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
182 | _log2 = map[string]any{
183 | "name": key,
184 | "description": metadata["description"].(string),
185 | "key": key, "start_at": start3,
186 | "ref": dtRef,
187 | "mem_alloc_start": mem_alloc,
188 | "mem_total_alloc_start": mem_total_alloc,
189 | "mem_sys_start": mem_sys,
190 | "num_gc_start": num_gc,
191 | }
192 | err = etlx.ExecuteQuery(dbConn, beforeSQL, data, "", "", dateRef)
193 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
194 | if err != nil {
195 | _log2["success"] = false
196 | _log2["msg"] = fmt.Sprintf("%s Before error: %s", key, err)
197 | _log2["end_at"] = time.Now()
198 | _log2["duration"] = time.Since(start3).Seconds()
199 | } else {
200 | _log2["success"] = true
201 | _log2["msg"] = fmt.Sprintf("%s Before ", key)
202 | _log2["end_at"] = time.Now()
203 | _log2["duration"] = time.Since(start3).Seconds()
204 | }
205 | _log2["mem_alloc_end"] = mem_alloc
206 | _log2["mem_total_alloc_end"] = mem_total_alloc
207 | _log2["mem_sys_end"] = mem_sys
208 | _log2["num_gc_end"] = num_gc
209 | processLogs = append(processLogs, _log2)
210 | }
211 | // MAIN QUERY
212 | unionKey, ok := metadata["union_key"].(string)
213 | if !ok {
214 | unionKey = "UNION\n"
215 | }
216 | sql := strings.Join(queries, unionKey)
217 | // fmt.Println(key, sql)
218 | start3 = time.Now()
219 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
220 | _log2 = map[string]any{
221 | "name": key,
222 | "description": metadata["description"].(string),
223 | "key": key, "start_at": start3,
224 | "ref": dtRef,
225 | "mem_alloc_start": mem_alloc,
226 | "mem_total_alloc_start": mem_total_alloc,
227 | "mem_sys_start": mem_sys,
228 | "num_gc_start": num_gc,
229 | }
230 | // CHECK CONDITION
231 | condition, okCondition := metadata["condition"].(string)
232 | condMsg, okCondMsg := metadata["condition_msg"].(string)
233 | failedCondition := false
234 | if okCondition && condition != "" {
235 | cond, err := etlx.ExecuteCondition(dbConn, condition, metadata, "", "", dateRef)
236 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
237 | _log2["mem_alloc_end"] = mem_alloc
238 | _log2["mem_total_alloc_end"] = mem_total_alloc
239 | _log2["mem_sys_end"] = mem_sys
240 | _log2["num_gc_end"] = num_gc
241 | if err != nil {
242 | _log2["success"] = false
243 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed %s", key, "", err)
244 | _log2["end_at"] = time.Now()
245 | _log2["duration"] = time.Since(start3).Seconds()
246 | processLogs = append(processLogs, _log2)
247 | //return fmt.Errorf("%s", _log2["msg"])
248 | failedCondition = true
249 | } else if !cond {
250 | _log2["success"] = false
251 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed the condition %s was not met!", key, "", condition)
252 | _log2["end_at"] = time.Now()
253 | _log2["duration"] = time.Since(start3).Seconds()
254 | if okCondMsg && condMsg != "" {
255 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed %s", key, "", etlx.SetQueryPlaceholders(condMsg, "", "", dateRef))
256 | }
257 | processLogs = append(processLogs, _log2)
258 | // return fmt.Errorf("%s", _log2["msg"])
259 | failedCondition = true
260 | }
261 | }
262 | if saveSQL != "" && okSave && !failedCondition {
263 | data["final_query"] = sql // PUT THE QUERY GENERATED IN THE SCOPE
264 | // fmt.Println(data[saveSQL.(string)])
265 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
266 | _log2["mem_alloc_start"] = mem_alloc
267 | _log2["mem_total_alloc_start"] = mem_total_alloc
268 | _log2["mem_sys_start"] = mem_sys
269 | _log2["num_gc_start"] = num_gc
270 | err = etlx.ExecuteQuery(dbConn, saveSQL, data, "", "", dateRef)
271 | if err != nil {
272 | _err_by_pass := false
273 | if okErrPatt && errPatt != nil && okErrSQL && errSQL != nil {
274 | //fmt.Println(onErrPatt.(string), onErrSQL.(string))
275 | re, regex_err := regexp.Compile(errPatt.(string))
276 | if regex_err != nil {
277 | _log2["success"] = false
278 | _log2["msg"] = fmt.Sprintf("%s ERR: fallback regex matching the error failed to compile: %s", key, regex_err)
279 | _log2["end_at"] = time.Now()
280 | _log2["duration"] = time.Since(start3).Seconds()
281 | } else if re.MatchString(string(err.Error())) {
282 | err = etlx.ExecuteQuery(dbConn, errSQL, data, "", "", dateRef)
283 | if err != nil {
284 | _log2["success"] = false
285 | _log2["msg"] = fmt.Sprintf("%s ERR: main: %s", key, err)
286 | _log2["end_at"] = time.Now()
287 | _log2["duration"] = time.Since(start3).Seconds()
288 | } else {
289 | _err_by_pass = true
290 | }
291 | }
292 | }
293 | if !_err_by_pass {
294 | //return nil, fmt.Errorf("%s ERR: main: %s", key, err)
295 | _log2["success"] = false
296 | _log2["msg"] = fmt.Sprintf("%s ERR: main: %s", key, err)
297 | _log2["end_at"] = time.Now()
298 | _log2["duration"] = time.Since(start3).Seconds()
299 | } else {
300 | _log2["success"] = true
301 | _log2["msg"] = fmt.Sprintf("%s main ", key)
302 | _log2["end_at"] = time.Now()
303 | _log2["duration"] = time.Since(start3).Seconds()
304 | }
305 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
306 | _log2["mem_alloc_end"] = mem_alloc
307 | _log2["mem_total_alloc_end"] = mem_total_alloc
308 | _log2["mem_sys_end"] = mem_sys
309 | _log2["num_gc_end"] = num_gc
310 | } else {
311 | _log2["success"] = true
312 | _log2["msg"] = fmt.Sprintf("%s main ", key)
313 | _log2["end_at"] = time.Now()
314 | _log2["duration"] = time.Since(start3).Seconds()
315 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
316 | _log2["mem_alloc_end"] = mem_alloc
317 | _log2["mem_total_alloc_end"] = mem_total_alloc
318 | _log2["mem_sys_end"] = mem_sys
319 | _log2["num_gc_end"] = num_gc
320 | }
321 | processLogs = append(processLogs, _log2)
322 | } else if !failedCondition {
323 | rows, _, err := etlx.Query(dbConn, sql, data, "", "", dateRef)
324 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
325 | if err != nil {
326 | _log2["success"] = false
327 | 			_log2["msg"] = fmt.Sprintf("%s main query error: %s", key, err)
328 | _log2["end_at"] = time.Now()
329 | _log2["duration"] = time.Since(start3).Seconds()
330 | } else {
331 | processData = *rows
332 | _log2["success"] = true
333 | 			_log2["msg"] = fmt.Sprintf("%s main query", key)
334 | _log2["end_at"] = time.Now()
335 | _log2["duration"] = time.Since(start3).Seconds()
336 | }
337 | _log2["mem_alloc_end"] = mem_alloc
338 | _log2["mem_total_alloc_end"] = mem_total_alloc
339 | _log2["mem_sys_end"] = mem_sys
340 | _log2["num_gc_end"] = num_gc
341 | processLogs = append(processLogs, _log2)
342 | }
343 | // QUERIES TO RUN AT THE END
344 | if okAfter {
345 | start3 := time.Now()
346 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
347 | _log2 = map[string]any{
348 | "name": key,
349 | "description": metadata["description"].(string),
350 | "key": key, "start_at": start3,
351 | "ref": dtRef,
352 | "mem_alloc_start": mem_alloc,
353 | "mem_total_alloc_start": mem_total_alloc,
354 | "mem_sys_start": mem_sys,
355 | "num_gc_start": num_gc,
356 | }
357 | err = etlx.ExecuteQuery(dbConn, afterSQL, data, "", "", dateRef)
358 | if err != nil {
359 | _log2["success"] = false
360 | _log2["msg"] = fmt.Sprintf("%s After error: %s", key, err)
361 | _log2["end_at"] = time.Now()
362 | _log2["duration"] = time.Since(start3).Seconds()
363 | } else {
364 | _log2["success"] = true
365 | _log2["msg"] = fmt.Sprintf("%s After ", key)
366 | _log2["end_at"] = time.Now()
367 | _log2["duration"] = time.Since(start3).Seconds()
368 | }
369 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
370 | _log2["mem_alloc_end"] = mem_alloc
371 | _log2["mem_total_alloc_end"] = mem_total_alloc
372 | _log2["mem_sys_end"] = mem_sys
373 | _log2["num_gc_end"] = num_gc
374 | processLogs = append(processLogs, _log2)
375 | }
376 | return processLogs, processData, nil
377 | }
378 |
--------------------------------------------------------------------------------
/internal/etlx/run_scripts.go:
--------------------------------------------------------------------------------
1 | package etlxlib
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "path/filepath"
7 | "regexp"
8 | "time"
9 | )
10 |
11 | func (etlx *ETLX) RunSCRIPTS(dateRef []time.Time, conf map[string]any, extraConf map[string]any, keys ...string) ([]map[string]any, error) {
12 | key := "SCRIPTS"
13 | if len(keys) > 0 && keys[0] != "" {
14 | key = keys[0]
15 | }
16 | //fmt.Println(key, dateRef)
17 | var processLogs []map[string]any
18 | start := time.Now()
19 | mem_alloc, mem_total_alloc, mem_sys, num_gc := etlx.RuntimeMemStats()
20 | processLogs = append(processLogs, map[string]any{
21 | "name": key,
22 | "key": key, "start_at": start,
23 | "ref": nil,
24 | "mem_alloc_start": mem_alloc,
25 | "mem_total_alloc_start": mem_total_alloc,
26 | "mem_sys_start": mem_sys,
27 | "num_gc_start": num_gc,
28 | })
29 | mainDescription := ""
30 | // Define the runner as a simple function
31 | SCRIPTSRunner := func(metadata map[string]any, itemKey string, item map[string]any) error {
32 | //fmt.Println(metadata, itemKey, item)
33 | // ACTIVE
34 | if active, okActive := metadata["active"]; okActive {
35 | if !active.(bool) {
36 | processLogs = append(processLogs, map[string]any{
37 | "name": fmt.Sprintf("KEY %s", key),
38 | "description": metadata["description"].(string),
39 | "key": key, "item_key": itemKey, "start_at": time.Now(),
40 | "end_at": time.Now(),
41 | "success": true,
42 | "msg": "Deactivated",
43 | })
44 | return fmt.Errorf("deactivated %s", "")
45 | }
46 | }
47 | // MAIN PATH
48 | mainPath, okMainPath := metadata["path"].(string)
49 | if okMainPath {
50 | pth := etlx.ReplaceQueryStringDate(mainPath, dateRef)
51 | //fmt.Println("MAIN PATH", pth)
52 | if ok, _ := pathExists(pth); !ok {
53 | err := os.Mkdir(pth, 0755)
54 | if err != nil {
55 | return fmt.Errorf("%s ERR: trying to create the script path %s -> %s", key, pth, err)
56 | }
57 | }
58 | } else {
59 |
60 | }
61 | mainConn, _ := metadata["connection"].(string)
62 | mainDescription = metadata["description"].(string)
63 | itemMetadata, ok := item["metadata"].(map[string]any)
64 | if !ok {
65 | processLogs = append(processLogs, map[string]any{
66 | "name": fmt.Sprintf("%s->%s", key, itemKey),
67 | 				"description": "", // item metadata missing; avoid asserting on a nil map
68 | "key": key, "item_key": itemKey, "start_at": time.Now(),
69 | "end_at": time.Now(),
70 | "success": true,
71 | "msg": "Missing metadata in item",
72 | })
73 | return nil
74 | }
75 | // ACTIVE
76 | if active, okActive := itemMetadata["active"]; okActive {
77 | if !active.(bool) {
78 | processLogs = append(processLogs, map[string]any{
79 | "name": fmt.Sprintf("%s->%s", key, itemKey),
80 | "description": itemMetadata["description"].(string),
81 | "key": key, "item_key": itemKey, "start_at": time.Now(),
82 | "end_at": time.Now(),
83 | "success": true,
84 | "msg": "Deactivated",
85 | })
86 | return nil
87 | }
88 | }
89 | beforeSQL, okBefore := itemMetadata["before_sql"]
90 | scriptSQL, okScript := itemMetadata["script_sql"]
91 | afterSQL, okAfter := itemMetadata["after_sql"]
92 | errPatt, okErrPatt := itemMetadata["on_err_patt"]
93 | errSQL, okErrSQL := itemMetadata["on_err_sql"]
94 | conn, okCon := itemMetadata["connection"]
95 | if !okCon {
96 | conn = mainConn
97 | }
98 | dtRef, okDtRef := itemMetadata["date_ref"]
99 | if okDtRef && dtRef != "" {
100 | _dt, err := time.Parse("2006-01-02", dtRef.(string))
101 | if err == nil {
102 | dateRef = append([]time.Time{}, _dt)
103 | }
104 | } else {
105 | if len(dateRef) > 0 {
106 | dtRef = dateRef[0].Format("2006-01-02")
107 | }
108 | }
109 | if processLogs[0]["ref"] == nil {
110 | processLogs[0]["ref"] = dtRef
111 | }
112 | start3 := time.Now()
113 | mem_alloc, mem_total_alloc, mem_sys, num_gc := etlx.RuntimeMemStats()
114 | _log2 := map[string]any{
115 | "name": fmt.Sprintf("%s->%s", key, itemKey),
116 | "description": itemMetadata["description"].(string),
117 | "key": key, "item_key": itemKey, "start_at": start3,
118 | "ref": dtRef,
119 | "mem_alloc_start": mem_alloc,
120 | "mem_total_alloc_start": mem_total_alloc,
121 | "mem_sys_start": mem_sys,
122 | "num_gc_start": num_gc,
123 | }
124 | dbConn, err := etlx.GetDB(conn.(string))
125 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
126 | if err != nil {
127 | _log2["success"] = false
128 | _log2["msg"] = fmt.Sprintf("%s -> %s ERR: connecting to %s in : %s", key, itemKey, conn, err)
129 | _log2["end_at"] = time.Now()
130 | _log2["duration"] = time.Since(start3).Seconds()
131 | _log2["mem_alloc_end"] = mem_alloc
132 | _log2["mem_total_alloc_end"] = mem_total_alloc
133 | _log2["mem_sys_end"] = mem_sys
134 | _log2["num_gc_end"] = num_gc
135 | processLogs = append(processLogs, _log2)
136 | return nil
137 | }
138 | defer dbConn.Close()
139 | _log2["success"] = true
140 | 		_log2["msg"] = fmt.Sprintf("%s -> %s CONN: connection to %s successful", key, itemKey, conn)
141 | _log2["end_at"] = time.Now()
142 | _log2["duration"] = time.Since(start3).Seconds()
143 | _log2["mem_alloc_end"] = mem_alloc
144 | _log2["mem_total_alloc_end"] = mem_total_alloc
145 | _log2["mem_sys_end"] = mem_sys
146 | _log2["num_gc_end"] = num_gc
147 | processLogs = append(processLogs, _log2)
148 | // FILE
149 | table := itemMetadata["name"].(string)
150 | path, okPath := itemMetadata["path"].(string)
151 | if !okPath {
152 | path, okPath = itemMetadata["fname"].(string)
153 | if !okPath {
154 | path, okPath = itemMetadata["file"].(string)
155 | }
156 | }
157 | fname := fmt.Sprintf(`%s/%s_{YYYYMMDD}.csv`, os.TempDir(), table)
158 | if okPath && path != "" {
159 | fname = path
160 | if filepath.IsAbs(fname) {
161 | } else if filepath.IsLocal(fname) {
162 | fname = fmt.Sprintf(`%s/%s`, mainPath, fname)
163 | } else if filepath.Dir(fname) != "" && okMainPath && mainPath != "" {
164 | fname = fmt.Sprintf(`%s/%s`, mainPath, fname)
165 | }
166 | } else if okMainPath && mainPath != "" {
167 | fname = fmt.Sprintf(`%s/%s_{YYYYMMDD}.csv`, mainPath, table)
168 | }
169 | // QUERIES TO RUN AT beginning
170 | if okBefore {
171 | start3 := time.Now()
172 | _log2 := map[string]any{
173 | "name": fmt.Sprintf("%s->%s", key, itemKey),
174 | "description": itemMetadata["description"].(string),
175 | "key": key, "item_key": itemKey, "start_at": start3,
176 | "ref": dtRef,
177 | "mem_alloc_start": mem_alloc,
178 | "mem_total_alloc_start": mem_total_alloc,
179 | "mem_sys_start": mem_sys,
180 | "num_gc_start": num_gc,
181 | }
182 | err = etlx.ExecuteQuery(dbConn, beforeSQL, item, fname, "", dateRef)
183 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
184 | if err != nil {
185 | _log2["success"] = false
186 | _log2["msg"] = fmt.Sprintf("%s -> %s Before error: %s", key, itemKey, err)
187 | _log2["end_at"] = time.Now()
188 | _log2["duration"] = time.Since(start3).Seconds()
189 | } else {
190 | _log2["success"] = true
191 | _log2["msg"] = fmt.Sprintf("%s -> %s Before ", key, itemKey)
192 | _log2["end_at"] = time.Now()
193 | _log2["duration"] = time.Since(start3).Seconds()
194 | }
195 | _log2["mem_alloc_end"] = mem_alloc
196 | _log2["mem_total_alloc_end"] = mem_total_alloc
197 | _log2["mem_sys_end"] = mem_sys
198 | _log2["num_gc_end"] = num_gc
199 | processLogs = append(processLogs, _log2)
200 | }
201 | // CHECK CONDITION
202 | condition, okCondition := itemMetadata["condition"].(string)
203 | condMsg, okCondMsg := itemMetadata["condition_msg"].(string)
204 | failedCondition := false
205 | if okCondition && condition != "" {
206 | cond, err := etlx.ExecuteCondition(dbConn, condition, itemMetadata, fname, "", dateRef)
207 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
208 | if err != nil {
209 | _log2["success"] = false
210 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed %s", key, itemKey, err)
211 | _log2["end_at"] = time.Now()
212 | _log2["duration"] = time.Since(start3).Seconds()
213 | _log2["mem_alloc_end"] = mem_alloc
214 | _log2["mem_total_alloc_end"] = mem_total_alloc
215 | _log2["mem_sys_end"] = mem_sys
216 | _log2["num_gc_end"] = num_gc
217 | processLogs = append(processLogs, _log2)
218 | //return fmt.Errorf("%s", _log2["msg"])
219 | failedCondition = true
220 | } else if !cond {
221 | _log2["success"] = false
222 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed the condition %s was not met!", key, itemKey, condition)
223 | _log2["end_at"] = time.Now()
224 | _log2["duration"] = time.Since(start3).Seconds()
225 | _log2["mem_alloc_end"] = mem_alloc
226 | _log2["mem_total_alloc_end"] = mem_total_alloc
227 | _log2["mem_sys_end"] = mem_sys
228 | _log2["num_gc_end"] = num_gc
229 | if okCondMsg && condMsg != "" {
230 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed %s", key, itemKey, etlx.SetQueryPlaceholders(condMsg, table, fname, dateRef))
231 | }
232 | processLogs = append(processLogs, _log2)
233 | // return fmt.Errorf("%s", _log2["msg"])
234 | failedCondition = true
235 | }
236 | }
237 | // MAIN QUERIES
238 | if okScript && !failedCondition {
239 | start3 := time.Now()
240 | mem_alloc, mem_total_alloc, mem_sys, num_gc := etlx.RuntimeMemStats()
241 | _log2 := map[string]any{
242 | "name": fmt.Sprintf("%s->%s", key, itemKey),
243 | "description": itemMetadata["description"].(string),
244 | "key": key, "item_key": itemKey, "start_at": start3,
245 | "ref": dtRef,
246 | "mem_alloc_start": mem_alloc,
247 | "mem_total_alloc_start": mem_total_alloc,
248 | "mem_sys_start": mem_sys,
249 | "num_gc_start": num_gc,
250 | }
251 | err = etlx.ExecuteQuery(dbConn, scriptSQL, item, fname, "", dateRef)
252 | if err != nil {
253 | _err_by_pass := false
254 | if okErrPatt && errPatt != nil && okErrSQL && errSQL != nil {
255 | //fmt.Println(onErrPatt.(string), onErrSQL.(string))
256 | re, regex_err := regexp.Compile(errPatt.(string))
257 | if regex_err != nil {
258 | _log2["success"] = false
259 | 					_log2["msg"] = fmt.Sprintf("%s ERR: fallback regex matching the error failed to compile: %s", key, regex_err)
260 | _log2["end_at"] = time.Now()
261 | _log2["duration"] = time.Since(start3).Seconds()
262 | } else if re.MatchString(string(err.Error())) {
263 | err = etlx.ExecuteQuery(dbConn, errSQL, item, fname, "", dateRef)
264 | if err != nil {
265 | _log2["success"] = false
266 | 						_log2["msg"] = fmt.Sprintf("%s ERR: main: %s", key, err)
267 | _log2["end_at"] = time.Now()
268 | _log2["duration"] = time.Since(start3).Seconds()
269 | } else {
270 | _err_by_pass = true
271 | }
272 | }
273 | }
274 | if !_err_by_pass {
275 | _log2["success"] = false
276 | _log2["msg"] = fmt.Sprintf("%s -> %s error: %s", key, itemKey, err)
277 | _log2["end_at"] = time.Now()
278 | _log2["duration"] = time.Since(start3).Seconds()
279 | } else {
280 | _log2["success"] = true
281 | _log2["msg"] = fmt.Sprintf("%s -> %s Success", key, itemKey)
282 | _log2["end_at"] = time.Now()
283 | _log2["duration"] = time.Since(start3).Seconds()
284 | }
285 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
286 | _log2["mem_alloc_end"] = mem_alloc
287 | _log2["mem_total_alloc_end"] = mem_total_alloc
288 | _log2["mem_sys_end"] = mem_sys
289 | _log2["num_gc_end"] = num_gc
290 | } else {
291 | _log2["success"] = true
292 | _log2["msg"] = fmt.Sprintf("%s -> %s Success", key, itemKey)
293 | _log2["end_at"] = time.Now()
294 | _log2["duration"] = time.Since(start3).Seconds()
295 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
296 | _log2["mem_alloc_end"] = mem_alloc
297 | _log2["mem_total_alloc_end"] = mem_total_alloc
298 | _log2["mem_sys_end"] = mem_sys
299 | _log2["num_gc_end"] = num_gc
300 | }
301 | processLogs = append(processLogs, _log2)
302 | }
303 | // QUERIES TO RUN AT THE END
304 | if okAfter {
305 | start3 := time.Now()
306 | mem_alloc, mem_total_alloc, mem_sys, num_gc := etlx.RuntimeMemStats()
307 | _log2 := map[string]any{
308 | "name": fmt.Sprintf("%s->%s", key, itemKey),
309 | "description": itemMetadata["description"].(string),
310 | "key": key, "item_key": itemKey, "start_at": start3,
311 | "ref": dtRef,
312 | "mem_alloc_start": mem_alloc,
313 | "mem_total_alloc_start": mem_total_alloc,
314 | "mem_sys_start": mem_sys,
315 | "num_gc_start": num_gc,
316 | }
317 | err = etlx.ExecuteQuery(dbConn, afterSQL, item, fname, "", dateRef)
318 | if err != nil {
319 | _log2["success"] = false
320 | _log2["msg"] = fmt.Sprintf("%s -> %s After error: %s", key, itemKey, err)
321 | _log2["end_at"] = time.Now()
322 | _log2["duration"] = time.Since(start3).Seconds()
323 | } else {
324 | _log2["success"] = true
325 | _log2["msg"] = fmt.Sprintf("%s -> %s After ", key, itemKey)
326 | _log2["end_at"] = time.Now()
327 | _log2["duration"] = time.Since(start3).Seconds()
328 | }
329 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
330 | _log2["mem_alloc_end"] = mem_alloc
331 | _log2["mem_total_alloc_end"] = mem_total_alloc
332 | _log2["mem_sys_end"] = mem_sys
333 | _log2["num_gc_end"] = num_gc
334 | processLogs = append(processLogs, _log2)
335 | }
336 | return nil
337 | }
338 | // Check if the input conf is nil or empty
339 | if conf == nil {
340 | conf = etlx.Config
341 | }
342 | // Process the MD KEY
343 | err := etlx.ProcessMDKey(key, conf, SCRIPTSRunner)
344 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats()
345 | if err != nil {
346 | return processLogs, fmt.Errorf("%s failed: %v", key, err)
347 | }
348 | processLogs[0] = map[string]any{
349 | "name": key,
350 | "description": mainDescription,
351 | "key": key, "start_at": processLogs[0]["start_at"],
352 | "end_at": time.Now(),
353 | "duration": time.Since(start).Seconds(),
354 | "ref": processLogs[0]["ref"],
355 | "mem_alloc_start": processLogs[0]["mem_alloc_start"],
356 | "mem_total_alloc_start": processLogs[0]["mem_total_alloc_start"],
357 | "mem_sys_start": processLogs[0]["mem_sys_start"],
358 | "num_gc_start": processLogs[0]["num_gc_start"],
359 | }
360 | return processLogs, nil
361 | }
362 |
--------------------------------------------------------------------------------