├── examples ├── PeakHoursAnalysis.sql ├── raw_sql_file.sql ├── require_non_md.md ├── hf.md ├── ducklake.md ├── hf.ipynb ├── s3.md ├── actions.md ├── pg.md ├── s3.ipynb └── http.cs.md ├── .gitignore ├── .github └── workflows │ ├── docker-publish.yml │ ├── linux.yaml │ ├── release.yml │ ├── build.windows.yml │ ├── macos.yaml │ ├── docker.yml │ ├── windows.cc.yaml │ └── go.yml ├── internal ├── env │ └── env.go ├── db │ ├── db_interface.go │ ├── odbc.go │ └── duckdb.go └── etlx │ ├── ipynb_md.go │ ├── http.go │ ├── compress.go │ ├── run_logs.go │ ├── sftp.go │ ├── build_query.go │ ├── ftp.go │ ├── aws.go │ ├── mail.go │ ├── ducklake.go │ ├── action_db2db.go │ ├── load_requirements.go │ ├── run_notify.go │ ├── run_multiples_queries.go │ └── run_scripts.go ├── LICENSE ├── debian.Dockerfile ├── alpine.Dockerfile ├── debian.slim.Dockerfile ├── etlx.go ├── ubuntu.Dockerfile ├── Dockerfile ├── go.mod └── cmd └── main.go /examples/PeakHoursAnalysis.sql: -------------------------------------------------------------------------------- 1 | -- Peak Hours Analysis 2 | SELECT EXTRACT(hour FROM tpep_pickup_datetime) AS hour_of_day, 3 | COUNT(*) AS total_trips, 4 | ROUND(AVG(total_amount), 2) AS avg_fare, 5 | ROUND(AVG(trip_distance), 2) AS avg_distance, 6 | ROUND(AVG(EXTRACT(EPOCH FROM (tpep_dropoff_datetime - tpep_pickup_datetime)) / 60.0), 2) AS avg_duration_minutes 7 | FROM DB."NYC_TAXI" 8 | WHERE tpep_dropoff_datetime > tpep_pickup_datetime 9 | GROUP BY hour_of_day 10 | ORDER BY hour_of_day -------------------------------------------------------------------------------- /examples/raw_sql_file.sql: -------------------------------------------------------------------------------- 1 | -- Create a table 2 | CREATE TABLE employees ( 3 | id INT PRIMARY KEY, 4 | name VARCHAR(100), 5 | position VARCHAR(50), 6 | salary DECIMAL(10, 2) 7 | ); 8 | 9 | -- Insert data 10 | INSERT INTO employees (id, name, position, salary) VALUES 11 | (1, 'Alice Smith', 'Developer', 75000.00), 12 | (2, 'Bob Johnson', 'Manager', 90000.00); 13 | 14 | -- Select data 15 | SELECT * FROM employees; 16 | 17 | -- Update data 18 | UPDATE employees SET salary = 80000.00 WHERE id = 1; 19 | 20 | -- Delete data 21 | DELETE FROM employees WHERE id = 2; -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### AL ### 2 | #Template for AL projects for Dynamics 365 Business Central 3 | #launch.json folder 4 | .vscode/ 5 | #Cache folder 6 | .alcache/ 7 | #Symbols folder 8 | .alpackages/ 9 | #Snapshots folder 10 | .snapshots/ 11 | #Testing Output folder 12 | .output/ 13 | #Extension App-file 14 | *.app 15 | #Rapid Application Development File 16 | rad.json 17 | #Translation Base-file 18 | *.g.xlf 19 | #License-file 20 | *.flf 21 | #Test results file 22 | TestResults.xml 23 | *.env 24 | *.exe 25 | *.db 26 | *.db-journal 27 | *.duckdb 28 | *.ddb 29 | database/ 30 | tmp/ 31 | bin/ 32 | .air.toml 33 | logs.*.json 34 | *.xlsx 35 | *.csv 36 | *.parquet 37 | *.ddb 38 | *.zip 39 | *.gz 40 | *nyc* -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- 1 | name: Build and Push Docker image to Docker Hub 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | build-and-push: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v4 13 | 14 | - name: Log in 
to Docker Hub 15 | uses: docker/login-action@v3 16 | with: 17 | username: ${{ secrets.DOCKERHUB_USERNAME }} 18 | password: ${{ secrets.DOCKERHUB_TOKEN }} 19 | 20 | - name: Build Docker image 21 | run: docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/etlx:latest . 22 | 23 | - name: Push Docker image 24 | run: docker push ${{ secrets.DOCKERHUB_USERNAME }}/etlx:latest 25 | -------------------------------------------------------------------------------- /internal/env/env.go: -------------------------------------------------------------------------------- 1 | package env 2 | 3 | import ( 4 | "os" 5 | "strconv" 6 | ) 7 | 8 | func GetString(key, defaultValue string) string { 9 | value, exists := os.LookupEnv(key) 10 | if !exists { 11 | return defaultValue 12 | } 13 | 14 | return value 15 | } 16 | 17 | func GetInt(key string, defaultValue int) int { 18 | value, exists := os.LookupEnv(key) 19 | if !exists { 20 | return defaultValue 21 | } 22 | 23 | intValue, err := strconv.Atoi(value) 24 | if err != nil { 25 | panic(err) 26 | } 27 | 28 | return intValue 29 | } 30 | 31 | func GetBool(key string, defaultValue bool) bool { 32 | value, exists := os.LookupEnv(key) 33 | if !exists { 34 | return defaultValue 35 | } 36 | 37 | boolValue, err := strconv.ParseBool(value) 38 | if err != nil { 39 | return false 40 | } 41 | 42 | return boolValue 43 | } 44 | -------------------------------------------------------------------------------- /.github/workflows/linux.yaml: -------------------------------------------------------------------------------- 1 | name: Linux Build 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | build-linux: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Install Dependencies 11 | run: | 12 | sudo apt-get update 13 | sudo apt-get install -y build-essential gcc g++ unixodbc unixodbc-dev 14 | - name: Checkout Code 15 | uses: actions/checkout@v3 16 | - name: Setup Go 17 | uses: actions/setup-go@v4 18 | with: 19 | go-version: '1.23' 20 | - name: Build Linux Binaries 21 | run: | 22 | mkdir -p dist 23 | CGO_ENABLED=1 go build -o dist/etlx-linux-amd64 ./cmd/main.go 24 | - name: Upload Artifacts 25 | uses: actions/upload-artifact@v4 26 | with: 27 | name: linux-binary 28 | path: dist/*linux* 29 | -------------------------------------------------------------------------------- /examples/require_non_md.md: -------------------------------------------------------------------------------- 1 | # ETL 2 | 3 | ```yaml metadata 4 | name: DB 5 | description: "Example extrating from S3 to a local sqlite3 file" 6 | connection: "duckdb:" 7 | active: true 8 | ``` 9 | 10 | ## VERSION 11 | 12 | ```yaml metadata 13 | name: VERSION 14 | description: "DDB Version" 15 | table: VERSION 16 | load_conn: "duckdb:" 17 | load_before_sql: "ATTACH 'database/DB.db' AS DB (TYPE SQLITE)" 18 | load_sql: 'CREATE OR REPLACE TABLE DB."" AS SELECT version() AS "VERSION";' 19 | load_after_sql: "DETACH DB;" 20 | rows_sql: 'SELECT COUNT(*) AS "nrows" FROM DB."
"' 21 | active: true 22 | ``` 23 | 24 | # REQUIRES 25 | 26 | ```yaml metadata 27 | name: REQUIRES 28 | description: load dependencies 29 | active: true 30 | ``` 31 | 32 | ## RAW_SQL 33 | ```yaml 34 | name: RAW_SQL 35 | description: load raw sql from file 36 | path: examples/raw_sql_file.sql 37 | active: true 38 | ``` -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | release: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Download Windows Binary 11 | uses: actions/download-artifact@v3 12 | with: 13 | name: windows-binary 14 | path: dist 15 | - name: Download MacOS Binary 16 | uses: actions/download-artifact@v3 17 | with: 18 | name: macos-binary 19 | path: dist 20 | - name: Download Linux Binary 21 | uses: actions/download-artifact@v3 22 | with: 23 | name: linux-binary 24 | path: dist 25 | - name: Create Release 26 | uses: softprops/action-gh-release@v1 27 | with: 28 | files: | 29 | dist/etlx-windows-amd64.exe 30 | dist/etlx-macos-amd64 31 | dist/etlx-linux-amd64 32 | env: 33 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 realdatadriven 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /debian.Dockerfile: -------------------------------------------------------------------------------- 1 | # Use a minimal Debian-based image 2 | FROM debian:bookworm-slim 3 | 4 | # Set the ETLX version and architecture 5 | ARG ETLX_VERSION=v0.2.1 6 | ARG ETLX_ARCH=amd64 # Change to arm64 if needed for ARM-based systems 7 | 8 | # Define the download URL for the zipped release 9 | ENV ETLX_URL="https://github.com/realdatadriven/etlx/releases/download/${ETLX_VERSION}/etlx-linux-${ETLX_ARCH}.zip" 10 | 11 | # Install dependencies (curl for downloading, unzip for extracting) 12 | RUN apt-get update && apt-get install -y \ 13 | curl \ 14 | unzip \ 15 | ca-certificates \ 16 | unixodbc \ 17 | build-essential \ 18 | libc6 \ 19 | && rm -rf /var/lib/apt/lists/* 20 | 21 | # Set working directory 22 | WORKDIR /app 23 | 24 | # Download and extract the ETLX binary 25 | RUN curl -L $ETLX_URL -o etlx.zip && \ 26 | unzip etlx.zip && \ 27 | rm etlx.zip && \ 28 | mv etlx-linux-${ETLX_ARCH} /usr/local/bin/etlx && \ 29 | chmod +x /usr/local/bin/etlx 30 | 31 | # Allow users to mount a config file 32 | VOLUME ["/app/config"] 33 | 34 | # Set the entrypoint to pass CLI arguments 35 | ENTRYPOINT ["/usr/local/bin/etlx"] 36 | -------------------------------------------------------------------------------- /alpine.Dockerfile: -------------------------------------------------------------------------------- 1 | # Use a minimal Alpine-based image 2 | FROM alpine:latest 3 | 4 | # Set the ETLX version and architecture 5 | ARG ETLX_VERSION=v0.2.1 6 | ARG ETLX_ARCH=amd64 # Change to arm64 if needed for ARM-based systems 7 | 8 | # Define the download URL for the zipped release 9 | ENV ETLX_URL="https://github.com/realdatadriven/etlx/releases/download/${ETLX_VERSION}/etlx-linux-${ETLX_ARCH}.zip" 10 | 11 | # Install dependencies (curl for downloading, unzip for extracting, libc6 is replaced by musl) 12 | RUN apk update && apk add --no-cache \ 13 | curl \ 14 | unzip \ 15 | ca-certificates \ 16 | unixodbc \ 17 | libc6-compat \ 18 | bash \ 19 | && rm -rf /var/cache/apk/* 20 | 21 | # Set working directory 22 | WORKDIR /app 23 | 24 | # Download and extract the ETLX binary 25 | RUN curl -L $ETLX_URL -o etlx.zip && \ 26 | unzip etlx.zip && \ 27 | rm etlx.zip && \ 28 | mv etlx-linux-${ETLX_ARCH} /usr/local/bin/etlx && \ 29 | chmod +x /usr/local/bin/etlx 30 | 31 | # Allow users to mount a config file 32 | VOLUME ["/app/config"] 33 | 34 | # Set the entrypoint to pass CLI arguments 35 | ENTRYPOINT ["/usr/local/bin/etlx"] 36 | -------------------------------------------------------------------------------- /.github/workflows/build.windows.yml: -------------------------------------------------------------------------------- 1 | name: Debug Windows Build 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | build-windows-msvc: 8 | runs-on: windows-latest 9 | steps: 10 | - name: Checkout Code 11 | uses: actions/checkout@v3 12 | 13 | - name: Setup Go 14 | uses: actions/setup-go@v4 15 | with: 16 | go-version: '1.24' 17 | 18 | - name: Setup MSBuild (for Visual Studio environment) 19 | uses: microsoft/setup-msbuild@v2 20 | 21 | - name: Set Environment Variables for CGO 22 | run: | 23 | echo "CGO_ENABLED=1" >> $env:GITHUB_ENV 24 | # echo "CC=cl.exe" >> $env:GITHUB_ENV 25 | 26 | - name: Build with MSVC (Visual Studio compiler) 27 | run: | 28 | mkdir dist 29 | go build -o dist/etlx-windows-amd64.exe ./cmd/main.go 30 | # echo "GOARCH=arm64" >> $env:GITHUB_ENV 31 | # go 
build -o dist/etlx-windows-arm64.exe ./cmd/main.go 32 | 33 | - name: Upload MSVC Artifacts 34 | uses: actions/upload-artifact@v4 35 | with: 36 | name: windows-msvc-binary 37 | path: dist/*windows* 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /debian.slim.Dockerfile: -------------------------------------------------------------------------------- 1 | # Use a minimal Debian-based image 2 | FROM debian:bookworm-slim 3 | 4 | # Set the ETLX version and architecture 5 | ARG ETLX_VERSION=v0.2.1 6 | ARG ETLX_ARCH=amd64 # Change to arm64 if needed for ARM-based systems 7 | 8 | # Define the download URL for the zipped release 9 | ENV ETLX_URL="https://github.com/realdatadriven/etlx/releases/download/${ETLX_VERSION}/etlx-linux-${ETLX_ARCH}.zip" 10 | 11 | # Install dependencies (curl for downloading, unzip for extracting) 12 | RUN apt-get update && apt-get install -y \ 13 | curl \ 14 | unzip \ 15 | ca-certificates \ 16 | unixodbc \ 17 | build-essential \ 18 | libc6 \ 19 | && rm -rf /var/lib/apt/lists/* 20 | 21 | # Set working directory 22 | WORKDIR /app 23 | 24 | # Download and extract the ETLX binary 25 | RUN curl -L $ETLX_URL -o etlx.zip && \ 26 | unzip etlx.zip && \ 27 | rm etlx.zip && \ 28 | mv etlx-linux-${ETLX_ARCH} /usr/local/bin/etlx && \ 29 | chmod +x /usr/local/bin/etlx 30 | 31 | # Allow users to mount a config file 32 | VOLUME ["/app/config"] 33 | 34 | # Set the entrypoint to pass CLI arguments 35 | ENTRYPOINT ["/usr/local/bin/etlx"] 36 | 37 | # sudo docker build -t etlx:latest . 38 | # sudo docker exec etxl --help 39 | -------------------------------------------------------------------------------- /etlx.go: -------------------------------------------------------------------------------- 1 | package etlx 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/joho/godotenv" 7 | "github.com/realdatadriven/etlx/internal/db" 8 | etlxlib "github.com/realdatadriven/etlx/internal/etlx" 9 | ) 10 | 11 | // Expose the library functions 12 | type ETLX = etlxlib.ETLX 13 | 14 | type DBInterface = db.DBInterface 15 | 16 | type DB = db.DB 17 | 18 | func New(driverName string, dsn string) (*db.DB, error) { 19 | return db.New(driverName, dsn) 20 | } 21 | 22 | type DuckDB = db.DuckDB 23 | 24 | func NewDuckDB(dsn string) (*db.DuckDB, error) { 25 | return db.NewDuckDB(dsn) 26 | } 27 | 28 | type ODBC = db.ODBC 29 | 30 | func NewODBC(dsn string) (*db.ODBC, error) { 31 | return db.NewODBC(dsn) 32 | } 33 | 34 | func ReplaceDBName(dsn, dbname string) (string, error) { 35 | return db.ReplaceDBName(dsn, dbname) 36 | } 37 | 38 | type DuckLakeParseResult = etlxlib.DuckLakeParseResult 39 | type DuckLakeOccurrence = etlxlib.DuckLakeOccurrence 40 | type DuckLakeParser = etlxlib.DuckLakeParser 41 | 42 | func NewDuckLakeParser() *etlxlib.DuckLakeParser { 43 | return etlxlib.NewDuckLakeParser() 44 | } 45 | 46 | func GetDB(conn string) (DBInterface, error) { 47 | /**retuns DBInterface and chooses the driver base on the etlx connection style driver: */ 48 | return etlxlib.GetDB(conn) 49 | } 50 | 51 | func LoadDotEnv() { 52 | _err := godotenv.Load() 53 | if _err != nil { 54 | fmt.Println("Error loading .env file") 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /internal/db/db_interface.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | ) 7 | 8 | type DBInterface interface { 9 | ExecuteQuery(query string, data ...interface{}) 
(int, error) 10 | Query2CSV(query string, csv_path string, params ...interface{}) (bool, error) 11 | QueryMultiRows(query string, params ...interface{}) (*[]map[string]interface{}, bool, error) 12 | ExecuteQueryRowsAffected(query string, data ...interface{}) (int64, error) 13 | QuerySingleRow(query string, params ...interface{}) (*map[string]interface{}, bool, error) 14 | QueryRows(ctx context.Context, query string, params ...interface{}) (*sql.Rows, error) 15 | QueryMultiRowsWithCols(query string, params ...interface{}) (*[]map[string]interface{}, []string, bool, error) 16 | AllTables(params map[string]interface{}, extra_conf map[string]interface{}) (*[]map[string]interface{}, bool, error) 17 | TableSchema(params map[string]interface{}, table string, dbName string, extra_conf map[string]interface{}) (*[]map[string]interface{}, bool, error) 18 | ExecuteNamedQuery(query string, data map[string]interface{}) (int, error) 19 | ExecuteQueryPGInsertWithLastInsertId(query string, data ...interface{}) (int, error) 20 | GetUserByNameOrEmail(email string) (map[string]interface{}, bool, error) 21 | GetDriverName() string 22 | Close() error 23 | IsEmpty(value interface{}) bool 24 | FromParams(params map[string]interface{}, extra_conf map[string]interface{}) (*DB, string, string, error) 25 | Ping() error 26 | } 27 | -------------------------------------------------------------------------------- /ubuntu.Dockerfile: -------------------------------------------------------------------------------- 1 | # Use a minimal Ubuntu-based image 2 | FROM ubuntu:latest 3 | 4 | # Set the ETLX version and architecture 5 | ARG ETLX_VERSION=v0.2.2 6 | ARG ETLX_ARCH=amd64 # Change to arm64 if needed for ARM-based systems 7 | 8 | # Define the download URL for the zipped release 9 | ENV ETLX_URL="https://github.com/realdatadriven/etlx/releases/download/${ETLX_VERSION}/etlx-linux-${ETLX_ARCH}.zip" 10 | #ENV ETLX_URL="https://github.com/realdatadriven/etlx/actions/runs/13442643061/artifacts/2625838115" 11 | # Install dependencies (curl for downloading, unzip for extracting, and necessary libraries) 12 | RUN apt-get update && apt-get install -y \ 13 | curl \ 14 | unzip \ 15 | ca-certificates \ 16 | unixodbc \ 17 | build-essential \ 18 | libc6 \ 19 | wget \ 20 | && rm -rf /var/lib/apt/lists/* 21 | 22 | # Install a newer version of glibc (careful with this in production) 23 | #RUN wget http://ftp.gnu.org/gnu/libc/glibc-2.38.tar.gz && \ 24 | # tar -xvzf glibc-2.38.tar.gz && \ 25 | # cd glibc-2.38 && \ 26 | # mkdir build && \ 27 | # cd build && \ 28 | # ../configure && \ 29 | # make -j$(nproc) && \ 30 | # make install 31 | 32 | # Set working directory 33 | WORKDIR /app 34 | 35 | # Download and extract the ETLX binary 36 | RUN curl -L $ETLX_URL -o etlx.zip && \ 37 | unzip etlx.zip && \ 38 | rm etlx.zip && \ 39 | mv etlx-linux-${ETLX_ARCH} /usr/local/bin/etlx && \ 40 | chmod +x /usr/local/bin/etlx 41 | 42 | # Allow users to mount a config file 43 | VOLUME ["/app/config"] 44 | 45 | # Set the entrypoint to pass CLI arguments 46 | ENTRYPOINT ["/usr/local/bin/etlx"] 47 | -------------------------------------------------------------------------------- /.github/workflows/macos.yaml: -------------------------------------------------------------------------------- 1 | name: MacOS Build with CGO and UnixODBC 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | build-macos: 8 | runs-on: macos-latest 9 | steps: 10 | # Step 1: Checkout the code 11 | - name: Checkout Code 12 | uses: actions/checkout@v3 13 | 14 | # Step 2: Setup Go 15 | - name: 
Setup Go 16 | uses: actions/setup-go@v4 17 | with: 18 | go-version: '1.23' 19 | 20 | # Step 3: Install UnixODBC 21 | - name: Install UnixODBC 22 | run: | 23 | brew install unixodbc 24 | brew --prefix unixodbc 25 | 26 | # Step 4: Set Environment Variables 27 | - name: Set Environment Variables 28 | run: | 29 | ODBC_PREFIX=$(brew --prefix unixodbc) 30 | echo "CGO_ENABLED=1" >> $GITHUB_ENV 31 | echo "CGO_CFLAGS=-I$ODBC_PREFIX/include" >> $GITHUB_ENV 32 | echo "CGO_LDFLAGS=-L$ODBC_PREFIX/lib -lodbc" >> $GITHUB_ENV 33 | 34 | # Step 5: Build the Application 35 | - name: Build MacOS Binary 36 | run: | 37 | mkdir dist 38 | go build -o dist/etlx-macos-amd64 ./cmd/main.go 39 | 40 | # Step 6: Upload Build Logs for Debugging (if Build Fails) 41 | - name: Upload Logs 42 | if: failure() 43 | uses: actions/upload-artifact@v3 44 | with: 45 | name: build-logs 46 | path: ${{ github.workspace }} 47 | 48 | # Step 7: Upload Built Binary for Testing (if Successful) 49 | - name: Upload Binary 50 | if: success() 51 | uses: actions/upload-artifact@v3 52 | with: 53 | name: macos-binary 54 | path: dist/etlx-macos-amd64 55 | -------------------------------------------------------------------------------- /internal/etlx/ipynb_md.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "os" 7 | "strings" 8 | ) 9 | 10 | // Notebook represents the structure of a Jupyter notebook. 11 | type Notebook struct { 12 | Cells []Cell `json:"cells"` 13 | } 14 | 15 | // Cell represents a single cell in the notebook. 16 | type Cell struct { 17 | CellType string `json:"cell_type"` 18 | Source []string `json:"source"` 19 | } 20 | 21 | // ConvertIPYNBToMarkdown converts the content of a .ipynb file to Markdown text. 
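// Markdown cells are copied through as-is, code cells are wrapped in fenced code
// blocks, and cells with an empty source are skipped.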
22 | func (etlx *ETLX) ConvertIPYNBToMarkdown(ipynbContent []byte) (string, error) { 23 | // Parse the .ipynb content 24 | var notebook Notebook 25 | if err := json.Unmarshal(ipynbContent, ¬ebook); err != nil { 26 | return "", fmt.Errorf("error parsing JSON: %w", err) 27 | } 28 | // Build the Markdown output 29 | var mdBuilder strings.Builder 30 | for _, cell := range notebook.Cells { 31 | // Skip empty cells 32 | if len(cell.Source) == 0 { 33 | continue 34 | } 35 | switch cell.CellType { 36 | case "markdown": 37 | // Add Markdown content directly 38 | for _, line := range cell.Source { 39 | mdBuilder.WriteString(line) 40 | } 41 | mdBuilder.WriteString("\n\n") // Add spacing between cells 42 | case "code": 43 | // Wrap code content in a Markdown code block 44 | mdBuilder.WriteString("```\n") 45 | for _, line := range cell.Source { 46 | mdBuilder.WriteString(line) 47 | } 48 | mdBuilder.WriteString("```\n\n") 49 | } 50 | } 51 | if os.Getenv("ETLX_DEBUG_QUERY") == "true" { 52 | _, err := etlx.TempFIle("", mdBuilder.String(), "ipymd2md.*.md") 53 | if err != nil { 54 | fmt.Println(err) 55 | } 56 | //fmt.Println(_file) 57 | } 58 | return mdBuilder.String(), nil 59 | } 60 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # ============================================ 2 | # 🛠️ Stage 1: Build etlx from Source 3 | # ============================================ 4 | FROM golang:1.24 AS builder 5 | 6 | WORKDIR /app 7 | 8 | # Install build deps if needed 9 | RUN apt-get update && apt-get install -y \ 10 | build-essential \ 11 | gcc \ 12 | g++ \ 13 | unixodbc \ 14 | unixodbc-dev \ 15 | && rm -rf /var/lib/apt/lists/* 16 | 17 | ENV CGO_ENABLED=1 18 | 19 | # Clone etlx repository 20 | RUN git clone --depth=1 https://github.com/realdatadriven/etlx.git . 21 | 22 | # Build etlx binary 23 | RUN go build -o etlx ./cmd 24 | 25 | # ============================================ 26 | # 🚀 Stage 2: Runtime Image 27 | # ============================================ 28 | FROM ubuntu:24.04 29 | 30 | RUN apt-get update && apt-get install -y \ 31 | ca-certificates \ 32 | unixodbc \ 33 | && rm -rf /var/lib/apt/lists/* 34 | 35 | WORKDIR /app 36 | 37 | # Copy compiled binary 38 | COPY --from=builder /app/etlx /usr/local/bin/etlx 39 | 40 | # Ensure binary is executable 41 | RUN chmod +x /usr/local/bin/etlx 42 | 43 | # Volume mounts (db/config/env handled externally) 44 | VOLUME ["/app/database"] 45 | 46 | # Entry script for env/config handling 47 | RUN echo '#!/bin/bash\n\ 48 | set -e\n\ 49 | \n\ 50 | # Load env if mounted\n\ 51 | if [ -f "/app/.env" ]; then\n\ 52 | echo "Loading environment variables from /app/.env"\n\ 53 | set -a\n\ 54 | source /app/.env\n\ 55 | set +a\n\ 56 | fi\n\ 57 | \n\ 58 | # If first arg is empty, show help\n\ 59 | if [ $# -eq 0 ]; then\n\ 60 | echo "Usage: docker run etlx [command] [args]"\n\ 61 | echo "Run \\"docker run etlx help\\" for full CLI usage."\n\ 62 | exit 0\n\ 63 | fi\n\ 64 | \n\ 65 | echo "Executing: etlx $@"\n\ 66 | exec /usr/local/bin/etlx "$@"' > /entrypoint.sh && chmod +x /entrypoint.sh 67 | 68 | ENTRYPOINT ["/entrypoint.sh"] 69 | CMD [] 70 | 71 | # ============================================ 72 | # 📝 Usage Instructions 73 | #docker build --no-cache -t etlx:latest . 
74 | #docker run -v ./.env:/app/.env:ro -v ./config.md:/app/config.md:ro -v ./database:/app/database etlx:latest --config /app/config.md 75 | #podman tag etlx:latest docker.io/realdatadriven/etlx:latest 76 | #podman tag etlx:latest docker.io/realdatadriven/etlx:v1.4.7 77 | #podman login docker.io 78 | #podman push docker.io/realdatadriven/etlx:latest 79 | #podman push docker.io/realdatadriven/etlx:v1.4.7 -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: v2Build and Push Docker image to Docker Hub 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | deploy: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v4 13 | 14 | - name: Extract Version 15 | id: version_step 16 | run: | 17 | if [[ "${GITHUB_REF}" == refs/tags/v* ]]; then 18 | VERSION="${GITHUB_REF#refs/tags/v}" 19 | else 20 | VERSION="${GITHUB_REF##*/}" # fallback to branch name 21 | fi 22 | VERSION_TAG="${{ github.repository }}:$VERSION" 23 | LATEST_TAG="${{ github.repository }}:latest" 24 | 25 | echo "version=$VERSION" >> "$GITHUB_OUTPUT" 26 | echo "version_tag=${VERSION_TAG,,}" >> "$GITHUB_OUTPUT" 27 | echo "latest_tag=${LATEST_TAG,,}" >> "$GITHUB_OUTPUT" 28 | 29 | - name: Debug Version Info 30 | run: | 31 | echo "version: ${{ steps.version_step.outputs.version }}" 32 | echo "version_tag: ${{ steps.version_step.outputs.version_tag }}" 33 | echo "latest_tag: ${{ steps.version_step.outputs.latest_tag }}" 34 | 35 | 36 | 37 | 38 | - name: Set up QEMU 39 | uses: docker/setup-qemu-action@v3 40 | 41 | - name: Set up Docker Buildx 42 | uses: docker/setup-buildx-action@v3 43 | 44 | - name: Login to Docker Hub 45 | uses: docker/login-action@v3 46 | with: 47 | username: ${{ secrets.DOCKERHUB_USERNAME }} 48 | password: ${{ secrets.DOCKERHUB_TOKEN }} 49 | 50 | # - name: Prepare registry tags 51 | # run: | 52 | # echo "VERSION_TAG=$(echo ${{ steps.version_step.outputs.version_tag }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV 53 | # echo "LATEST_TAG=$(echo ${{ steps.version_step.outputs.latest_tag }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV 54 | 55 | - name: Prepare registry tags 56 | run: | 57 | echo "VERSION_TAG=${{ steps.version_step.outputs.version_tag }}" >> $GITHUB_ENV 58 | echo "LATEST_TAG=${{ steps.version_step.outputs.latest_tag }}" >> $GITHUB_ENV 59 | 60 | - name: Build and push Docker image 61 | uses: docker/build-push-action@v5 62 | with: 63 | context: . 
64 | push: true 65 | tags: | 66 | ${{ env.VERSION_TAG }} 67 | ${{ env.LATEST_TAG }} 68 | build-args: | 69 | VERSION=${{ steps.version_step.outputs.version }} 70 | 71 | -------------------------------------------------------------------------------- /internal/etlx/http.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "net/http" 8 | "os" 9 | "strings" 10 | ) 11 | 12 | // HTTPAction executes HTTP uploads and downloads based on the mode and params 13 | func (etlx *ETLX) HTTPAction(mode string, params map[string]any) error { 14 | url, _ := params["url"].(string) 15 | method := "GET" 16 | if m, ok := params["method"].(string); ok { 17 | method = strings.ToUpper(m) 18 | } 19 | headers, _ := params["headers"].(map[string]any) 20 | contentType, _ := params["content_type"].(string) 21 | // bodyParams, _ := params["body"].(map[string]any) 22 | source, _ := params["source"].(string) 23 | target, _ := params["target"].(string) 24 | if url == "" { 25 | return fmt.Errorf("missing 'url' parameter") 26 | } 27 | client := &http.Client{} 28 | switch mode { 29 | case "download": 30 | req, err := http.NewRequest(method, url, nil) 31 | if err != nil { 32 | return fmt.Errorf("creating request failed: %w", err) 33 | } 34 | for k, v := range headers { 35 | req.Header.Set(k, fmt.Sprintf("%v", v)) 36 | } 37 | resp, err := client.Do(req) 38 | if err != nil { 39 | return fmt.Errorf("HTTP request failed: %w", err) 40 | } 41 | defer resp.Body.Close() 42 | if resp.StatusCode >= 300 { 43 | return fmt.Errorf("HTTP request returned status: %s", resp.Status) 44 | } 45 | outFile, err := os.Create(target) 46 | if err != nil { 47 | return fmt.Errorf("creating output file failed: %w", err) 48 | } 49 | defer outFile.Close() 50 | _, err = io.Copy(outFile, resp.Body) 51 | if err != nil { 52 | return fmt.Errorf("saving response failed: %w", err) 53 | } 54 | case "upload": 55 | file, err := os.Open(source) 56 | if err != nil { 57 | return fmt.Errorf("opening source file failed: %w", err) 58 | } 59 | defer file.Close() 60 | body := &bytes.Buffer{} 61 | _, err = io.Copy(body, file) 62 | if err != nil { 63 | return fmt.Errorf("copying file to body failed: %w", err) 64 | } 65 | req, err := http.NewRequest(method, url, body) 66 | if err != nil { 67 | return fmt.Errorf("creating HTTP request failed: %w", err) 68 | } 69 | if contentType != "" { 70 | req.Header.Set("Content-Type", contentType) 71 | } 72 | for k, v := range headers { 73 | req.Header.Set(k, fmt.Sprintf("%v", v)) 74 | } 75 | resp, err := client.Do(req) 76 | if err != nil { 77 | return fmt.Errorf("HTTP upload failed: %w", err) 78 | } 79 | defer resp.Body.Close() 80 | if resp.StatusCode >= 300 { 81 | return fmt.Errorf("upload returned status: %s", resp.Status) 82 | } 83 | default: 84 | return fmt.Errorf("unsupported http action: %s", mode) 85 | } 86 | return nil 87 | } 88 | -------------------------------------------------------------------------------- /internal/etlx/compress.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "archive/zip" 5 | "compress/gzip" 6 | "io" 7 | "os" 8 | "path/filepath" 9 | ) 10 | 11 | func (etlx *ETLX) CompressToZip(files []string, output string) error { 12 | outFile, err := os.Create(output) 13 | if err != nil { 14 | return err 15 | } 16 | defer outFile.Close() 17 | zipWriter := zip.NewWriter(outFile) 18 | defer zipWriter.Close() 19 | for _, file := range files { 20 | 
inFile, err := os.Open(file) 21 | if err != nil { 22 | return err 23 | } 24 | defer inFile.Close() 25 | 26 | w, err := zipWriter.Create(filepath.Base(file)) 27 | if err != nil { 28 | return err 29 | } 30 | _, err = io.Copy(w, inFile) 31 | if err != nil { 32 | return err 33 | } 34 | } 35 | return nil 36 | } 37 | 38 | func (etlx *ETLX) CompressToGZ(input string, output string) error { 39 | inFile, err := os.Open(input) 40 | if err != nil { 41 | return err 42 | } 43 | defer inFile.Close() 44 | outFile, err := os.Create(output) 45 | if err != nil { 46 | return err 47 | } 48 | defer outFile.Close() 49 | gzWriter := gzip.NewWriter(outFile) 50 | defer gzWriter.Close() 51 | _, err = io.Copy(gzWriter, inFile) 52 | return err 53 | } 54 | 55 | // Unzip a .zip archive to a specified directory 56 | func (etlx *ETLX) Unzip(zipPath string, destDir string) error { 57 | r, err := zip.OpenReader(zipPath) 58 | if err != nil { 59 | return err 60 | } 61 | defer r.Close() 62 | 63 | for _, f := range r.File { 64 | outPath := filepath.Join(destDir, f.Name) 65 | if f.FileInfo().IsDir() { 66 | os.MkdirAll(outPath, os.ModePerm) 67 | continue 68 | } 69 | 70 | rc, err := f.Open() 71 | if err != nil { 72 | return err 73 | } 74 | defer rc.Close() 75 | 76 | if err := os.MkdirAll(filepath.Dir(outPath), os.ModePerm); err != nil { 77 | return err 78 | } 79 | 80 | outFile, err := os.Create(outPath) 81 | if err != nil { 82 | return err 83 | } 84 | defer outFile.Close() 85 | 86 | if _, err = io.Copy(outFile, rc); err != nil { 87 | return err 88 | } 89 | } 90 | return nil 91 | } 92 | 93 | // Decompress a GZ file into the original file 94 | func (etlx *ETLX) DecompressGZ(gzPath string, outputPath string) error { 95 | inFile, err := os.Open(gzPath) 96 | if err != nil { 97 | return err 98 | } 99 | defer inFile.Close() 100 | 101 | gzReader, err := gzip.NewReader(inFile) 102 | if err != nil { 103 | return err 104 | } 105 | defer gzReader.Close() 106 | 107 | outFile, err := os.Create(outputPath) 108 | if err != nil { 109 | return err 110 | } 111 | defer outFile.Close() 112 | 113 | _, err = io.Copy(outFile, gzReader) 114 | return err 115 | } 116 | -------------------------------------------------------------------------------- /examples/hf.md: -------------------------------------------------------------------------------- 1 | # ETL 2 | 3 | The [`httpfs`](https://duckdb.org/docs/extensions/httpfs/overview, "httpfs") extension introduces support for the hf:// protocol to access data sets hosted in [Hugging Face](https://huggingface.co "Hugging Face Homepage") repositories. See the [announcement blog post](https://duckdb.org/2024/05/29/access-150k-plus-datasets-from-hugging-face-with-duckdb.html, "announcement blog post") for details. 
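Before wiring it into an ETLX config, the protocol can be tried directly in a DuckDB session. The sketch below is illustrative only (it is not part of the pipeline); it assumes just the `httpfs` extension and reuses the same public dataset referenced in the `load_query` block further down.

```sql
-- standalone check of the hf:// protocol (not used by the pipeline)
INSTALL httpfs;
LOAD httpfs;
SELECT *
FROM 'hf://datasets/datasets-examples/doc-formats-csv-1/data.csv'
LIMIT 10;
```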
4 | 5 | ```yaml metadata 6 | name: HF_EXTRACT 7 | description: "Example extrating from hf to a local sqlite3 file" 8 | connection: "duckdb:" 9 | active: true 10 | ``` 11 | 12 | ## HF_EXTRACT 13 | 14 | ```yaml metadata 15 | name: HF_EXTRACT 16 | description: "Example extrating from hf to a local sqlite3 file" 17 | table: HF_EXTRACT 18 | load_conn: "duckdb:" 19 | load_before_sql: 20 | - load_extentions 21 | - attach_db 22 | - create_hf_token 23 | load_sql: load_query 24 | load_after_sql: detach_db 25 | drop_sql: drop_sql 26 | clean_sql: clean_sql 27 | rows_sql: nrows 28 | active: true 29 | ``` 30 | 31 | ```sql 32 | -- load_extentions 33 | INSTALL sqlite; 34 | LOAD sqlite; 35 | INSTALL httpfs; 36 | LOAD httpfs; 37 | ``` 38 | 39 | ```sql 40 | -- attach_db 41 | ATTACH 'examples/HF_EXTRACT.db' AS "DB" (TYPE SQLITE) 42 | ``` 43 | 44 | Configure your Hugging Face Token in the DuckDB Secrets Manager to access private or gated datasets. First, [visit Hugging Face Settings – Tokens](https://huggingface.co/settings/tokens) to obtain your access token. Second, set it in your DuckDB session using [DuckDB’s Secrets Manager](https://duckdb.org/docs/configuration/secrets_manager.html). DuckDB supports two providers for managing secrets: 45 | 46 | ```sql 47 | -- create_hf_token 48 | CREATE SECRET hf_token ( 49 | TYPE HUGGINGFACE, 50 | TOKEN '@HF_TOKEN' 51 | ); 52 | ``` 53 | 54 | ```sql 55 | -- detach_db 56 | DETACH "DB"; 57 | ``` 58 | 59 | ```sql 60 | -- load_query 61 | CREATE OR REPLACE TABLE "DB"."
" AS 62 | SELECT * 63 | FROM 'hf://datasets/datasets-examples/doc-formats-csv-1/data.csv' 64 | LIMIT 10 65 | ``` 66 | 67 | ```sql 68 | -- load_query2 69 | CREATE OR REPLACE TABLE "DB"."
" AS 70 | SELECT * 71 | FROM 'hf://datasets/horus-ai-labs/WebInstructSub-150K/data/train-00000-of-00001.parquet' 72 | ``` 73 | 74 | ```sql 75 | -- drop_sql 76 | DROP TABLE IF EXISTS "DB"."
"; 77 | ``` 78 | 79 | ```sql 80 | -- clean_sql 81 | DELETE FROM "DB"."
"; 82 | ``` 83 | 84 | ```sql 85 | -- nrows 86 | SELECT COUNT(*) AS "nrows" FROM "DB"."
" 87 | ``` 88 | 89 | ```shell 90 | bin/etlx --config examples/hf.md 91 | ``` 92 | 93 | # LOGS 94 | 95 | ```yaml metadata 96 | name: LOGS 97 | description: "Example saving logs" 98 | table: _logs 99 | connection: "duckdb:" 100 | before_sql: 101 | - load_extentions 102 | - attach_db 103 | save_log_sql: load_query 104 | after_sql: detach_db 105 | active: true 106 | ``` 107 | 108 | ```sql 109 | -- load_extentions 110 | INSTALL Sqlite; 111 | LOAD Sqlite; 112 | INSTALL json; 113 | LOAD json; 114 | ``` 115 | 116 | ```sql 117 | -- attach_db 118 | ATTACH 'examples/HF_EXTRACT.db' AS "DB" (TYPE SQLITE) 119 | ``` 120 | 121 | ```sql 122 | -- detach_db 123 | DETACH "DB"; 124 | ``` 125 | 126 | ```sql 127 | -- load_query 128 | CREATE OR REPLACE TABLE "DB"."
" AS 129 | SELECT * 130 | FROM read_json(''); 131 | ``` 132 | -------------------------------------------------------------------------------- /.github/workflows/windows.cc.yaml: -------------------------------------------------------------------------------- 1 | name: Windows Build with CGO and Cross-Compiler 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | build-windows: 8 | runs-on: windows-latest 9 | steps: 10 | # Step 1: Checkout the code 11 | - name: Checkout Code 12 | uses: actions/checkout@v3 13 | 14 | # Step 2: Set up Go environment 15 | - name: Setup Go 16 | uses: actions/setup-go@v4 17 | with: 18 | go-version: '1.23' 19 | 20 | # Step 3: Download DuckDB Precompiled Library 21 | - name: Download DuckDB Library 22 | run: | 23 | $version = "v1.1.3" 24 | $duckdb_url = "https://github.com/duckdb/duckdb/releases/download/$version/libduckdb-windows-amd64.zip" 25 | $destinationPath = "$(Get-Location)\duckdb" 26 | Invoke-WebRequest -Uri $duckdb_url -OutFile "duckdb.zip" 27 | Expand-Archive -Path "duckdb.zip" -DestinationPath $destinationPath 28 | $duckdb_url = "https://github.com/duckdb/duckdb/releases/download/$version/libduckdb-windows-arm64.zip" 29 | $destinationPath = "$(Get-Location)\duckdbarm64" 30 | Invoke-WebRequest -Uri $duckdb_url -OutFile "duckdb.zip" 31 | Expand-Archive -Path "duckdb.zip" -DestinationPath $destinationPath 32 | Write-Host "DuckDB library extracted to $destinationPath" 33 | 34 | # Step 4: Install MinGW for Cross-Compilation 35 | - name: Install MinGW 36 | run: | 37 | choco install mingw -y 38 | Write-Host "MinGW installed for cross-compilation" 39 | 40 | # Step 5: Set Environment Variables 41 | - name: Set Environment Variables 42 | run: | 43 | echo "CGO_ENABLED=1" >> $env:GITHUB_ENV 44 | echo "CC=x86_64-w64-mingw32-gcc" >> $env:GITHUB_ENV 45 | echo "CGO_CFLAGS=-I$(Get-Location)\duckdb\" >> $env:GITHUB_ENV 46 | echo "CGO_LDFLAGS=-L$(Get-Location)\duckdb\ -lduckdb" >> $env:GITHUB_ENV 47 | 48 | # Step 6: Verify DuckDB Library 49 | - name: Verify DuckDB Library 50 | run: | 51 | $libPath = "$(Get-Location)\duckdb\" 52 | if (!(Test-Path "$libPath\duckdb.lib")) { 53 | Write-Error "duckdb.lib not found in $libPath" 54 | } 55 | Write-Host "duckdb.lib found in $libPath" 56 | 57 | # Step 7: Build the Application 58 | - name: Build Windows Binary 59 | run: | 60 | mkdir dist 61 | go build -o dist/etlx-windows-amd64.exe ./cmd/main.go 62 | echo "CGO_ENABLED=1" >> $env:GITHUB_ENV 63 | echo "CC=x86_64-w64-mingw32-gcc" >> $env:GITHUB_ENV 64 | echo "CGO_CFLAGS=-I$(Get-Location)\duckdbarm64\" >> $env:GITHUB_ENV 65 | echo "CGO_LDFLAGS=-L$(Get-Location)\duckdbarm64\ -lduckdb" >> $env:GITHUB_ENV 66 | echo "GOARCH=arm64" >> $env:GITHUB_ENV 67 | go build -o dist/etlx-windows-arm64.exe ./cmd/main.go 68 | 69 | # Step 8: Upload Build Logs for Debugging (if Build Fails) 70 | - name: Upload Logs 71 | if: failure() 72 | uses: actions/upload-artifact@v3 73 | with: 74 | name: build-logs 75 | path: ${{ github.workspace }} 76 | 77 | # Step 9: Upload Built Binary for Testing (if Successful) 78 | - name: Upload Binary 79 | if: success() 80 | uses: actions/upload-artifact@v3 81 | with: 82 | name: windows-binary 83 | path: dist/etlx-windows-amd64.exe 84 | -------------------------------------------------------------------------------- /examples/ducklake.md: -------------------------------------------------------------------------------- 1 | # GENERATE_SAMPLE_DATA 2 | 3 | ex source [https://www.youtube.com/watch?v=NbnEVFAtx9o&ab_channel=JoeReis](DuckLake w/ Hannes Mühleisen - Practical Data Lunch 
and Learn) 4 | 5 | ```yaml metadata 6 | name: GENERATE_SAMPLE_DATA 7 | runs_as: SCRIPTS 8 | description: Here we are just going to generate a sample databese for the exercise mimicing a real traditional database 9 | connection: "duckdb:database/sample.duckdb" 10 | active: false 11 | ``` 12 | 13 | ## SAMPLE_DB 14 | 15 | ```yaml metadata 16 | name: SAMPLE_DB 17 | description: Generate sample data 18 | connection: "duckdb:database/sample.duckdb" 19 | script_sql: CALL dbgen(sf = 1) 20 | active: true 21 | ``` 22 | 23 | # DUCKLAKE 24 | 25 | ```yaml metadata 26 | name: GENERATE_SAMPLE_DATA 27 | runs_as: ETL 28 | description: Data lake exemple 29 | connection: "'ducklake:sqlite:database/dl_metadata.sqlite' AS dl (DATA_PATH 'database/dl/')" 30 | active: true 31 | ``` 32 | 33 | ## lineitem 34 | 35 | ```yaml metadata 36 | name: lineitem 37 | description: lineitem 38 | table: lineitem 39 | database: "ATTACH 'ducklake:sqlite:database/dl_metadata.sqlite' AS dl (DATA_PATH 'database/dl/')" 40 | load_conn: "duckdb:" 41 | load_before_sql: 42 | - INSTALL ducklake -- OR FORCE INSTALL ducklake FROM core_nightly 43 | - INSTALL sqlite 44 | - "ATTACH 'ducklake:sqlite:database/dl_metadata.sqlite' AS dl (DATA_PATH 'database/dl/')" 45 | - ATTACH 'database/sample.duckdb' AS S 46 | load_sql: INSERT INTO dl."
" BY NAME SELECT * FROM S."
" 47 | load_on_err_match_patt: '(?i)table.+with.+name.+(\w+).+does.+not.+exist' 48 | load_on_err_match_sql: CREATE TABLE dl."
" AS SELECT * FROM S."
" 49 | load_after_sql: 50 | - DETACH S 51 | - DETACH dl 52 | drop_sql: DROP TABLE dl."
" 53 | clean_sql: DELETE FROM dl."
" 54 | rows_sql: SELECT COUNT(*) AS "nrows" FROM dl."
" 55 | active: true 56 | ``` 57 | 58 | # ETLX_LOGS 59 | 60 | ```yaml metadata 61 | name: ETLX_LOGS 62 | runs_as: LOGS 63 | description: Logging 64 | table: logs 65 | database: 'sqlite3:database/dl_etlx_logs.db' 66 | connection: "duckdb:" 67 | before_sql: 68 | - "LOAD Sqlite" 69 | - "ATTACH 'database/dl_etlx_logs.db' AS l (TYPE SQLITE)" 70 | - "USE l" 71 | - "LOAD json" 72 | - "get_dyn_queries[create_missing_columns](ATTACH 'database/dl_etlx_logs.db' AS l (TYPE SQLITE),DETACH l)" 73 | save_log_sql: | 74 | INSERT INTO "l"."
" BY NAME 75 | SELECT * 76 | FROM READ_JSON(''); 77 | save_on_err_patt: '(?i)table.+with.+name.+(\w+).+does.+not.+exist' 78 | save_on_err_sql: | 79 | CREATE TABLE "l"."
" AS 80 | SELECT * 81 | FROM READ_JSON(''); 82 | after_sql: 83 | - 'USE memory' 84 | - 'DETACH "l"' 85 | active: true 86 | ``` 87 | 88 | ```sql 89 | -- create_missing_columns 90 | WITH source_columns AS ( 91 | SELECT "column_name", "column_type" 92 | FROM (DESCRIBE SELECT * FROM READ_JSON('')) 93 | ), 94 | destination_columns AS ( 95 | SELECT "column_name", "data_type" as "column_type" 96 | FROM "duckdb_columns" 97 | WHERE "table_name" = '
' 98 | ), 99 | missing_columns AS ( 100 | SELECT "s"."column_name", "s"."column_type" 101 | FROM source_columns "s" 102 | LEFT JOIN destination_columns "d" ON "s"."column_name" = "d"."column_name" 103 | WHERE "d"."column_name" IS NULL 104 | ) 105 | SELECT 'ALTER TABLE "l"."
" ADD COLUMN "' || "column_name" || '" ' || "column_type" || ';' AS "query" 106 | FROM missing_columns 107 | WHERE (SELECT COUNT(*) FROM destination_columns) > 0; 108 | ``` -------------------------------------------------------------------------------- /internal/etlx/run_logs.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "regexp" 7 | "time" 8 | ) 9 | 10 | func (etlx *ETLX) RunLOGS(dateRef []time.Time, conf map[string]any, logs []map[string]any, keys ...string) ([]map[string]any, error) { 11 | key := "LOGS" 12 | if len(keys) > 0 && keys[0] != "" { 13 | key = keys[0] 14 | } 15 | // fmt.Println(key, dateRef) 16 | var processData []map[string]any 17 | // Check if the input conf is nil or empty 18 | if conf == nil { 19 | conf = etlx.Config 20 | } 21 | data, ok := conf[key].(map[string]any) 22 | if !ok { 23 | return nil, fmt.Errorf("missing or invalid %s section", key) 24 | } 25 | // Extract metadata 26 | metadata, ok := data["metadata"].(map[string]any) 27 | if !ok { 28 | return nil, fmt.Errorf("missing metadata in %s section", key) 29 | } 30 | if active, okActive := metadata["active"]; okActive { 31 | if !active.(bool) { 32 | return nil, fmt.Errorf("deactivated %s", key) 33 | } 34 | } 35 | beforeSQL, okBefore := metadata["before_sql"] 36 | afterSQL, okAfter := metadata["after_sql"] 37 | saveSQL, okSave := metadata["save_log_sql"] 38 | errPatt, okErrPatt := metadata["save_on_err_patt"] 39 | errSQL, okErrSQL := metadata["save_on_err_sql"] 40 | tmpDir := "" 41 | if _, ok := metadata["tmp_dir"].(string); ok { 42 | tmpDir = metadata["tmp_dir"].(string) 43 | } 44 | conn, okCon := metadata["connection"] 45 | if !okCon { 46 | return nil, fmt.Errorf("%s err no connection defined", key) 47 | } 48 | dbConn, err := etlx.GetDB(conn.(string)) 49 | if err != nil { 50 | return nil, fmt.Errorf("%s ERR: connecting to %s in : %s", key, conn, err) 51 | } 52 | defer dbConn.Close() 53 | jsonData, err := json.MarshalIndent(logs, "", " ") 54 | if err != nil { 55 | return nil, fmt.Errorf("error converting logs to JSON: %v", err) 56 | } 57 | fname, err := etlx.TempFIle(tmpDir, string(jsonData), "logs.*.json") 58 | // println(fname, string(jsonData)) 59 | if err != nil { 60 | return nil, fmt.Errorf("error saving logs to JSON: %v", err) 61 | } 62 | // QUERIES TO RUN AT beginning 63 | if okBefore { 64 | err = etlx.ExecuteQuery(dbConn, beforeSQL, data, fname, "", dateRef) 65 | if err != nil { 66 | return nil, fmt.Errorf("%s: Before error: %s", key, err) 67 | } 68 | } 69 | // fmt.Println(key, sql) 70 | if saveSQL != "" && okSave { 71 | // fmt.Println(data[saveSQL.(string)]) 72 | err = etlx.ExecuteQuery(dbConn, saveSQL, data, fname, "", dateRef) 73 | if err != nil { 74 | _err_by_pass := false 75 | if okErrPatt && errPatt != nil && okErrSQL && errSQL != nil { 76 | //fmt.Println(onErrPatt.(string), onErrSQL.(string)) 77 | re, regex_err := regexp.Compile(errPatt.(string)) 78 | if regex_err != nil { 79 | return nil, fmt.Errorf("%s ERR: fallback regex matching the error failed to compile: %s", key, regex_err) 80 | } else if re.MatchString(string(err.Error())) { 81 | err = etlx.ExecuteQuery(dbConn, errSQL, data, fname, "", dateRef) 82 | if err != nil { 83 | return nil, fmt.Errorf("%s ERR: main: %s", key, err) 84 | } else { 85 | _err_by_pass = true 86 | } 87 | } 88 | } 89 | if !_err_by_pass { 90 | return nil, fmt.Errorf("%s ERR: main: %s", key, err) 91 | } 92 | } 93 | } 94 | // QUERIES TO RUN AT THE END 95 | if okAfter { 
96 | err = etlx.ExecuteQuery(dbConn, afterSQL, data, fname, "", dateRef) 97 | if err != nil { 98 | return nil, fmt.Errorf("%s: After error: %s", key, err) 99 | } 100 | } 101 | return processData, nil 102 | } 103 | -------------------------------------------------------------------------------- /internal/etlx/sftp.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | "time" 8 | 9 | "github.com/pkg/sftp" 10 | "golang.org/x/crypto/ssh" 11 | ) 12 | 13 | // getHostKey loads and parses the host public key 14 | func getHostKey(path string) (ssh.PublicKey, error) { 15 | hostKeyBytes, err := os.ReadFile(path) 16 | if err != nil { 17 | return nil, fmt.Errorf("failed to read host key file: %w", err) 18 | } 19 | hostKey, _, _, _, err := ssh.ParseAuthorizedKey(hostKeyBytes) 20 | if err != nil { 21 | return nil, fmt.Errorf("failed to parse host key: %w", err) 22 | } 23 | return hostKey, nil 24 | } 25 | 26 | // runSFTPActionWithFixedHostKey uploads or downloads files via SFTP with host key validation 27 | func (etlx *ETLX) SFTPActionWithFixedHostKey(mode string, params map[string]any) error { 28 | // Extract and validate required params 29 | host, _ := params["host"].(string) 30 | user, _ := params["user"].(string) 31 | password, _ := params["password"].(string) 32 | source, _ := params["source"].(string) 33 | target, _ := params["target"].(string) 34 | hostKeyPath, _ := params["host_key"].(string) 35 | port := 22 36 | if p, ok := params["port"].(int); ok { 37 | port = p 38 | } 39 | if host == "" || user == "" || password == "" || source == "" || target == "" || hostKeyPath == "" { 40 | return fmt.Errorf("missing required SFTP parameters: host, user, password, source, target, host_key") 41 | } 42 | host = etlx.ReplaceEnvVariable(host) 43 | user = etlx.ReplaceEnvVariable(user) 44 | password = etlx.ReplaceEnvVariable(password) 45 | // Get host key for validation 46 | hostKey, err := getHostKey(hostKeyPath) 47 | if err != nil { 48 | return fmt.Errorf("could not load host key: %w", err) 49 | } 50 | 51 | // Create SSH config 52 | config := &ssh.ClientConfig{ 53 | User: user, 54 | Auth: []ssh.AuthMethod{ssh.Password(password)}, 55 | HostKeyCallback: ssh.FixedHostKey(hostKey), 56 | Timeout: 5 * time.Second, 57 | } 58 | 59 | // Connect 60 | addr := fmt.Sprintf("%s:%d", host, port) 61 | conn, err := ssh.Dial("tcp", addr, config) 62 | if err != nil { 63 | return fmt.Errorf("SSH dial failed: %w", err) 64 | } 65 | defer conn.Close() 66 | 67 | // Create SFTP client 68 | client, err := sftp.NewClient(conn) 69 | if err != nil { 70 | return fmt.Errorf("SFTP client creation failed: %w", err) 71 | } 72 | defer client.Close() 73 | 74 | switch mode { 75 | case "upload": 76 | srcFile, err := os.Open(source) 77 | if err != nil { 78 | return fmt.Errorf("could not open source file: %w", err) 79 | } 80 | defer srcFile.Close() 81 | 82 | dstFile, err := client.Create(target) 83 | if err != nil { 84 | return fmt.Errorf("could not create remote file: %w", err) 85 | } 86 | defer dstFile.Close() 87 | 88 | _, err = io.Copy(dstFile, srcFile) 89 | if err != nil { 90 | return fmt.Errorf("upload failed: %w", err) 91 | } 92 | case "download": 93 | srcFile, err := client.Open(source) 94 | if err != nil { 95 | return fmt.Errorf("could not open remote file: %w", err) 96 | } 97 | defer srcFile.Close() 98 | 99 | dstFile, err := os.Create(target) 100 | if err != nil { 101 | return fmt.Errorf("could not create local file: %w", err) 102 | } 103 | 
defer dstFile.Close() 104 | 105 | _, err = io.Copy(dstFile, srcFile) 106 | if err != nil { 107 | return fmt.Errorf("download failed: %w", err) 108 | } 109 | default: 110 | return fmt.Errorf("unsupported SFTP action: %s", mode) 111 | } 112 | 113 | return nil 114 | } 115 | -------------------------------------------------------------------------------- /internal/etlx/build_query.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | // Builds query from the query doc MD config 8 | // 9 | // Input: 10 | // 11 | // -key: Represnting the markdown Level 1 Heading where the query begins 12 | // 13 | // Output: 14 | // 15 | // -sql: The SQL query generated 16 | // -query_parts: The query parts parsed from the md config input 17 | // -field_orders: The order of the fields in the parts 18 | // -error: Error returned in case something goes wrong 19 | func (etlx *ETLX) QueryBuilder(conf map[string]any, keys ...string) (string, map[string]any, []string, error) { // dateRef []time.Time, extraConf map[string]any, 20 | key := "QUERY_DOC" 21 | if len(keys) > 0 && keys[0] != "" { 22 | key = keys[0] 23 | } 24 | // Check if the input conf is nil or empty 25 | if conf == nil { 26 | conf = etlx.Config 27 | } 28 | data, ok := conf[key].(map[string]any) 29 | if !ok { 30 | return "", nil, nil, fmt.Errorf("missing or invalid %s section", key) 31 | } 32 | // Extract metadata 33 | fields, ok := data["FIELDS"].(map[string]any) 34 | if !ok { 35 | fields = data 36 | } 37 | query_parts := map[string]interface{}{} 38 | _fields_order := []string{} 39 | for key2, value := range fields { 40 | if key2 == "metadata" || key2 == "__order" || key2 == "order" { 41 | continue 42 | } 43 | _field := value.(map[string]any) 44 | field_metadata, ok := _field["metadata"].(map[string]any) 45 | //fmt.Println(1, field_metadata, len(field_metadata)) 46 | if !ok { 47 | // return "", nil, nil, fmt.Errorf("missing metadata in query %s and field %s", key, _field) 48 | field_metadata = map[string]any{ 49 | "name": key2, 50 | "description": key2, 51 | } 52 | } else if len(field_metadata) == 0 { 53 | field_metadata = map[string]any{ 54 | "name": key2, 55 | "description": key2, 56 | } 57 | } 58 | _fields_order = append(_fields_order, field_metadata["name"].(string)) 59 | active, ok := field_metadata["active"].(bool) 60 | if !ok { 61 | active = true 62 | } 63 | query_parts[field_metadata["name"].(string)] = map[string]any{ 64 | "name": field_metadata["name"], 65 | "desc": field_metadata["description"], 66 | "cte": _field["cte"], 67 | "select": _field["select"], 68 | "from": _field["from"], 69 | "join": _field["join"], 70 | "where": _field["where"], 71 | "group_by": _field["group_by"], 72 | "order_by": _field["order_by"], 73 | "having": _field["having"], 74 | "window": _field["window"], 75 | "active": active, 76 | "key": key, 77 | "metadata": field_metadata, 78 | } 79 | } 80 | __order, ok := data["__order"].([]any) 81 | //fmt.Printf("%s -> %v, %v, %t", key, ok, data["__order"], data["__order"]) 82 | if ok { 83 | _fields_order = []string{} 84 | for _, o := range __order { 85 | if _, ok := o.(string); ok { 86 | _field_data, _ok := data[o.(string)].(map[string]any) 87 | if _ok { 88 | _metadata, _ok := _field_data["metadata"].(map[string]any) 89 | if _ok { 90 | _name, _ok := _metadata["name"].(string) 91 | if _ok { 92 | _fields_order = append(_fields_order, _name) 93 | } else { 94 | _fields_order = append(_fields_order, o.(string)) 95 | } 96 | } else { 97 | 
_fields_order = append(_fields_order, o.(string)) 98 | } 99 | } else { 100 | _fields_order = append(_fields_order, o.(string)) 101 | } 102 | } 103 | } 104 | //fmt.Println("QD ORDER:", _fields_order) 105 | } 106 | qd := QueryDoc{ 107 | QueryParts: make(map[string]Field), 108 | FieldOrders: _fields_order, 109 | } 110 | err := qd.SetQueryPartsFromMap(query_parts) 111 | if err != nil { 112 | return "", nil, nil, fmt.Errorf("error setting field: %s", err) 113 | } 114 | _sql := qd.GetQuerySQLFromMap() 115 | //_sql = app.setQueryDate(_sql, date_ref) 116 | //fmt.Println("SQL", _sql) 117 | return _sql, query_parts, _fields_order, nil 118 | } 119 | -------------------------------------------------------------------------------- /internal/etlx/ftp.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | "path" 8 | "path/filepath" 9 | "strings" 10 | "time" 11 | 12 | "github.com/jlaffaye/ftp" 13 | ) 14 | 15 | func (etlx *ETLX) FTPUpload(host, port, user, pass, localPath, remotePath string) error { 16 | if port == "" { 17 | port = "21" 18 | } 19 | address := host + ":" + port 20 | conn, err := ftp.Dial(address, ftp.DialWithTimeout(5*time.Second)) 21 | if err != nil { 22 | return fmt.Errorf("failed to dial: %w", err) 23 | } 24 | defer conn.Quit() 25 | if user != "" && pass != "" { 26 | if err := conn.Login(user, pass); err != nil { 27 | return fmt.Errorf("failed to login: %w", err) 28 | } 29 | } 30 | file, err := os.Open(localPath) 31 | if err != nil { 32 | return fmt.Errorf("failed to open local file: %w", err) 33 | } 34 | defer file.Close() 35 | if err := conn.Stor(remotePath, file); err != nil { 36 | return fmt.Errorf("failed to upload: %w", err) 37 | } 38 | return nil 39 | } 40 | 41 | func (etlx *ETLX) FTPDownload(host, port, user, pass, remotePath, localPath string) error { 42 | if port == "" { 43 | port = "21" 44 | } 45 | address := host + ":" + port 46 | conn, err := ftp.Dial(address, ftp.DialWithTimeout(5*time.Second)) 47 | if err != nil { 48 | return fmt.Errorf("failed to dial: %w", err) 49 | } 50 | defer conn.Quit() 51 | if user != "" && pass != "" { 52 | if err := conn.Login(user, pass); err != nil { 53 | return fmt.Errorf("failed to login: %w", err) 54 | } 55 | } 56 | response, err := conn.Retr(remotePath) 57 | if err != nil { 58 | return fmt.Errorf("failed to download file: %w", err) 59 | } 60 | defer response.Close() 61 | outFile, err := os.Create(localPath) 62 | if err != nil { 63 | return fmt.Errorf("failed to create local file: %w", err) 64 | } 65 | defer outFile.Close() 66 | _, err = io.Copy(outFile, response) 67 | if err != nil { 68 | return fmt.Errorf("failed to save file: %w", err) 69 | } 70 | return nil 71 | } 72 | 73 | func globToRegex(glob string) string { 74 | var sb strings.Builder 75 | sb.WriteString("^") 76 | for i := 0; i < len(glob); i++ { 77 | switch glob[i] { 78 | case '*': 79 | sb.WriteString(".*") 80 | case '?': 81 | sb.WriteString(".") 82 | case '.', '(', ')', '+', '|', '^', '$', '[', ']', '{', '}', '\\': 83 | sb.WriteString(`\`) 84 | sb.WriteByte(glob[i]) 85 | default: 86 | sb.WriteByte(glob[i]) 87 | } 88 | } 89 | sb.WriteString("$") 90 | return sb.String() 91 | } 92 | 93 | func (etlx *ETLX) FTPDownloadBatch(host, port, user, pass, remoteDir, pattern, localDir string) error { 94 | if port == "" { 95 | port = "21" 96 | } 97 | address := host + ":" + port 98 | conn, err := ftp.Dial(address, ftp.DialWithTimeout(5*time.Second)) 99 | if err != nil { 100 | return 
fmt.Errorf("failed to dial: %w", err) 101 | } 102 | defer conn.Quit() 103 | if user != "" && pass != "" { 104 | if err := conn.Login(user, pass); err != nil { 105 | return fmt.Errorf("failed to login: %w", err) 106 | } 107 | } 108 | // List all files in the remote directory 109 | entries, err := conn.List(remoteDir) 110 | if err != nil { 111 | return fmt.Errorf("failed to list remote directory: %w", err) 112 | } 113 | // Ensure local directory exists 114 | if err := os.MkdirAll(localDir, 0755); err != nil { 115 | return fmt.Errorf("failed to create local directory: %w", err) 116 | } 117 | // Loop over files and download matching ones 118 | for _, entry := range entries { 119 | if entry.Type != ftp.EntryTypeFile { 120 | continue // skip non-files 121 | } 122 | matched, err := filepath.Match(globToRegex(pattern), entry.Name) 123 | if err != nil { 124 | return fmt.Errorf("invalid pattern: %w", err) 125 | } 126 | if matched { 127 | remotePath := path.Join(remoteDir, entry.Name) 128 | localPath := filepath.Join(localDir, entry.Name) 129 | fmt.Printf("Downloading: %s → %s\n", remotePath, localPath) 130 | // Download each matching file using the single-file method 131 | err := etlx.FTPDownload(host, port, user, pass, remotePath, localPath) 132 | if err != nil { 133 | return fmt.Errorf("failed to download %s: %w", remotePath, err) 134 | } 135 | } 136 | } 137 | 138 | return nil 139 | } 140 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/realdatadriven/etlx 2 | 3 | go 1.25.0 4 | 5 | toolchain go1.25.5 6 | 7 | require ( 8 | github.com/BurntSushi/toml v1.5.0 9 | github.com/Masterminds/sprig/v3 v3.3.0 10 | github.com/alexbrainman/odbc v0.0.0-20250601004241-49e6b2bc0cf0 11 | github.com/aws/aws-sdk-go-v2 v1.41.0 12 | github.com/aws/aws-sdk-go-v2/config v1.32.5 13 | github.com/aws/aws-sdk-go-v2/credentials v1.19.5 14 | github.com/aws/aws-sdk-go-v2/service/s3 v1.93.2 15 | github.com/duckdb/duckdb-go/v2 v2.5.4 16 | github.com/jlaffaye/ftp v0.2.0 17 | github.com/jmoiron/sqlx v1.4.0 18 | github.com/joho/godotenv v1.5.1 19 | github.com/lib/pq v1.10.9 20 | github.com/mattn/go-sqlite3 v1.14.32 21 | github.com/microsoft/go-mssqldb v1.9.5 22 | github.com/pkg/sftp v1.13.10 23 | github.com/xuri/excelize/v2 v2.10.0 24 | github.com/yuin/goldmark v1.7.13 25 | golang.org/x/crypto v0.46.0 26 | golang.org/x/text v0.32.0 27 | gopkg.in/yaml.v3 v3.0.1 28 | ) 29 | 30 | require ( 31 | github.com/aws/aws-sdk-go-v2/service/signin v1.0.4 // indirect 32 | github.com/duckdb/duckdb-go/arrowmapping v0.0.27 // indirect 33 | github.com/duckdb/duckdb-go/mapping v0.0.27 // indirect 34 | ) 35 | 36 | require ( 37 | dario.cat/mergo v1.0.2 // indirect 38 | github.com/Masterminds/goutils v1.1.1 // indirect 39 | github.com/Masterminds/semver/v3 v3.4.0 // indirect 40 | github.com/apache/arrow-go/v18 v18.5.0 // indirect 41 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 // indirect 42 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.16 // indirect 43 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.16 // indirect 44 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.16 // indirect 45 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect 46 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16 // indirect 47 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 // indirect 48 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7 // 
indirect 49 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.16 // indirect 50 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16 // indirect 51 | github.com/aws/aws-sdk-go-v2/service/sso v1.30.7 // indirect 52 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.12 // indirect 53 | github.com/aws/aws-sdk-go-v2/service/sts v1.41.5 // indirect 54 | github.com/aws/smithy-go v1.24.0 // indirect 55 | github.com/duckdb/duckdb-go-bindings v0.1.24 // indirect 56 | github.com/duckdb/duckdb-go-bindings/darwin-amd64 v0.1.24 // indirect 57 | github.com/duckdb/duckdb-go-bindings/darwin-arm64 v0.1.24 // indirect 58 | github.com/duckdb/duckdb-go-bindings/linux-amd64 v0.1.24 // indirect 59 | github.com/duckdb/duckdb-go-bindings/linux-arm64 v0.1.24 // indirect 60 | github.com/duckdb/duckdb-go-bindings/windows-amd64 v0.1.24 // indirect 61 | github.com/go-viper/mapstructure/v2 v2.4.0 // indirect 62 | github.com/goccy/go-json v0.10.5 // indirect 63 | github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 // indirect 64 | github.com/golang-sql/sqlexp v0.1.0 // indirect 65 | github.com/google/flatbuffers v25.9.23+incompatible // indirect 66 | github.com/google/uuid v1.6.0 // indirect 67 | github.com/hashicorp/errwrap v1.1.0 // indirect 68 | github.com/hashicorp/go-multierror v1.1.1 // indirect 69 | github.com/huandu/xstrings v1.5.0 // indirect 70 | github.com/klauspost/compress v1.18.2 // indirect 71 | github.com/klauspost/cpuid/v2 v2.3.0 // indirect 72 | github.com/kr/fs v0.1.0 // indirect 73 | // github.com/duckdb/duckdb-go/arrowmapping v0.0.21 // indirect 74 | // github.com/duckdb/duckdb-go/mapping v0.0.21 // indirect 75 | github.com/mitchellh/copystructure v1.2.0 // indirect 76 | github.com/mitchellh/reflectwalk v1.0.2 // indirect 77 | github.com/pierrec/lz4/v4 v4.1.22 // indirect 78 | github.com/richardlehane/mscfb v1.0.4 // indirect 79 | github.com/richardlehane/msoleps v1.0.4 // indirect 80 | github.com/shopspring/decimal v1.4.0 // indirect 81 | github.com/spf13/cast v1.10.0 // indirect 82 | github.com/tiendc/go-deepcopy v1.7.2 // indirect 83 | github.com/xuri/efp v0.0.1 // indirect 84 | github.com/xuri/nfp v0.0.2-0.20250530014748-2ddeb826f9a9 // indirect 85 | github.com/zeebo/xxh3 v1.0.2 // indirect 86 | golang.org/x/exp v0.0.0-20251209150349-8475f28825e9 // indirect 87 | golang.org/x/mod v0.31.0 // indirect 88 | golang.org/x/net v0.48.0 // indirect 89 | golang.org/x/sync v0.19.0 // indirect 90 | golang.org/x/sys v0.39.0 // indirect 91 | golang.org/x/telemetry v0.0.0-20251208220230-2638a1023523 // indirect 92 | golang.org/x/tools v0.40.0 // indirect 93 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect 94 | ) 95 | -------------------------------------------------------------------------------- /internal/etlx/aws.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "io" 8 | "os" 9 | 10 | "github.com/aws/aws-sdk-go-v2/aws" 11 | "github.com/aws/aws-sdk-go-v2/config" 12 | "github.com/aws/aws-sdk-go-v2/credentials" 13 | "github.com/aws/aws-sdk-go-v2/service/s3" 14 | 15 | "github.com/realdatadriven/etlx/internal/env" 16 | ) 17 | 18 | // awsConfig returns an AWS config for SDK v2 19 | func (etlx *ETLX) awsConfig(ctx context.Context, AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN string) (aws.Config, error) { 20 | opts := []func(*config.LoadOptions) error{ 21 | config.WithRegion(AWS_REGION), 22 | 
config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider( 23 | AWS_ACCESS_KEY_ID, 24 | AWS_SECRET_ACCESS_KEY, 25 | AWS_SESSION_TOKEN, 26 | )), 27 | } 28 | cfg, err := config.LoadDefaultConfig(ctx, opts...) 29 | if err != nil { 30 | return cfg, fmt.Errorf("failed to load AWS config: %v", err) 31 | } 32 | return cfg, nil 33 | } 34 | 35 | // fileExistsInS3 checks if a file exists in the given S3 bucket 36 | func (etlx *ETLX) FileExistsInS3(ctx context.Context, client *s3.Client, bucket, key string) bool { 37 | _, err := client.HeadObject(ctx, &s3.HeadObjectInput{ 38 | Bucket: aws.String(bucket), 39 | Key: aws.String(key), 40 | }) 41 | return err == nil 42 | } 43 | 44 | func (etlx *ETLX) S3(mode string, params map[string]any) (string, error) { 45 | // Create AWS session 46 | AWS_ACCESS_KEY_ID, ok := params["AWS_ACCESS_KEY_ID"].(string) 47 | if !ok { 48 | AWS_ACCESS_KEY_ID = os.Getenv("AWS_ACCESS_KEY_ID") 49 | } 50 | AWS_SECRET_ACCESS_KEY, ok := params["AWS_SECRET_ACCESS_KEY"].(string) 51 | if !ok { 52 | AWS_SECRET_ACCESS_KEY = os.Getenv("AWS_SECRET_ACCESS_KEY") 53 | } 54 | AWS_SESSION_TOKEN, ok := params["AWS_SESSION_TOKEN"].(string) 55 | if !ok { 56 | AWS_SESSION_TOKEN = os.Getenv("AWS_SESSION_TOKEN") 57 | } 58 | AWS_REGION, ok := params["AWS_REGION"].(string) 59 | if !ok { 60 | AWS_REGION = os.Getenv("AWS_REGION") 61 | } 62 | AWS_ENDPOINT, ok := params["AWS_ENDPOINT"].(string) 63 | if !ok { 64 | AWS_ENDPOINT = os.Getenv("AWS_ENDPOINT") 65 | } 66 | S3_FORCE_PATH_STYLE, ok := params["S3_FORCE_PATH_STYLE"].(bool) 67 | if !ok { 68 | S3_FORCE_PATH_STYLE = env.GetBool("S3_FORCE_PATH_STYLE", false) 69 | } 70 | /*S3_SKIP_SSL_VERIFY, ok := params["S3_SKIP_SSL_VERIFY"].(bool) 71 | if !ok { 72 | S3_SKIP_SSL_VERIFY = env.GetBool("S3_SKIP_SSL_VERIFY", false) 73 | } 74 | S3_DISABLE_SSL, ok := params["S3_DISABLE_SSL"].(bool) 75 | if !ok { 76 | S3_DISABLE_SSL = env.GetBool("S3_DISABLE_SSL", false) 77 | }*/ 78 | ctx := context.Background() 79 | cfg, err := etlx.awsConfig(ctx, AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN) 80 | if err != nil { 81 | return "", fmt.Errorf("failed to create AWS config: %v", err) 82 | } 83 | client := s3.NewFromConfig(cfg, func(o *s3.Options) { 84 | if endpoint := AWS_ENDPOINT; endpoint != "" { 85 | o.BaseEndpoint = aws.String(endpoint) 86 | } 87 | o.UsePathStyle = S3_FORCE_PATH_STYLE 88 | }) 89 | // Define the S3 bucket and key 90 | bucket := params["bucket"].(string) 91 | originalKey := params["key"].(string) 92 | //ext := filepath.Ext(originalKey) 93 | //baseName := originalKey[:len(originalKey)-len(ext)] 94 | // Check if the file already exists and modify the file name if necessary 95 | key := originalKey 96 | for i := 1; etlx.FileExistsInS3(ctx, client, bucket, key); i++ { 97 | //key = fmt.Sprintf("%s_%d%s", baseName, i, ext) 98 | } 99 | if mode == "upload" { 100 | source, _ := params["source"].(string) 101 | file, err := os.Open(source) 102 | if err != nil { 103 | return "", fmt.Errorf("opening source file failed: %w", err) 104 | } 105 | defer file.Close() 106 | // Read file into a buffer to allow seeking 107 | var buffer bytes.Buffer 108 | if _, err := io.Copy(&buffer, file); err != nil { 109 | return "", fmt.Errorf("failed to read file into buffer: %v", err) 110 | } 111 | // Upload file to S3 112 | _, err = client.PutObject(ctx, &s3.PutObjectInput{ 113 | Bucket: aws.String(bucket), 114 | Key: aws.String(key), 115 | Body: bytes.NewReader(buffer.Bytes()), 116 | //ACL: types.ObjectCannedACLPublicRead, // Optional: Set ACL for public 
access if needed 117 | }) 118 | if err != nil { 119 | return "", fmt.Errorf("failed to upload to S3: %v", err) 120 | } 121 | return key, nil 122 | } else if mode == "download" { 123 | target, _ := params["target"].(string) 124 | resp, err := client.GetObject(ctx, &s3.GetObjectInput{ 125 | Bucket: aws.String(bucket), 126 | Key: aws.String(key), 127 | }) 128 | if err != nil { 129 | return "", fmt.Errorf("failed to get file from S3 %v", err) 130 | } 131 | defer resp.Body.Close() 132 | outFile, err := os.Create(target) 133 | if err != nil { 134 | return "", fmt.Errorf("creating target file failed: %w", err) 135 | } 136 | defer outFile.Close() 137 | _, err = io.Copy(outFile, resp.Body) 138 | if err != nil { 139 | return "", fmt.Errorf("writing to target file failed: %w", err) 140 | } 141 | return key, nil 142 | } else { 143 | return "", fmt.Errorf("%s not suported", mode) 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /internal/etlx/mail.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "bytes" 5 | "encoding/base64" 6 | "fmt" 7 | "html/template" 8 | "mime/multipart" 9 | "mime/quotedprintable" 10 | "net/smtp" 11 | "net/textproto" 12 | "os" 13 | "path/filepath" 14 | "strings" 15 | 16 | "github.com/Masterminds/sprig/v3" 17 | ) 18 | 19 | // parseSlice converts an interface{} into a []string safely 20 | func parseSlice(value any) []string { 21 | if value == nil { 22 | return nil 23 | } 24 | slice, ok := value.([]any) 25 | if !ok { 26 | return nil 27 | } 28 | var result []string 29 | for _, v := range slice { 30 | if str, ok := v.(string); ok { 31 | result = append(result, str) 32 | } 33 | } 34 | return result 35 | } 36 | 37 | // renderTemplate processes the HTML template with the provided data 38 | func (etlx *ETLX) RenderTemplate(tmplStr string, data map[string]any) (string, error) { 39 | // fmt.Println(tmplStr) 40 | // Create a FuncMap with some common functions 41 | // funcMap := sprig.FuncMap() 42 | tmpl, err := template.New("tmpl").Funcs(sprig.FuncMap()).Parse(tmplStr) 43 | //tmpl, err := template.New("email").Funcs(funcMap).Parse(tmplStr) 44 | if err != nil { 45 | return "", fmt.Errorf("failed to parse template: %v", err) 46 | } 47 | var buf bytes.Buffer 48 | if err := tmpl.Execute(&buf, data); err != nil { 49 | return "", fmt.Errorf("failed to execute template: %v", err) 50 | } 51 | //fmt.Println(buf.String()) 52 | return buf.String(), nil 53 | } 54 | 55 | // sendEmail sends an email with dynamic HTML content, optional CC, BCC, and attachments 56 | func (etlx *ETLX) SendEmail(data map[string]any) error { 57 | // Load SMTP configuration from environment variables 58 | smtpHost := os.Getenv("SMTP_HOST") 59 | smtpPort := os.Getenv("SMTP_PORT") 60 | smtpUsername := os.Getenv("SMTP_USERNAME") 61 | smtpPassword := os.Getenv("SMTP_PASSWORD") 62 | smtpFrom := os.Getenv("SMTP_FROM") 63 | // Extract fields from data 64 | to := parseSlice(data["to"]) 65 | cc := parseSlice(data["cc"]) 66 | bcc := parseSlice(data["bcc"]) 67 | subject, _ := data["subject"].(string) 68 | bodyTemplate, _ := data["body"].(string) 69 | templateData, _ := data["data"].(map[string]any) 70 | attachments := parseSlice(data["attachments"]) 71 | if len(to) == 0 { 72 | return fmt.Errorf("recipient 'to' field is required") 73 | } 74 | // Render the HTML template with data 75 | body, err := etlx.RenderTemplate(bodyTemplate, templateData) 76 | if err != nil { 77 | return err 78 | } 79 | // SMTP 
authentication 80 | auth := smtp.PlainAuth("", smtpUsername, smtpPassword, smtpHost) 81 | // Create email buffer 82 | var email bytes.Buffer 83 | writer := multipart.NewWriter(&email) 84 | boundary := writer.Boundary() 85 | // Headers 86 | headers := map[string]string{ 87 | "From": smtpFrom, 88 | "To": strings.Join(to, ", "), 89 | "Subject": subject, 90 | "MIME-Version": "1.0", 91 | "Content-Type": fmt.Sprintf("multipart/mixed; boundary=%s", boundary), 92 | } 93 | if len(cc) > 0 { 94 | headers["Cc"] = strings.Join(cc, ", ") 95 | } 96 | // Write headers 97 | for key, val := range headers { 98 | email.WriteString(fmt.Sprintf("%s: %s\r\n", key, val)) 99 | } 100 | email.WriteString("\r\n") 101 | // Add HTML body 102 | htmlPart, _ := writer.CreatePart(textproto.MIMEHeader{ 103 | "Content-Type": {"text/html; charset=UTF-8"}, 104 | "Content-Transfer-Encoding": {"quoted-printable"}, 105 | }) 106 | qpWriter := quotedprintable.NewWriter(htmlPart) 107 | qpWriter.Write([]byte(body)) 108 | qpWriter.Close() 109 | // Attach files 110 | if len(attachments) > 0 { 111 | for _, attachmentPath := range attachments { 112 | path := "" 113 | if _, okPath := data["path"].(string); okPath { 114 | path = data["path"].(string) 115 | } 116 | file, err := os.Open(fmt.Sprintf("%s/%s", path, attachmentPath)) 117 | if err != nil { 118 | //return fmt.Errorf("failed to open attachment %s: %v", attachmentPath, err) 119 | continue 120 | } 121 | defer file.Close() 122 | // Read file content 123 | fileContent, err := os.ReadFile(fmt.Sprintf("%s/%s", path, attachmentPath)) 124 | if err != nil { 125 | return fmt.Errorf("failed to read attachment %s: %v", attachmentPath, err) 126 | } 127 | // Create attachment part 128 | fileName := filepath.Base(attachmentPath) 129 | attachmentHeader := textproto.MIMEHeader{ 130 | "Content-Type": {"application/octet-stream"}, 131 | "Content-Disposition": {fmt.Sprintf("attachment; filename=\"%s\"", fileName)}, 132 | "Content-Transfer-Encoding": {"base64"}, 133 | } 134 | attachmentPart, _ := writer.CreatePart(attachmentHeader) 135 | // Encode file content as base64 136 | encoded := base64.StdEncoding.EncodeToString(fileContent) 137 | attachmentPart.Write([]byte(encoded)) 138 | } 139 | } 140 | // Close writer 141 | writer.Close() 142 | // Merge recipients 143 | recipients := append(to, append(cc, bcc...)...) 144 | // Send email 145 | serverAddr := smtpHost + ":" + smtpPort 146 | err = smtp.SendMail(serverAddr, auth, smtpUsername, recipients, email.Bytes()) 147 | if err != nil { 148 | return fmt.Errorf("failed to send email: %v", err) 149 | } 150 | return nil 151 | } 152 | -------------------------------------------------------------------------------- /examples/hf.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "The [`httpfs`](https://duckdb.org/docs/extensions/httpfs/overview, \"httpfs\") extension introduces support for the hf:// protocol to access data sets hosted in [Hugging Face](https://huggingface.co \"Hugging Face Homepage\") repositories. See the [announcement blog post](https://duckdb.org/2024/05/29/access-150k-plus-datasets-from-hugging-face-with-duckdb.html, \"announcement blog post\") for details." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# ETL" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "```yaml metadata\n", 22 | "name: HF_EXTRACT\n", 23 | "description: \"Example extracting from hf to a local sqlite3 file\"\n", 24 | "connection: \"duckdb:\"\n", 25 | "active: true\n", 26 | "```" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "## HF_EXTRACT" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "```yaml metadata\n", 41 | "name: HF_EXTRACT\n", 42 | "description: \"Example extracting from hf to a local sqlite3 file\"\n", 43 | "table: HF_EXTRACT\n", 44 | "load_conn: \"duckdb:\"\n", 45 | "load_before_sql:\n", 46 | " - load_extentions\n", 47 | " - attach_db\n", 48 | " - create_hf_token\n", 49 | "load_sql: load_query\n", 50 | "load_after_sql: detach_db\n", 51 | "drop_sql: drop_sql\n", 52 | "clean_sql: clean_sql\n", 53 | "rows_sql: nrows\n", 54 | "active: true\n", 55 | "```" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "```sql\n", 63 | "-- load_extentions\n", 64 | "INSTALL sqlite;\n", 65 | "LOAD sqlite;\n", 66 | "INSTALL httpfs;\n", 67 | "LOAD httpfs;\n", 68 | "```" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "```sql\n", 76 | "-- attach_db\n", 77 | "ATTACH 'examples/HF_EXTRACT.db' AS \"DB\" (TYPE SQLITE)\n", 78 | "```" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "Configure your Hugging Face Token in the DuckDB Secrets Manager to access private or gated datasets. First, [visit Hugging Face Settings – Tokens](https://huggingface.co/settings/tokens) to obtain your access token. Second, set it in your DuckDB session using [DuckDB’s Secrets Manager](https://duckdb.org/docs/configuration/secrets_manager.html). DuckDB supports two providers for managing secrets:" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "```sql\n", 93 | "-- create_hf_token\n", 94 | "CREATE SECRET hf_token (\n", 95 | " TYPE HUGGINGFACE,\n", 96 | " TOKEN '@HF_TOKEN'\n", 97 | ");\n", 98 | "```" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "```sql\n", 106 | "-- detach_db\n", 107 | "DETACH \"DB\";\n", 108 | "```" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "```sql\n", 116 | "-- load_query\n", 117 | "CREATE OR REPLACE TABLE \"DB\".\"
\" AS\n", 118 | "SELECT *\n", 119 | "FROM 'hf://datasets/datasets-examples/doc-formats-csv-1/data.csv'\n", 120 | "LIMIT 10\n", 121 | "```" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "```sql\n", 129 | "-- load_query2\n", 130 | "CREATE OR REPLACE TABLE \"DB\".\"
\" AS\n", 131 | "SELECT *\n", 132 | "FROM 'hf://datasets/horus-ai-labs/WebInstructSub-150K/data/train-00000-of-00001.parquet'\n", 133 | "```" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "```sql\n", 141 | "-- drop_sql\n", 142 | "DROP TABLE IF EXISTS \"DB\".\"
\";\n", 143 | "```" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "```sql\n", 151 | "-- clean_sql\n", 152 | "DELETE FROM \"DB\".\"
\";\n", 153 | "```" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "```sql\n", 161 | "-- nrows\n", 162 | "SELECT COUNT(*) AS \"nrows\" FROM \"DB\".\"
\"\n", 163 | "```" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "```shell\n", 171 | "bin/etlx --config examples/hf.ipynb\n", 172 | "```" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 3", 179 | "language": "python", 180 | "name": "python3" 181 | }, 182 | "language_info": { 183 | "codemirror_mode": { 184 | "name": "ipython", 185 | "version": 3 186 | }, 187 | "file_extension": ".py", 188 | "mimetype": "text/x-python", 189 | "name": "python", 190 | "nbconvert_exporter": "python", 191 | "pygments_lexer": "ipython3", 192 | "version": "3.11.7" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | -------------------------------------------------------------------------------- /examples/s3.md: -------------------------------------------------------------------------------- 1 | # ETL 2 | 3 | The [`httpfs`](https://duckdb.org/docs/extensions/httpfs/s3api, "httpfs") extension supports reading/writing/globbing files on object storage servers using the S3 API. S3 offers a standard API to read and write to remote files (while regular http servers, predating S3, do not offer a common write API). DuckDB conforms to the S3 API, that is now common among industry storage providers. 4 | The preferred way to configure and authenticate to S3 endpoints is to use secrets. Multiple secret providers are available 5 | 6 | ```yaml metadata 7 | name: S3_EXTRACT 8 | description: "Example extrating from S3 to a local sqlite3 file" 9 | connection: "duckdb:" 10 | active: true 11 | ``` 12 | 13 | ## VERSION 14 | 15 | ```yaml metadata 16 | name: VERSION 17 | description: "DDB Version" 18 | table: VERSION 19 | load_conn: "duckdb:" 20 | load_before_sql: "ATTACH 'database/S3_EXTRACT.db' AS DB (TYPE SQLITE)" 21 | load_sql: 'CREATE OR REPLACE TABLE DB."
" AS SELECT version() AS "VERSION";' 22 | load_after_sql: "DETACH DB;" 23 | rows_sql: 'SELECT COUNT(*) AS "nrows" FROM DB."
"' 24 | active: true 25 | ``` 26 | 27 | ## train_services 28 | 29 | ```yaml metadata 30 | name: train_services 31 | description: "train_services" 32 | table: train_services 33 | load_conn: "duckdb:" 34 | load_before_sql: 35 | - load_extentions 36 | - attach_db 37 | load_sql: load_query 38 | load_after_sql: detach_db 39 | drop_sql: drop_sql 40 | clean_sql: clean_sql 41 | rows_sql: nrows 42 | active: false 43 | ``` 44 | 45 | ```sql 46 | -- load_extentions 47 | INSTALL sqlite; 48 | LOAD sqlite; 49 | INSTALL httpfs; 50 | LOAD httpfs; 51 | ``` 52 | 53 | ```sql 54 | -- attach_db 55 | ATTACH 'database/S3_EXTRACT.db' AS "DB" (TYPE SQLITE) 56 | ``` 57 | 58 | ```sql 59 | -- detach_db 60 | DETACH "DB"; 61 | ``` 62 | 63 | ```sql 64 | -- load_query 65 | CREATE OR REPLACE TABLE "DB"."
" AS 66 | FROM 's3://duckdb-blobs/train_services.parquet'; 67 | ``` 68 | 69 | ```sql 70 | -- drop_sql 71 | DROP TABLE IF EXISTS "DB"."
"; 72 | ``` 73 | 74 | ```sql 75 | -- clean_sql 76 | DELETE FROM "DB"."
"; 77 | ``` 78 | 79 | ```sql 80 | -- nrows 81 | SELECT COUNT(*) AS "nrows" FROM "DB"."
" 82 | ``` 83 | 84 | ## S3_EXTRACT 85 | 86 | ```yaml metadata 87 | name: S3_EXTRACT 88 | description: "Example extrating from S3 to a local sqlite3 file" 89 | table: S3_EXTRACT 90 | load_conn: "duckdb:" 91 | load_before_sql: 92 | - load_extentions 93 | - attach_db 94 | - create_S3_token 95 | load_sql: load_query 96 | load_after_sql: detach_db 97 | drop_sql: drop_sql 98 | clean_sql: clean_sql 99 | rows_sql: nrows 100 | active: false 101 | ``` 102 | 103 | ```sql 104 | -- load_extentions 105 | INSTALL httpfs; 106 | LOAD httpfs; 107 | ``` 108 | 109 | ```sql 110 | -- attach_db 111 | ATTACH 'database/S3_EXTRACT.db' AS "DB" (TYPE SQLITE) 112 | ``` 113 | 114 | Example with a [Minio](https://min.io/) local instance 115 | 116 | ```sql 117 | -- create_S3_token 118 | CREATE SECRET S3_token ( 119 | TYPE S3, 120 | KEY_ID '@S3_KEY_ID', 121 | SECRET '@S3_SECRET', 122 | ENDPOINT '127.0.0.1:3000', 123 | URL_STYLE 'path' 124 | ); 125 | ``` 126 | 127 | ```sql 128 | -- detach_db 129 | DETACH "DB"; 130 | ``` 131 | 132 | ```sql 133 | -- load_query 134 | CREATE OR REPLACE TABLE "DB"."
" AS 135 | SELECT * 136 | FROM 's3://uploads/flights.csv'; 137 | ``` 138 | 139 | ```sql 140 | -- drop_sql 141 | DROP TABLE IF EXISTS "DB"."
"; 142 | ``` 143 | 144 | ```sql 145 | -- clean_sql 146 | DELETE FROM "DB"."
"; 147 | ``` 148 | 149 | ```sql 150 | -- nrows 151 | SELECT COUNT(*) AS "nrows" FROM "DB"."
" 152 | ``` 153 | 154 | # LOGS 155 | 156 | ```yaml metadata 157 | name: LOGS 158 | description: "Example saving logs" 159 | table: etlx_logs 160 | connection: "duckdb:" 161 | before_sql: 162 | - load_extentions 163 | - attach_db 164 | - 'USE DB;' 165 | save_log_sql: load_logs 166 | save_on_err_patt: '(?i)table.+does.+not.+exist|does.+not.+have.+column.+with.+name' 167 | save_on_err_sql: 168 | - create_logs 169 | - get_dyn_queries[create_columns_missing] 170 | - load_logs 171 | after_sql: 172 | - 'USE memory;' 173 | - detach_db 174 | tmp_dir: database 175 | active: true 176 | ``` 177 | 178 | ```sql 179 | -- load_extentions 180 | INSTALL Sqlite; 181 | LOAD Sqlite; 182 | INSTALL json; 183 | LOAD json; 184 | ``` 185 | 186 | ```sql 187 | -- attach_db 188 | ATTACH 'database/S3_EXTRACT.db' AS "DB" (TYPE SQLITE) 189 | ``` 190 | 191 | ```sql 192 | -- detach_db 193 | DETACH "DB"; 194 | ``` 195 | 196 | ```sql 197 | -- load_logs 198 | INSERT INTO "DB"."
" BY NAME 199 | SELECT * 200 | FROM read_json(''); 201 | ``` 202 | 203 | ```sql 204 | -- create_logs 205 | CREATE TABLE IF NOT EXISTS "DB"."
" AS 206 | SELECT * 207 | FROM read_json(''); 208 | ``` 209 | 210 | ```sql 211 | -- create_columns_missing 212 | WITH source_columns AS ( 213 | SELECT column_name, column_type 214 | FROM (DESCRIBE SELECT * FROM read_json('')) 215 | ), 216 | destination_columns AS ( 217 | SELECT column_name, data_type as column_type 218 | FROM duckdb_columns 219 | WHERE table_name = '
' 220 | ), 221 | missing_columns AS ( 222 | SELECT s.column_name, s.column_type 223 | FROM source_columns s 224 | LEFT JOIN destination_columns d ON s.column_name = d.column_name 225 | WHERE d.column_name IS NULL 226 | ) 227 | SELECT 'ALTER TABLE "DB"."
" ADD COLUMN "' || column_name || '" ' || column_type || ';' AS query 228 | FROM missing_columns; 229 | ``` 230 | -------------------------------------------------------------------------------- /examples/actions.md: -------------------------------------------------------------------------------- 1 | # ACTIONS 2 | 3 | ```yaml metadata 4 | name: FileOperations 5 | description: "Transfer and organize generated reports" 6 | path: examples 7 | active: true 8 | ``` 9 | 10 | ## FTP DOWNLOAD 11 | 12 | ```yaml metadata 13 | name: FetchRemoteReport 14 | description: "Download data file from external FTP" 15 | type: ftp_download 16 | params: 17 | host: "ftp.example.com" 18 | port: "21" 19 | user: "myuser" 20 | password: "@FTP_PASSWORD" 21 | source: "/data/daily_report.csv" 22 | target: "downloads/daily_report.csv" 23 | active: true 24 | ``` 25 | 26 | --- 27 | 28 | ## COPY LOCAL FILE 29 | 30 | ```yaml metadata 31 | name: CopyReportToArchive 32 | description: "Move final report to archive folder" 33 | type: copy_file 34 | params: 35 | source: "nyc_taxy_YYYYMMDD.xlsx" 36 | target: "copy_nyc_taxy_YYYYMMDD.xlsx" 37 | active: false 38 | ``` 39 | 40 | --- 41 | 42 | ## Compress to ZIP 43 | 44 | ```yaml metadata 45 | name: CompressReports 46 | description: "Compress report files into a .zip archive" 47 | type: compress 48 | params: 49 | compression: zip 50 | files: 51 | - "nyc_taxy_YYYYMMDD.xlsx" 52 | - "copy_nyc_taxy_YYYYMMDD.xlsx" 53 | output: "nyc_taxy.zip" 54 | active: false 55 | ``` 56 | 57 | --- 58 | 59 | ## Compress to GZ 60 | 61 | ```yaml metadata 62 | name: CompressToGZ 63 | description: "Compress a summary file to .gz" 64 | type: compress 65 | params: 66 | compression: gz 67 | files: 68 | - "nyc_taxy_YYYYMMDD.xlsx" 69 | output: "nyc_taxy_YYYYMMDD.xlsx.gz" 70 | active: false 71 | ``` 72 | 73 | --- 74 | 75 | ## HTTP DOWNLOAD 76 | 77 | ```yaml metadata 78 | name: DownloadFromAPI 79 | description: "Download dataset from HTTP endpoint" 80 | type: http_download 81 | params: 82 | url: "https://api.example.com/data" 83 | target: "data/today.json" 84 | method: GET 85 | headers: 86 | Authorization: "Bearer @API_TOKEN" 87 | Accept: "application/json" 88 | params: 89 | date: "YYYYMMDD" 90 | limit: "1000" 91 | active: false 92 | ``` 93 | 94 | --- 95 | 96 | ## HTTP UPLOAD 97 | 98 | ```yaml metadata 99 | name: PushReportToWebhook 100 | description: "Upload final report to an HTTP endpoint" 101 | type: http_upload 102 | params: 103 | url: "https://webhook.example.com/upload" 104 | method: POST 105 | source: "reports/final.csv" 106 | headers: 107 | Authorization: "Bearer @WEBHOOK_TOKEN" 108 | Content-Type: "multipart/form-data" 109 | params: 110 | type: "summary" 111 | date: "YYYYMMDD" 112 | active: false 113 | ``` 114 | 115 | --- 116 | 117 | ## FTP DOWNLOAD 118 | 119 | ```yaml metadata 120 | name: FetchRemoteReport 121 | description: "Download data file from external FTP" 122 | type: ftp_download 123 | params: 124 | host: "ftp.example.com" 125 | username: "myuser" 126 | password: "@FTP_PASSWORD" 127 | source: "/data/daily_report.csv" 128 | target: "downloads/daily_report.csv" 129 | active: false 130 | ``` 131 | 132 | ## SFTP DOWNLOAD 133 | 134 | ```yaml metadata 135 | name: FetchRemoteReport 136 | description: "Download data file from external SFTP" 137 | type: sftp_download 138 | params: 139 | host: "sftp.example.com" 140 | username: "myuser" 141 | password: "@FTP_PASSWORD" 142 | source: "/data/daily_report.csv" 143 | target: "downloads/daily_report.csv" 144 | active: false 145 | ``` 146 | 147 | --- 148 | 149 | 
## S3 UPLOAD 150 | 151 | ```yaml metadata 152 | name: ArchiveToS3 153 | description: "Send latest results to S3 bucket" 154 | type: s3_upload 155 | params: 156 | AWS_ACCESS_KEY_ID: '@AWS_ACCESS_KEY_ID' 157 | AWS_SECRET_ACCESS_KEY: '@AWS_SECRET_ACCESS_KEY' 158 | AWS_REGION: '@AWS_REGION' 159 | AWS_ENDPOINT: 127.0.0.1:3000 160 | S3_FORCE_PATH_STYLE: true 161 | S3_DISABLE_SSL: false 162 | S3_SKIP_SSL_VERIFY: true 163 | bucket: "my-etlx-bucket" 164 | key: "exports/summary_YYYYMMDD.xlsx" 165 | source: "reports/summary.xlsx" 166 | active: false 167 | ``` 168 | 169 | ## S3 DOWNLOAD 170 | 171 | ```yaml metadata 172 | name: DownloadFromS3 173 | description: "Download a file from an S3 bucket" 174 | type: s3_download 175 | params: 176 | AWS_ACCESS_KEY_ID: '@AWS_ACCESS_KEY_ID' 177 | AWS_SECRET_ACCESS_KEY: '@AWS_SECRET_ACCESS_KEY' 178 | AWS_REGION: '@AWS_REGION' 179 | AWS_ENDPOINT: 127.0.0.1:3000 180 | S3_FORCE_PATH_STYLE: true 181 | S3_DISABLE_SSL: false 182 | S3_SKIP_SSL_VERIFY: true 183 | bucket: "my-etlx-bucket" 184 | key: "exports/summary_YYYYMMDD.xlsx" 185 | target: "reports/summary.xlsx" 186 | active: false 187 | ``` 188 | 189 | ## DB 2 DB EX 190 | 191 | ```yaml metadata 192 | name: WRITE_RESULTS_MSSQL 193 | description: "MSSQL: as of this moment DDB does not have the same support for MSSQL as it has for SQLite, PG or MySQL, so this can be a way to put results into a DB like MSSQL or any other DB supported by sqlx" 194 | type: db_2_db 195 | params: 196 | source: 197 | conn: sqlite3:database/HTTP_EXTRACT.db 198 | before: null 199 | chunk_size: 3 200 | timeout: 30 201 | sql: origin_query 202 | after: null 203 | target: 204 | conn: mssql:sqlserver://sa:@MSSQL_PASSWORD@localhost?database=master 205 | timeout: 30 206 | before: 207 | - create_schema 208 | sql: mssql_sql 209 | after: null 210 | active: true 211 | ``` 212 | 213 | ```sql 214 | -- origin_query 215 | SELECT "description", "duration", STRFTIME('%Y-%m-%d %H:%M:%S', "start_at") AS "start_at", "ref" 216 | FROM "etlx_logs" 217 | ORDER BY "start_at" DESC 218 | ``` 219 | 220 | ```sql 221 | -- create_schema 222 | IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'etlx_logs' AND type = 'U') 223 | CREATE TABLE [dbo].[etlx_logs] ( 224 | [description] NVARCHAR(MAX) NULL, 225 | [duration] BIGINT NULL, 226 | [start_at] DATETIME NULL, 227 | [ref] DATE NULL 228 | ); 229 | ``` 230 | 231 | ```sql 232 | -- mssql_sql 233 | INSERT INTO [dbo].[etlx_logs] ([:columns]) VALUES 234 | ``` 235 | 236 | # ETL 237 | 238 | ```yaml metadata 239 | name: MSSQL_EXTRACT 240 | description: "Example extracting from MSSQL to a sqlite3 file" 241 | connection: "duckdb:" 242 | database: MSSQL_EXTRACT.db 243 | active: false 244 | ``` 245 | 246 | ## MSSQL_EXTRACT 247 | 248 | ```yaml metadata 249 | name: MSSQL_EXTRACT 250 | description: "Example extracting from MSSQL to a sqlite3 file" 251 | table: logs 252 | to_csv: true 253 | extract_conn: mssql:sqlserver://sa:@MSSQL_PASSWORD@localhost?database=master 254 | extract_sql: SELECT * FROM [dbo].[etlx_logs] 255 | load_conn: "duckdb:" 256 | load_before_sql: 257 | - load_extentions 258 | - attach_db 259 | load_sql: load_query 260 | load_after_sql: detach_db 261 | active: true 262 | ``` 263 | 264 | ```sql 265 | -- load_extentions 266 | INSTALL sqlite; 267 | LOAD sqlite; 268 | ``` 269 | 270 | ```sql 271 | -- attach_db 272 | ATTACH 'database/MSSQL_EXTRACT.db' AS "DB" (TYPE SQLITE) 273 | ``` 274 | 275 | ```sql 276 | -- detach_db 277 | DETACH "DB"; 278 | ``` 279 | 280 | ```sql 281 | -- load_query 282 | CREATE OR REPLACE TABLE "DB"."
" AS 283 | SELECT * 284 | FROM ''; 285 | ``` 286 | -------------------------------------------------------------------------------- /examples/pg.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # ETL 5 | 6 | 7 | 8 | ```yaml metadata 9 | name: HTTP_EXTRACT 10 | description: "Example extrating from web to a local postgres file" 11 | connection: "postgres:user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable" 12 | database: "postgres:user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable" 13 | active: true 14 | ``` 15 | 16 | ## VERSION 17 | 18 | ```yaml metadata 19 | name: VERSION 20 | description: "DDB Version" 21 | table: VERSION 22 | load_conn: "duckdb:" 23 | load_before_sql: "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)" 24 | load_sql: 'CREATE OR REPLACE TABLE DB."
" AS SELECT version() AS "VERSION";' 25 | load_after_sql: "DETACH DB;" 26 | rows_sql: 'SELECT COUNT(*) AS "nrows" FROM DB."
"' 27 | active: true 28 | ``` 29 | 30 | ## NYC_TAXI 31 | 32 | ```yaml metadata 33 | name: NYC_TAXI 34 | description: "Example extrating from web to a local postgres file" 35 | table: NYC_TAXI 36 | load_conn: "duckdb:" 37 | load_before_sql: "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)" 38 | load_sql: load_query 39 | load_after_sql: DETACH "DB" 40 | drop_sql: DROP TABLE IF EXISTS "DB"."
" 41 | clean_sql: DELETE FROM "DB"."
" 42 | rows_sql: SELECT COUNT(*) AS "nrows" FROM "DB"."
" 43 | active: false 44 | ``` 45 | 46 | ```sql 47 | -- load_query 48 | CREATE OR REPLACE TABLE "DB"."
" AS 49 | SELECT * 50 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2024-01.parquet'; 51 | ``` 52 | 53 | ## PeadkHours 54 | 55 | ```yaml metadata 56 | name: PeadkHours 57 | description: Peask Hours Analysis 58 | table: PeadkHours 59 | transform_conn: "duckdb:" 60 | transform_before_sql: "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)" 61 | transform_sql: preadk_hours_load_query 62 | transform_after_sql: DETACH "DB" 63 | drop_sql: DROP TABLE IF EXISTS "DB"."
" 64 | clean_sql: DELETE FROM "DB"."
" 65 | rows_sql: SELECT COUNT(*) AS "nrows" FROM "DB"."
" 66 | active: true 67 | ``` 68 | 69 | ```sql 70 | -- preadk_hours_load_query 71 | CREATE OR REPLACE TABLE "DB"."
" AS 72 | [[PeakHoursAnalysis]] 73 | ``` 74 | 75 | ## DailyRevenueTripVolume 76 | 77 | ```yaml metadata 78 | name: DailyRevenueTripVolume 79 | description: Daily Revenue and Trip Volume 80 | has_placeholders: true 81 | schema: TRF 82 | database: "postgres:user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable search_path=TRF" 83 | table: DailyRevenueTripVolume 84 | transform_conn: "duckdb:" 85 | transform_before_sql: "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)" 86 | transform_sql: 87 | - CREATE SCHEMA IF NOT EXISTS "DB"."" 88 | - DailyRevenueTripVolume 89 | transform_after_sql: DETACH "DB" 90 | drop_sql: DROP TABLE IF EXISTS "DB".""."
" 91 | clean_sql: DELETE FROM "DB".""."
" 92 | rows_sql: SELECT COUNT(*) AS "nrows" FROM "DB".""."
" 93 | active: true 94 | ``` 95 | 96 | ```sql 97 | -- DailyRevenueTripVolume 98 | CREATE OR REPLACE TABLE "DB"."TRF"."
" AS 99 | SELECT CAST(tpep_pickup_datetime AS DATE) AS trip_date, 100 | COUNT(*) AS total_trips, 101 | ROUND(SUM(total_amount), 2) AS total_revenue, 102 | ROUND(AVG(total_amount), 2) AS avg_revenue_per_trip, 103 | ROUND(SUM(trip_distance), 2) AS total_miles, 104 | ROUND(AVG(trip_distance), 2) AS avg_trip_distance 105 | FROM DB.NYC_TAXI 106 | GROUP BY trip_date 107 | ORDER BY trip_date 108 | ``` 109 | 110 | # EXPORTS 111 | 112 | Exports data to files. 113 | 114 | ```yaml metadata 115 | name: EXPORTS 116 | description: Exports Examples 117 | connection: "duckdb:" 118 | path: "static/uploads/tmp" 119 | active: true 120 | ``` 121 | 122 | ## ExportDailyRevenueTripVolume 123 | 124 | ```yaml metadata 125 | name: ExportDailyRevenueTripVolume 126 | description: "Export data to CSV" 127 | connection: "duckdb:" 128 | before_sql: "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)" 129 | export_sql: export 130 | after_sql: "DETACH DB" 131 | path: 'ExportDailyRevenueTripVolume_{YYYYMMDD}.{TSTAMP}.parquet' 132 | tmp_prefix: 'tmp' 133 | active: true 134 | ``` 135 | 136 | ```sql 137 | -- export 138 | COPY ( 139 | SELECT CAST(tpep_pickup_datetime AS DATE) AS trip_date, 140 | COUNT(*) AS total_trips, 141 | ROUND(SUM(total_amount), 2) AS total_revenue, 142 | ROUND(AVG(total_amount), 2) AS avg_revenue_per_trip, 143 | ROUND(SUM(trip_distance), 2) AS total_miles, 144 | ROUND(AVG(trip_distance), 2) AS avg_trip_distance 145 | FROM DB.NYC_TAXI 146 | GROUP BY trip_date 147 | ORDER BY trip_date 148 | ) TO '' 149 | ``` 150 | 151 | ## hist_logs 152 | 153 | ```yaml metadata 154 | name: hist_logs 155 | description: "Export data to CSV" 156 | connection: "duckdb:" 157 | before_sql: "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)" 158 | export_sql: export 159 | after_sql: "DETACH DB" 160 | path: 'hist_logs_{YYYYMMDD}.{TSTAMP}.parquet' 161 | tmp_prefix: 'tmp' 162 | active: true 163 | ``` 164 | 165 | ```sql 166 | -- export 167 | COPY ( 168 | SELECT * 169 | FROM DB.etlx_logs 170 | ) TO '' 171 | ``` 172 | 173 | # LOGS 174 | 175 | ```yaml metadata 176 | name: LOGS 177 | description: "Example saving logs" 178 | table: etlx_logs 179 | connection: "duckdb:" 180 | before_sql: 181 | - "ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES)" 182 | - 'USE DB;' 183 | - LOAD json 184 | - "get_dyn_queries[create_columns_missing](ATTACH 'user=postgres password=1234 dbname=ETLX_DATA host=localhost port=5432 sslmode=disable' AS DB (TYPE POSTGRES), DETACH DB)" 185 | save_log_sql: load_logs 186 | save_on_err_patt: '(?i)table.+does.+not.+exist' 187 | save_on_err_sql: create_logs 188 | after_sql: 189 | - 'USE memory;' 190 | - DETACH "DB" 191 | active: true 192 | ``` 193 | 194 | ```sql 195 | -- load_logs 196 | INSERT INTO "DB"."
" BY NAME 197 | SELECT * 198 | FROM read_json(''); 199 | ``` 200 | 201 | ```sql 202 | -- create_logs 203 | CREATE TABLE IF NOT EXISTS "DB"."
" AS 204 | SELECT * 205 | FROM read_json(''); 206 | ``` 207 | 208 | ```sql 209 | -- create_columns_missing 210 | WITH source_columns AS ( 211 | SELECT column_name, column_type 212 | FROM (DESCRIBE SELECT * FROM read_json('')) 213 | ), 214 | destination_columns AS ( 215 | SELECT column_name, data_type as column_type 216 | FROM duckdb_columns 217 | WHERE table_name = '
' 218 | ), 219 | missing_columns AS ( 220 | SELECT s.column_name, s.column_type 221 | FROM source_columns s 222 | LEFT JOIN destination_columns d ON s.column_name = d.column_name 223 | WHERE d.column_name IS NULL 224 | ) 225 | SELECT 'ALTER TABLE "DB"."
" ADD COLUMN "' || column_name || '" ' || column_type || ';' AS query 226 | FROM missing_columns 227 | WHERE (SELECT COUNT(*) FROM destination_columns) > 0; 228 | ``` 229 | 230 | # REQUIRES 231 | 232 | ```yaml metadata 233 | name: REQUIRES 234 | description: "Example requires" 235 | active: true 236 | ``` 237 | 238 | ## PeakHoursAnalysis 239 | 240 | ```yaml metadata 241 | name: PeakHoursAnalysis 242 | description: "Analyze peak hours for NYC Yellow Taxi rides" 243 | path: examples/PeakHoursAnalysis.sql 244 | ``` 245 | -------------------------------------------------------------------------------- /internal/etlx/ducklake.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "regexp" 5 | "strings" 6 | ) 7 | 8 | // DuckLakeParseResult represents the result of parsing a DuckLake string 9 | type DuckLakeParseResult struct { 10 | IsDuckLake bool `json:"is_ducklake"` 11 | HasAttach bool `json:"has_attach"` 12 | DSN string `json:"dsn"` 13 | DuckLakeName string `json:"ducklake_name"` 14 | DataPath string `json:"data_path"` 15 | } 16 | 17 | // DuckLakeOccurrence represents a single DuckLake occurrence found in text 18 | type DuckLakeOccurrence struct { 19 | DuckLakeString string `json:"ducklake_string"` 20 | HasAttach bool `json:"has_attach"` 21 | DSN string `json:"dsn"` 22 | DuckLakeName string `json:"ducklake_name"` 23 | DataPath string `json:"data_path"` 24 | } 25 | 26 | // DuckLakeParser handles parsing of DuckLake format strings 27 | type DuckLakeParser struct { 28 | mainPattern *regexp.Regexp 29 | dataPathPattern *regexp.Regexp 30 | scanPattern *regexp.Regexp 31 | } 32 | 33 | // NewDuckLakeParser creates a new DuckLakeParser instance 34 | func NewDuckLakeParser() *DuckLakeParser { 35 | // Main regex pattern to match ducklake format 36 | // This pattern handles: 37 | // 1. Optional ATTACH keyword at the beginning 38 | // 2. Required 'ducklake:' prefix 39 | // 3. DSN (data source name) - can contain various characters 40 | // 4. Optional AS clause with ducklake_name 41 | // 5. 
Optional parameters like DATA_PATH 42 | mainPattern := regexp.MustCompile(`(?i)^\s*(ATTACH\s+)?['"]?ducklake:([^'"\)\s]+)['"]?(?:\s+AS\s+([a-zA-Z_][a-zA-Z0-9_]*))?(?:\s*\(([^)]*)\))?\s*;?\s*$`) 43 | 44 | // Pattern to extract DATA_PATH from parameters 45 | dataPathPattern := regexp.MustCompile(`(?i)DATA_PATH\s+['"]([^'"]+)['"]`) 46 | 47 | // Pattern for finding potential ducklake occurrences in logs 48 | // This is more flexible and can find partial matches 49 | scanPattern := regexp.MustCompile(`(?i)(?:(?:^|\s)(ATTACH)\s+)?['"]?(ducklake:[^'"\)\s;]+)['"]?(?:\s+AS\s+([a-zA-Z_][a-zA-Z0-9_]*))?(?:\s*\([^)]*\))?\s*;?`) 50 | 51 | return &DuckLakeParser{ 52 | mainPattern: mainPattern, 53 | dataPathPattern: dataPathPattern, 54 | scanPattern: scanPattern, 55 | } 56 | } 57 | 58 | // Parse parses a string to check if it's in ducklake format and extract components 59 | func (p *DuckLakeParser) Parse(input string) DuckLakeParseResult { 60 | result := DuckLakeParseResult{ 61 | IsDuckLake: false, 62 | HasAttach: false, 63 | DSN: "", 64 | DuckLakeName: "", 65 | DataPath: "", 66 | } 67 | 68 | if input == "" { 69 | return result 70 | } 71 | 72 | matches := p.mainPattern.FindStringSubmatch(strings.TrimSpace(input)) 73 | 74 | if len(matches) > 0 { 75 | result.IsDuckLake = true 76 | 77 | // Check if ATTACH keyword is present (group 1) 78 | if matches[1] != "" { 79 | result.HasAttach = true 80 | } 81 | 82 | // Extract DSN (group 2) 83 | if len(matches) > 2 && matches[2] != "" { 84 | result.DSN = matches[2] 85 | } 86 | 87 | // Extract ducklake name (group 3) 88 | if len(matches) > 3 && matches[3] != "" { 89 | result.DuckLakeName = matches[3] 90 | } 91 | 92 | // Extract DATA_PATH from parameters (group 4) 93 | if len(matches) > 4 && matches[4] != "" { 94 | dataPathMatches := p.dataPathPattern.FindStringSubmatch(matches[4]) 95 | if len(dataPathMatches) > 1 { 96 | result.DataPath = dataPathMatches[1] 97 | } 98 | } 99 | } 100 | 101 | return result 102 | } 103 | 104 | // FindDuckLakeOccurrences finds all DuckLake occurrences in a multi-line string 105 | func (p *DuckLakeParser) FindDuckLakeOccurrences(text string) []DuckLakeOccurrence { 106 | if text == "" { 107 | return []DuckLakeOccurrence{} 108 | } 109 | 110 | var occurrences []DuckLakeOccurrence 111 | matches := p.scanPattern.FindAllStringSubmatch(text, -1) 112 | 113 | for _, match := range matches { 114 | if len(match) > 2 { 115 | // Extract components 116 | hasAttach := match[1] != "" 117 | fullDSN := match[2] 118 | duckLakeName := "" 119 | if len(match) > 3 { 120 | duckLakeName = match[3] 121 | } 122 | parameters := "" 123 | if len(match) > 4 { 124 | parameters = match[4] 125 | } 126 | 127 | // Extract DSN (remove ducklake: prefix) 128 | dsn := fullDSN 129 | if strings.HasPrefix(fullDSN, "ducklake:") { 130 | dsn = fullDSN[9:] 131 | } 132 | 133 | // Extract DATA_PATH if present 134 | dataPath := "" 135 | if parameters != "" { 136 | dataPathMatch := p.dataPathPattern.FindStringSubmatch(parameters) 137 | if len(dataPathMatch) > 1 { 138 | dataPath = dataPathMatch[1] 139 | } 140 | } 141 | 142 | // Create occurrence record 143 | occurrence := DuckLakeOccurrence{ 144 | DuckLakeString: strings.TrimSpace(match[0]), 145 | HasAttach: hasAttach, 146 | DSN: dsn, 147 | DuckLakeName: duckLakeName, 148 | DataPath: dataPath, 149 | } 150 | 151 | occurrences = append(occurrences, occurrence) 152 | } 153 | } 154 | 155 | return occurrences 156 | } 157 | 158 | // FindDuckLakeStrings finds all DuckLake strings in a multi-line text (simple version) 159 | func (p *DuckLakeParser) 
FindDuckLakeStrings(text string) []string { 160 | occurrences := p.FindDuckLakeOccurrences(text) 161 | var strings []string 162 | 163 | for _, occ := range occurrences { 164 | strings = append(strings, occ.DuckLakeString) 165 | } 166 | 167 | return strings 168 | } 169 | 170 | // FindDuckLakeDSNs finds all unique DSNs in a multi-line text 171 | func (p *DuckLakeParser) FindDuckLakeDSNs(text string) []string { 172 | occurrences := p.FindDuckLakeOccurrences(text) 173 | dsnMap := make(map[string]bool) 174 | var dsns []string 175 | 176 | for _, occ := range occurrences { 177 | if occ.DSN != "" && !dsnMap[occ.DSN] { 178 | dsnMap[occ.DSN] = true 179 | dsns = append(dsns, occ.DSN) 180 | } 181 | } 182 | 183 | return dsns 184 | } 185 | 186 | // IsDuckLakeFormat quickly checks if a string is in ducklake format 187 | func (p *DuckLakeParser) IsDuckLakeFormat(input string) bool { 188 | return p.Parse(input).IsDuckLake 189 | } 190 | 191 | // ExtractDSN extracts just the DSN from a ducklake format string 192 | func (p *DuckLakeParser) ExtractDSN(input string) string { 193 | return p.Parse(input).DSN 194 | } 195 | 196 | // ExtractDuckLakeName extracts just the ducklake name from a ducklake format string 197 | func (p *DuckLakeParser) ExtractDuckLakeName(input string) string { 198 | return p.Parse(input).DuckLakeName 199 | } 200 | 201 | // ExtractDataPath extracts just the DATA_PATH value from a ducklake format string 202 | func (p *DuckLakeParser) ExtractDataPath(input string) string { 203 | return p.Parse(input).DataPath 204 | } 205 | 206 | // HasAttachKeyword checks if the string contains the ATTACH keyword 207 | func (p *DuckLakeParser) HasAttachKeyword(input string) bool { 208 | return p.Parse(input).HasAttach 209 | } 210 | 211 | // ParseDuckLakeString is a convenience function to parse a ducklake string 212 | func ParseDuckLakeString(input string) DuckLakeParseResult { 213 | parser := NewDuckLakeParser() 214 | return parser.Parse(input) 215 | } 216 | 217 | // FindDuckLakeOccurrences is a convenience function to find all DuckLake occurrences in a multi-line string 218 | func FindDuckLakeOccurrences(text string) []DuckLakeOccurrence { 219 | parser := NewDuckLakeParser() 220 | return parser.FindDuckLakeOccurrences(text) 221 | } 222 | 223 | // FindDuckLakeStrings is a convenience function to find all DuckLake strings in a multi-line text 224 | func FindDuckLakeStrings(text string) []string { 225 | parser := NewDuckLakeParser() 226 | return parser.FindDuckLakeStrings(text) 227 | } 228 | 229 | // FindDuckLakeDSNs is a convenience function to find all unique DSNs in a multi-line text 230 | func FindDuckLakeDSNs(text string) []string { 231 | parser := NewDuckLakeParser() 232 | return parser.FindDuckLakeDSNs(text) 233 | } 234 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Build and Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | build-linux: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Install Dependencies 13 | run: | 14 | sudo apt-get update 15 | sudo apt-get install -y build-essential gcc g++ unixodbc unixodbc-dev 16 | - name: Checkout Code 17 | uses: actions/checkout@v3 18 | - name: Setup Go 19 | uses: actions/setup-go@v4 20 | with: 21 | go-version: '1.25' 22 | - name: Build Linux Binaries 23 | run: | 24 | mkdir -p dist 25 | CGO_ENABLED=1 go build -o dist/etlx-linux-amd64 ./cmd/main.go 26 | - name: Upload Artifacts 27 | uses: 
actions/upload-artifact@v4 28 | with: 29 | name: linux-binary 30 | path: dist/*linux* 31 | 32 | build-windows-linking: 33 | runs-on: windows-latest 34 | steps: 35 | # Step 1: Checkout the code 36 | - name: Checkout Code 37 | uses: actions/checkout@v3 38 | 39 | # Step 2: Set up Go environment 40 | - name: Setup Go 41 | uses: actions/setup-go@v4 42 | with: 43 | go-version: '1.25' 44 | 45 | - name: Setup MSBuild (for Visual Studio environment) 46 | uses: microsoft/setup-msbuild@v2 47 | 48 | # Step 3: Download DuckDB Precompiled Library 49 | - name: Download DuckDB Library 50 | run: | 51 | $version = "v1.4.3" 52 | $duckdb_url = "https://github.com/duckdb/duckdb/releases/download/$version/libduckdb-windows-amd64.zip" 53 | $destinationPath = "$(Get-Location)\duckdb" 54 | Invoke-WebRequest -Uri $duckdb_url -OutFile "duckdb.zip" 55 | Expand-Archive -Path "duckdb.zip" -DestinationPath $destinationPath 56 | Write-Host "DuckDB library extracted to $destinationPath" 57 | $duckdb_url = "https://github.com/duckdb/duckdb/releases/download/$version/libduckdb-windows-arm64.zip" 58 | $destinationPath = "$(Get-Location)\duckdbarm64" 59 | Invoke-WebRequest -Uri $duckdb_url -OutFile "duckdb.zip" 60 | Expand-Archive -Path "duckdb.zip" -DestinationPath $destinationPath 61 | Write-Host "DuckDB library extracted to $destinationPath" 62 | 63 | # Step 5: Set Environment Variables 64 | - name: Set Environment Variables 65 | run: | 66 | echo "CGO_ENABLED=1" >> $env:GITHUB_ENV 67 | echo "CGO_CFLAGS=-I$(Get-Location)\duckdb\" >> $env:GITHUB_ENV 68 | echo "CGO_LDFLAGS=-L$(Get-Location)\duckdb\ -lduckdb" >> $env:GITHUB_ENV 69 | 70 | # Step 6: Verify DuckDB Library 71 | - name: Verify DuckDB Library 72 | run: | 73 | $libPath = "$(Get-Location)\duckdb\" 74 | if (!(Test-Path "$libPath\duckdb.lib")) { 75 | Write-Error "duckdb.lib not found in $libPath" 76 | } 77 | Write-Host "duckdb.lib found in $libPath" 78 | 79 | # Step 7: Build the Application 80 | - name: Build Windows Binary 81 | run: | 82 | mkdir dist 83 | go build -o dist/etlx-windows-linking-amd64.exe ./cmd/main.go 84 | echo "CGO_ENABLED=1" >> $env:GITHUB_ENV 85 | echo "CGO_CFLAGS=-I$(Get-Location)\duckdbarm64\" >> $env:GITHUB_ENV 86 | echo "CGO_LDFLAGS=-L$(Get-Location)\duckdbarm64\ -lduckdb" >> $env:GITHUB_ENV 87 | echo "GOARCH=arm64" >> $env:GITHUB_ENV 88 | go build -o dist/etlx-windows-linking-arm64.exe ./cmd/main.go 89 | #go build -tags=duckdb_use_lib -o dist/etlx-windows-amd64.exe ./cmd/main.go 90 | 91 | - name: Upload Artifacts 92 | uses: actions/upload-artifact@v4 93 | with: 94 | name: windows-binary-linking 95 | path: dist/*windows* 96 | build-windows: 97 | runs-on: windows-latest 98 | steps: 99 | - name: Checkout Code 100 | uses: actions/checkout@v3 101 | 102 | - name: Setup Go 103 | uses: actions/setup-go@v4 104 | with: 105 | go-version: '1.25' 106 | 107 | - name: Setup MSBuild (for Visual Studio environment) 108 | uses: microsoft/setup-msbuild@v2 109 | 110 | - name: Set Environment Variables for CGO 111 | run: | 112 | echo "CGO_ENABLED=1" >> $env:GITHUB_ENV 113 | # echo "CC=cl.exe" >> $env:GITHUB_ENV 114 | 115 | - name: Build with MSVC (Visual Studio compiler) 116 | run: | 117 | mkdir dist 118 | go build -o dist/etlx-windows-amd64.exe ./cmd/main.go 119 | echo "GOARCH=arm64" >> $env:GITHUB_ENV 120 | go build -o dist/etlx-windows-arm64.exe ./cmd/main.go 121 | 122 | - name: Upload MSVC Artifacts 123 | uses: actions/upload-artifact@v4 124 | with: 125 | name: windows-msvc-binary 126 | path: dist/*windows* 127 | build-darwin: 128 | runs-on: macos-latest 129 | 
steps: 130 | # Step 1: Checkout the code 131 | - name: Checkout Code 132 | uses: actions/checkout@v3 133 | 134 | # Step 2: Setup Go 135 | - name: Setup Go 136 | uses: actions/setup-go@v4 137 | with: 138 | go-version: '1.25' 139 | 140 | # Step 3: Install UnixODBC 141 | - name: Install UnixODBC 142 | run: | 143 | brew install unixodbc 144 | brew --prefix unixodbc 145 | 146 | # Step 4: Set Environment Variables 147 | - name: Set Environment Variables 148 | run: | 149 | ODBC_PREFIX=$(brew --prefix unixodbc) 150 | echo "CGO_ENABLED=1" >> $GITHUB_ENV 151 | echo "CGO_CFLAGS=-I$ODBC_PREFIX/include" >> $GITHUB_ENV 152 | echo "CGO_LDFLAGS=-L$ODBC_PREFIX/lib -lodbc" >> $GITHUB_ENV 153 | 154 | # Step 5: Build the Application 155 | - name: Build MacOS Binary 156 | run: | 157 | mkdir dist 158 | go build -o dist/etlx-macos-amd64 ./cmd/main.go 159 | GOARCH=arm64 go build -o dist/etlx-macos-arm64 ./cmd/main.go 160 | 161 | - name: Upload Artifacts 162 | uses: actions/upload-artifact@v4 163 | with: 164 | name: macos-binary 165 | path: dist/*macos* 166 | 167 | release: 168 | permissions: write-all 169 | runs-on: ubuntu-latest 170 | needs: [build-linux, build-windows, build-windows-linking, build-darwin] 171 | steps: 172 | - name: Download Windows Binary with MSVC 173 | uses: actions/download-artifact@v4 174 | with: 175 | name: windows-msvc-binary 176 | path: dist 177 | - name: Download MacOS Binary 178 | uses: actions/download-artifact@v4 179 | with: 180 | name: macos-binary 181 | path: dist 182 | - name: Download Linux Binary 183 | uses: actions/download-artifact@v4 184 | with: 185 | name: linux-binary 186 | path: dist 187 | - name: Download Windows Binary With Linking 188 | uses: actions/download-artifact@v4 189 | with: 190 | name: windows-binary-linking 191 | path: dist 192 | - name: Changelog 193 | uses: scottbrenner/generate-changelog-action@master 194 | id: Changelog 195 | env: 196 | REPO: ${{ github.repository }} 197 | - name: Zip Binaries 198 | run: | 199 | zip -j dist/etlx-linux-amd64.zip dist/etlx-linux-amd64 200 | zip -j dist/etlx-macos-amd64.zip dist/etlx-macos-amd64 201 | zip -j dist/etlx-windows-amd64.zip dist/etlx-windows-amd64.exe 202 | zip -j dist/etlx-windows-linking-amd64.zip dist/etlx-windows-linking-amd64.exe 203 | zip -j dist/etlx-macos-arm64.zip dist/etlx-macos-arm64 204 | zip -j dist/etlx-windows-arm64.zip dist/etlx-windows-arm64.exe 205 | zip -j dist/etlx-windows-linking-arm64.zip dist/etlx-windows-linking-arm64.exe 206 | #zip -j dist/etlx-linux-arm64.zip dist/etlx-linux-arm64 207 | - name: Create Release 208 | uses: softprops/action-gh-release@v1 209 | with: 210 | tag_name: ${{ github.ref_name }} 211 | draft: false 212 | prerelease: false 213 | files: | 214 | dist/etlx-linux-amd64.zip 215 | dist/etlx-macos-amd64.zip 216 | dist/etlx-windows-amd64.zip 217 | dist/etlx-windows-arm64.zip 218 | dist/etlx-windows-linking-amd64.zip 219 | dist/etlx-macos-arm64.zip 220 | dist/etlx-windows-linking-arm64.zip 221 | # dist/etlx-linux-arm64.zip 222 | env: 223 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 224 | -------------------------------------------------------------------------------- /internal/etlx/action_db2db.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "os" 8 | "regexp" 9 | "strconv" 10 | "strings" 11 | "time" 12 | 13 | "github.com/realdatadriven/etlx/internal/db" 14 | ) 15 | 16 | func ScanRowToMap(rows *sql.Rows) (map[string]interface{}, error) { 17 | columns, err := 
rows.Columns() 18 | if err != nil { 19 | return nil, fmt.Errorf("failed to get columns: %w", err) 20 | } 21 | values := make([]interface{}, len(columns)) 22 | valuePointers := make([]interface{}, len(columns)) 23 | for i := range values { 24 | valuePointers[i] = &values[i] 25 | } 26 | if err := rows.Scan(valuePointers...); err != nil { 27 | return nil, fmt.Errorf("failed to scan row: %w", err) 28 | } 29 | rowMap := make(map[string]interface{}) 30 | for i, colName := range columns { 31 | rowMap[colName] = values[i] 32 | } 33 | return rowMap, nil 34 | } 35 | 36 | func (etlx *ETLX) DB2DB(params map[string]any, item map[string]any, dateRef []time.Time) error { 37 | // Extract and validate required params 38 | source, _ := params["source"].(map[string]any) 39 | target, _ := params["target"].(map[string]any) 40 | source_conn, ok := source["conn"].(string) 41 | if !ok { 42 | return fmt.Errorf("no source conn string detected %s", source_conn) 43 | } 44 | target_conn, ok := target["conn"].(string) 45 | if !ok { 46 | return fmt.Errorf("no target conn string detected %s", target_conn) 47 | } 48 | source_sql, ok := source["sql"].(string) 49 | if !ok { 50 | return fmt.Errorf("no source conn string detected %s", source_sql) 51 | } 52 | target_sql, ok := target["sql"].(string) 53 | if !ok { 54 | return fmt.Errorf("no target conn string detected %s", target_sql) 55 | } 56 | dbSourceConn, err := etlx.GetDB(source_conn) 57 | if err != nil { 58 | return fmt.Errorf("error connecting to source: %v %s", err, source_conn) 59 | } 60 | defer dbSourceConn.Close() 61 | dbTargetConn, err := etlx.GetDB(target_conn) 62 | if err != nil { 63 | return fmt.Errorf("error connecting to target: %v %s", err, target_conn) 64 | } 65 | defer dbTargetConn.Close() 66 | // BEGIN / STARTING QUERIES 67 | before_source, ok := source["before"] 68 | if ok { 69 | err = etlx.ExecuteQuery(dbSourceConn, before_source, item, "", "", dateRef) 70 | if err != nil { 71 | return fmt.Errorf("error executing source preparation queries: %s", err) 72 | } 73 | } 74 | //fmt.Println(target_sql, item) 75 | sql_target := target_sql 76 | if _, ok := item[target_sql]; ok { 77 | sql_target = item[target_sql].(string) 78 | } 79 | sql := source_sql 80 | if _, ok := item[source_sql]; ok { 81 | sql = item[sql].(string) 82 | } 83 | chunk_size := 1_000 84 | if _, ok := source["chunk_size"]; ok { 85 | j, err := strconv.Atoi(fmt.Sprintf("%v", source["chunk_size"])) 86 | if err == nil { 87 | chunk_size = j 88 | } 89 | } 90 | //fmt.Printf("%T->%d", chunk_size, chunk_size) 91 | timeout := 1200 92 | if _, ok := source["timeout"]; ok { 93 | j, err := strconv.Atoi(fmt.Sprintf("%v", source["timeout"])) 94 | if err == nil { 95 | timeout = j 96 | } 97 | } 98 | sql = etlx.SetQueryPlaceholders(sql, "", "", dateRef) 99 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second) 100 | defer cancel() 101 | rows, err := dbSourceConn.QueryRows(ctx, sql, []any{}...) 
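	// The source result set is streamed below and buffered into chunks of at
	// most chunk_size rows; each full chunk is flushed to the target through
	// UpdateTarget, which expands the target SQL header into a multi-row
	// INSERT (see BuildInsertSQL).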
102 | if err != nil { 103 | return fmt.Errorf("failed to execute source query %s", err) 104 | } 105 | defer rows.Close() 106 | /*columns, err := rows.Columns() 107 | if err != nil { 108 | fmt.Printf("failed to get columns: %s", err) 109 | }*/ 110 | // BEGIN / STARTING QUERIES 111 | before_target, ok := target["before"] 112 | if ok { 113 | err = etlx.ExecuteQuery(dbTargetConn, before_target, item, "", "", dateRef) 114 | if err != nil { 115 | return fmt.Errorf("error executing target preparation queries: %s", err) 116 | } 117 | } 118 | i := 0 119 | var result []map[string]any 120 | for rows.Next() { 121 | i += 1 122 | row, err := ScanRowToMap(rows) 123 | if err != nil { 124 | return fmt.Errorf("failed to scan row to map: %w", err) 125 | } 126 | result = append(result, row) 127 | // send to target 128 | if i >= chunk_size { 129 | i = 0 130 | _, err = etlx.UpdateTarget(dbTargetConn, sql_target, result) 131 | if err != nil { 132 | // fmt.Printf("failed update the target: %s", err) 133 | return fmt.Errorf("main target query faild: %w", err) 134 | } 135 | result = []map[string]any{} //result[:0] 136 | } 137 | } 138 | if err := rows.Err(); err != nil { 139 | fmt.Printf("row iteration error: %s", err) 140 | return fmt.Errorf("row iteration error: %w", err) 141 | } 142 | if len(result) > 0 { 143 | _, err = etlx.UpdateTarget(dbTargetConn, sql_target, result) 144 | if err != nil { 145 | return fmt.Errorf("main target query faild: %w", err) 146 | } 147 | } 148 | // END / CLOSING QUERIES 149 | after_source, ok := source["after"] 150 | if ok { 151 | err = etlx.ExecuteQuery(dbSourceConn, after_source, item, "", "", dateRef) 152 | if err != nil { 153 | return fmt.Errorf("error executing source closing queries: %s", err) 154 | } 155 | } 156 | after_target, ok := target["after"] 157 | if ok { 158 | err = etlx.ExecuteQuery(dbTargetConn, after_target, item, "", "", dateRef) 159 | if err != nil { 160 | return fmt.Errorf("error executing source closing queries: %s", err) 161 | } 162 | } 163 | return nil 164 | } 165 | 166 | func (etlx *ETLX) BuildInsertSQL(sql_header string, data []map[string]any) (string, error) { 167 | if len(data) == 0 { 168 | return "", fmt.Errorf("no data to insert") 169 | } 170 | // Use keys from the first map as column names 171 | columns := make([]string, 0, len(data[0])) 172 | for k := range data[0] { 173 | columns = append(columns, k) 174 | } 175 | var valueRows []string 176 | for _, row := range data { 177 | var values []string 178 | for _, col := range columns { 179 | val := row[col] 180 | values = append(values, formatValue(val)) 181 | } 182 | valueRows = append(valueRows, "("+strings.Join(values, ", ")+")") 183 | } 184 | sql, err := ReplaceColumnsWithDetectedIdentifier(sql_header, columns) 185 | if err == nil { 186 | sql = fmt.Sprintf("%s %s;", sql, strings.Join(valueRows, ",\n")) 187 | } else { 188 | //fmt.Println(err) 189 | // Escape column names (basic, you might need to adapt for SQL Server specifics) 190 | colList := strings.Join(columns, ", ") 191 | /*sql := fmt.Sprintf("INSERT INTO %s (%s) VALUES %s;", 192 | table, 193 | colList, 194 | strings.Join(valueRows, ",\n"), 195 | )*/ 196 | re := regexp.MustCompile(`:columns\b`) 197 | sql_header = re.ReplaceAllString(sql_header, colList) 198 | sql = fmt.Sprintf("%s %s;", sql_header, strings.Join(valueRows, ",\n")) 199 | } 200 | if os.Getenv("ETLX_DEBUG_QUERY") == "true" { 201 | _file, err := etlx.TempFIle("", sql, fmt.Sprintf("query.%s.*.sql", "db2db")) 202 | if err != nil { 203 | fmt.Println(err) 204 | } 205 | fmt.Println(_file) 206 
| } 207 | return sql, nil 208 | } 209 | 210 | // Detects the quote character around :columns and replaces it with the appropriate formatted column list. 211 | func ReplaceColumnsWithDetectedIdentifier(query string, columns []string) (string, error) { 212 | // Regex to capture optional identifier wrapping 213 | re := regexp.MustCompile("([[\"`]?):columns([]\"`]?)") 214 | matches := re.FindStringSubmatch(query) 215 | var open, close string 216 | if len(matches) == 3 { 217 | open, close = matches[1], matches[2] 218 | } 219 | // Default if nothing matched 220 | if open == "" && close == "" { 221 | open, close = "", "" 222 | } else if open == "[" && close != "]" { 223 | close = "]" 224 | } else if open == `"` && close != `"` { 225 | close = `"` 226 | } else if open == "`" && close != "`" { 227 | close = "`" 228 | } else if open == "(" && close == ")" { 229 | open, close = "", "" // treat as no identifier 230 | } 231 | // Escape square brackets inside column names for MSSQL 232 | formatIdentifier := func(col string) string { 233 | if open == "[" && close == "]" { 234 | col = strings.ReplaceAll(col, "]", "]]") 235 | } 236 | return open + col + close 237 | } 238 | // Apply identifier 239 | var escapedCols []string 240 | for _, col := range columns { 241 | escapedCols = append(escapedCols, formatIdentifier(col)) 242 | } 243 | finalCols := strings.Join(escapedCols, ", ") 244 | // Replace the whole match with column list 245 | finalQuery := re.ReplaceAllString(query, finalCols) 246 | return finalQuery, nil 247 | } 248 | 249 | func formatValue(v any) string { 250 | switch val := v.(type) { 251 | case nil: 252 | return "NULL" 253 | case int, int32, int64: 254 | return fmt.Sprintf("%d", val) 255 | case float32, float64: 256 | return fmt.Sprintf("%f", val) 257 | case bool: 258 | if val { 259 | return "1" 260 | } 261 | return "0" 262 | case time.Time: 263 | return "'" + val.Format("2006-01-02 15:04:05") + "'" 264 | case []byte: 265 | return "'" + strings.ReplaceAll(string(val), "'", "''") + "'" 266 | case string: 267 | return "'" + strings.ReplaceAll(val, "'", "''") + "'" 268 | default: 269 | return "'" + strings.ReplaceAll(fmt.Sprintf("%v", val), "'", "''") + "'" 270 | } 271 | } 272 | 273 | func (etlx *ETLX) UpdateTarget(dbTargetConn db.DBInterface, sql_target string, data []map[string]any) (int, error) { 274 | sql, err := etlx.BuildInsertSQL(sql_target, data) 275 | if err != nil { 276 | return 0, err 277 | } 278 | //fmt.Println(sql) 279 | return dbTargetConn.ExecuteQuery(sql, []any{}...) 
280 | } 281 | -------------------------------------------------------------------------------- /internal/db/odbc.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "encoding/csv" 7 | "fmt" 8 | "io" 9 | "os" 10 | "reflect" 11 | "strings" 12 | "time" 13 | "unicode/utf8" 14 | 15 | "github.com/realdatadriven/etlx/internal/env" 16 | "golang.org/x/text/encoding/charmap" 17 | "golang.org/x/text/transform" 18 | 19 | _ "github.com/alexbrainman/odbc" 20 | ) 21 | 22 | type ODBC struct { 23 | *sql.DB 24 | } 25 | 26 | func NewODBC(dsn string) (*ODBC, error) { 27 | //fmt.Printf("DSN: %s\n", dsn) 28 | db, err := sql.Open("odbc", dsn) 29 | if err != nil { 30 | return nil, err 31 | } 32 | 33 | defaultTimeoutODBC = time.Duration(env.GetInt("ODBC_DFLT_TIMEOUT", 15)) * time.Minute 34 | //fmt.Println(driverName, dsn) 35 | db.SetMaxOpenConns(25) 36 | db.SetMaxIdleConns(25) 37 | db.SetConnMaxIdleTime(defaultTimeoutODBC) 38 | db.SetConnMaxLifetime(2 * time.Hour) 39 | return &ODBC{db}, nil 40 | } 41 | 42 | func (db *ODBC) New(dsn string) (*ODBC, error) { 43 | //fmt.Printf("DSN: %s\n", dsn) 44 | _db, err := sql.Open("odbc", dsn) 45 | if err != nil { 46 | return nil, err 47 | } 48 | //fmt.Println(driverName, dsn) 49 | _db.SetMaxOpenConns(25) 50 | _db.SetMaxIdleConns(25) 51 | _db.SetConnMaxIdleTime(5 * time.Minute) 52 | _db.SetConnMaxLifetime(2 * time.Hour) 53 | return &ODBC{_db}, nil 54 | } 55 | 56 | func (db *ODBC) ExecuteQuery(query string, data ...interface{}) (int, error) { 57 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutODBC) 58 | defer cancel() 59 | result, err := db.ExecContext(ctx, query, data...) 60 | if err != nil { 61 | return 0, err 62 | } 63 | id, err := result.LastInsertId() 64 | if err != nil { 65 | id = 0 66 | } 67 | return int(id), err 68 | } 69 | 70 | func (db *ODBC) ExecuteQueryRowsAffected(query string, data ...interface{}) (int64, error) { 71 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutODBC) 72 | defer cancel() 73 | result, err := db.ExecContext(ctx, query, data...) 74 | if err != nil { 75 | return 0, err 76 | } 77 | id, err := result.RowsAffected() 78 | if err != nil { 79 | return 0, err 80 | } 81 | return id, err 82 | } 83 | 84 | func (db *ODBC) QueryMultiRows(query string, params ...interface{}) (*[]map[string]interface{}, bool, error) { 85 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutODBC) 86 | defer cancel() 87 | var result []map[string]interface{} 88 | rows, err := db.QueryContext(ctx, query, params...) 89 | if err != nil { 90 | return nil, false, err 91 | } 92 | defer rows.Close() 93 | for rows.Next() { 94 | row, err := ScanRowToMap(rows) 95 | if err != nil { 96 | return nil, false, fmt.Errorf("failed to scan row to map: %w", err) 97 | } 98 | result = append(result, row) 99 | } 100 | return &result, true, err 101 | } 102 | 103 | func (db *ODBC) QueryRows(ctx context.Context, query string, params ...interface{}) (*sql.Rows, error) { 104 | return db.QueryContext(ctx, query, params...) 105 | } 106 | 107 | func (db *ODBC) QueryMultiRowsWithCols(query string, params ...interface{}) (*[]map[string]interface{}, []string, bool, error) { 108 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutODBC) 109 | defer cancel() 110 | var result []map[string]interface{} 111 | rows, err := db.QueryContext(ctx, query, params...) 
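	// Rows are scanned into maps keyed by column name; the column slice is
	// returned alongside the data because Go maps do not preserve column order.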
112 | if err != nil { 113 | return nil, nil, false, err 114 | } 115 | defer rows.Close() 116 | columns, err := rows.Columns() 117 | if err != nil { 118 | fmt.Printf("failed to get columns: %s", err) 119 | } 120 | for rows.Next() { 121 | row, err := ScanRowToMap(rows) 122 | if err != nil { 123 | return nil, nil, false, fmt.Errorf("failed to scan row to map: %w", err) 124 | } 125 | result = append(result, row) 126 | } 127 | return &result, columns, true, err 128 | } 129 | 130 | func (db *ODBC) AllTables(params map[string]interface{}, extra_conf map[string]interface{}) (*[]map[string]interface{}, bool, error) { 131 | // Logic to get all tables for DuckDB 132 | return nil, false, nil 133 | } 134 | 135 | func (db *ODBC) TableSchema(params map[string]interface{}, table string, dbName string, extra_conf map[string]interface{}) (*[]map[string]interface{}, bool, error) { 136 | return nil, false, nil 137 | } 138 | 139 | func (db *ODBC) ExecuteNamedQuery(query string, data map[string]interface{}) (int, error) { 140 | return 0, fmt.Errorf("not suported yet %s", "_") 141 | } 142 | 143 | func (db *ODBC) ExecuteQueryPGInsertWithLastInsertId(query string, data ...interface{}) (int, error) { 144 | return 0, fmt.Errorf("not suported %s", "_") 145 | } 146 | 147 | func isUTF8(s string) bool { 148 | return utf8.ValidString(s) 149 | } 150 | 151 | func convertToUTF8(isoStr string) (string, error) { 152 | if isUTF8(isoStr) { 153 | return isoStr, nil 154 | } 155 | reader := strings.NewReader(isoStr) 156 | transformer := charmap.ISO8859_1.NewDecoder() 157 | utf8Bytes, err := io.ReadAll(transform.NewReader(reader, transformer)) 158 | if err != nil { 159 | return "", err 160 | } 161 | return string(utf8Bytes), nil 162 | } 163 | 164 | func hasDecimalPlace(v interface{}) (bool, error) { 165 | // Try to cast v to float64 166 | floatVal, ok := v.(float64) 167 | if !ok { 168 | return false, fmt.Errorf("value is not a float64, it is %v", reflect.TypeOf(v)) 169 | } 170 | 171 | // Check if the float has a decimal part 172 | if floatVal != float64(int(floatVal)) { 173 | return true, nil 174 | } 175 | return false, nil 176 | } 177 | 178 | func (db *ODBC) Query2CSV(query string, csv_path string, params ...interface{}) (bool, error) { 179 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutODBC) 180 | defer cancel() 181 | rows, err := db.QueryContext(ctx, query, params...) 
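	// The result set is streamed straight into the CSV writer below,
	// normalizing NULLs to empty strings, formatting numerics without
	// scientific notation, and converting non-UTF-8 byte slices
	// (e.g. ISO-8859-1) to UTF-8.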
182 | if err != nil { 183 | //fmt.Println(1, err) 184 | return false, err 185 | } 186 | defer rows.Close() 187 | csvFile, err := os.Create(csv_path) 188 | if err != nil { 189 | return false, fmt.Errorf("error creating CSV file: %w", err) 190 | } 191 | defer csvFile.Close() 192 | // CSV 193 | csvWriter := csv.NewWriter(csvFile) 194 | defer csvWriter.Flush() 195 | // Get column names 196 | columns, err := rows.Columns() 197 | if err != nil { 198 | return false, fmt.Errorf("error getting column names: %w", err) 199 | } 200 | // Write column names to CSV header 201 | csvWriter.Write(columns) 202 | for rows.Next() { 203 | row, err := ScanRowToMap(rows) 204 | if err != nil { 205 | return false, fmt.Errorf("failed to scan row to map: %w", err) 206 | } 207 | var rowData []string 208 | //for _, value := range row { 209 | for _, col := range columns { 210 | value := row[col] 211 | //rowData = append(rowData, fmt.Sprintf("%v", value)) 212 | switch v := value.(type) { 213 | case nil: 214 | // Format integer types 215 | rowData = append(rowData, "") 216 | case int, int8, int16, int32, int64: 217 | // Format integer types 218 | rowData = append(rowData, fmt.Sprintf("%d", v)) 219 | case float64, float32: 220 | //fmt.Println(col, v) 221 | // Format large numbers without scientific notation 222 | hasDec, err := hasDecimalPlace(v) 223 | if err != nil { 224 | fmt.Println(err) 225 | rowData = append(rowData, fmt.Sprintf("%v", value)) 226 | } else if hasDec { 227 | rowData = append(rowData, fmt.Sprintf("%f", v)) 228 | } else { 229 | rowData = append(rowData, fmt.Sprintf("%.f", v)) 230 | } 231 | case []byte: 232 | // Convert byte slice (UTF-8 data) to a string 233 | utf8Str, err := convertToUTF8(string(v)) 234 | if err != nil { 235 | fmt.Println("Failed to convert to UTF-8:", v, err) 236 | } 237 | rowData = append(rowData, strings.TrimSpace(string(utf8Str))) 238 | default: 239 | // Default formatting for other types 240 | rowData = append(rowData, fmt.Sprintf("%v", value)) 241 | } 242 | } 243 | csvWriter.Write(rowData) 244 | } 245 | if err := rows.Err(); err != nil { 246 | return false, fmt.Errorf("error iterating rows: %w", err) 247 | } 248 | return true, nil 249 | } 250 | 251 | func (db *ODBC) QuerySingleRow(query string, params ...interface{}) (*map[string]interface{}, bool, error) { 252 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutODBC) 253 | defer cancel() 254 | result := map[string]interface{}{} 255 | rows, err := db.QueryContext(ctx, query, params...) 
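	// Only the first row, if any, is scanned; additional rows are ignored.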
256 | if err != nil { 257 | return nil, false, err 258 | } 259 | defer rows.Close() 260 | if rows.Next() { 261 | /*if err := rows.Scan(result); err != nil { 262 | return nil, false, err 263 | }*/ 264 | result, err = ScanRowToMap(rows) 265 | if err != nil { 266 | return nil, false, fmt.Errorf("failed to scan row to map: %w", err) 267 | } 268 | } 269 | return &result, true, err 270 | } 271 | 272 | func (db *ODBC) FromParams(params map[string]interface{}, extra_conf map[string]interface{}) (*DB, string, string, error) { 273 | return nil, "", "", fmt.Errorf("not implemented yet %s", "_") 274 | } 275 | 276 | func (db *ODBC) GetDriverName() string { 277 | return "odbc" 278 | } 279 | 280 | func (db *ODBC) GetUserByNameOrEmail(email string) (map[string]interface{}, bool, error) { 281 | return nil, false, fmt.Errorf("not implemented yet %s", "_") 282 | } 283 | 284 | func (db *ODBC) IsEmpty(value interface{}) bool { 285 | switch v := value.(type) { 286 | case nil: 287 | return true 288 | case string: 289 | return len(v) == 0 290 | case []interface{}: 291 | return len(v) == 0 292 | case map[interface{}]interface{}: 293 | return len(v) == 0 294 | default: 295 | return false 296 | } 297 | } 298 | -------------------------------------------------------------------------------- /internal/etlx/load_requirements.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | ) 8 | 9 | func (etlx *ETLX) LoadREQUIRES(conf map[string]any, keys ...string) ([]map[string]any, error) { 10 | key := "REQUIRES" 11 | if len(keys) > 0 && keys[0] != "" { 12 | key = keys[0] 13 | } 14 | //fmt.Println(key, dateRef) 15 | var processLogs []map[string]any 16 | start := time.Now() 17 | processLogs = append(processLogs, map[string]any{ 18 | "name": key, 19 | "start_at": start, 20 | }) 21 | mainDescription := "" 22 | // Define the runner as a simple function 23 | REQUIRESRunner := func(metadata map[string]any, itemKey string, item map[string]any) error { 24 | //fmt.Println(metadata, itemKey, item) 25 | // ACTIVE 26 | if active, okActive := metadata["active"]; okActive { 27 | if !active.(bool) { 28 | processLogs = append(processLogs, map[string]any{ 29 | "name": fmt.Sprintf("KEY %s", key), 30 | "description": metadata["description"].(string), 31 | "start_at": time.Now(), 32 | "end_at": time.Now(), 33 | "success": true, 34 | "msg": "Deactivated", 35 | }) 36 | return fmt.Errorf("dectivated %s", "") 37 | } 38 | } 39 | mainConn, _ := metadata["connection"].(string) 40 | mainDescription = metadata["description"].(string) 41 | itemMetadata, ok := item["metadata"].(map[string]any) 42 | if !ok { 43 | processLogs = append(processLogs, map[string]any{ 44 | "name": fmt.Sprintf("%s->%s", key, itemKey), 45 | "description": itemMetadata["description"].(string), 46 | "start_at": time.Now(), 47 | "end_at": time.Now(), 48 | "success": true, 49 | "msg": "Missing metadata in item", 50 | }) 51 | return nil 52 | } 53 | // ACTIVE 54 | if active, okActive := itemMetadata["active"]; okActive { 55 | if !active.(bool) { 56 | processLogs = append(processLogs, map[string]any{ 57 | "name": fmt.Sprintf("%s->%s", key, itemKey), 58 | "description": itemMetadata["description"].(string), 59 | "start_at": time.Now(), 60 | "end_at": time.Now(), 61 | "success": true, 62 | "msg": "Deactivated", 63 | }) 64 | return nil 65 | } 66 | } 67 | start3 := time.Now() 68 | _log2 := map[string]any{ 69 | "name": fmt.Sprintf("%s->%s", key, itemKey), 70 | "description": 
itemMetadata["description"].(string), 71 | "start_at": start3, 72 | } 73 | path, okPath := itemMetadata["path"] 74 | beforeSQL, okBefore := itemMetadata["before_sql"] 75 | query, okQuery := itemMetadata["query"] 76 | column, okColumn := itemMetadata["column"] 77 | afterSQL, okAfter := itemMetadata["after_sql"] 78 | config := make(map[string]any) 79 | etl := &ETLX{Config: config, autoLogsDisabled: true} 80 | var mdConf any 81 | if okQuery && query != "" { 82 | conn, okCon := itemMetadata["connection"] 83 | if !okCon { 84 | conn = mainConn 85 | } 86 | dbConn, err := etlx.GetDB(conn.(string)) 87 | if err != nil { 88 | _log2["success"] = false 89 | _log2["msg"] = fmt.Sprintf("%s -> %s ERR: connecting to %s in : %s", key, itemKey, conn, err) 90 | _log2["end_at"] = time.Now() 91 | _log2["duration"] = time.Since(start3).Seconds() 92 | processLogs = append(processLogs, _log2) 93 | return nil 94 | } 95 | defer dbConn.Close() 96 | _log2["success"] = true 97 | _log2["msg"] = fmt.Sprintf("%s -> %s CONN: Connectinon to %s successfull", key, itemKey, conn) 98 | _log2["end_at"] = time.Now() 99 | _log2["duration"] = time.Since(start3).Seconds() 100 | processLogs = append(processLogs, _log2) 101 | // QUERIES TO RUN AT BEGINING 102 | if okBefore { 103 | start3 := time.Now() 104 | _log2 := map[string]any{ 105 | "name": fmt.Sprintf("%s->%s", key, itemKey), 106 | "description": itemMetadata["description"].(string), 107 | "start_at": start3, 108 | } 109 | err = etlx.ExecuteQuery(dbConn, beforeSQL, item, "", "", nil) 110 | if err != nil { 111 | _log2["success"] = false 112 | _log2["msg"] = fmt.Sprintf("%s -> %s Before error: %s", key, itemKey, err) 113 | _log2["end_at"] = time.Now() 114 | _log2["duration"] = time.Since(start3).Seconds() 115 | } else { 116 | _log2["success"] = true 117 | _log2["msg"] = fmt.Sprintf("%s -> %s Before ", key, itemKey) 118 | _log2["end_at"] = time.Now() 119 | _log2["duration"] = time.Since(start3).Seconds() 120 | } 121 | processLogs = append(processLogs, _log2) 122 | } 123 | // MAIN QUERY 124 | rows, _, err := etlx.Query(dbConn, query.(string), item, "", "", nil) 125 | // Fetch data from the database using the provided SQL query 126 | if err != nil { 127 | _log2["success"] = false 128 | _log2["msg"] = fmt.Sprintf("%s -> %s -> failed to execute get md conf query: %s", key, itemKey, err) 129 | _log2["end_at"] = time.Now() 130 | _log2["duration"] = time.Since(start3).Seconds() 131 | processLogs = append(processLogs, _log2) 132 | return nil 133 | } 134 | if len(*rows) > 0 { 135 | okConf := false 136 | if column != nil && okColumn { 137 | mdConf, okConf = (*rows)[0][column.(string)] 138 | } else { 139 | mdConf, okConf = (*rows)[0]["conf"] 140 | } 141 | if okConf && mdConf != nil { 142 | err := etl.ConfigFromMDText(mdConf.(string)) 143 | if err != nil { 144 | _log2["success"] = false 145 | _log2["msg"] = fmt.Sprintf("Error parsing config string: %s", err) 146 | _log2["end_at"] = time.Now() 147 | _log2["duration"] = time.Since(start3).Seconds() 148 | processLogs = append(processLogs, _log2) 149 | return nil 150 | } 151 | } else { 152 | _log2["success"] = false 153 | _log2["msg"] = fmt.Sprintf("%s -> %s -> failed to get md conf string query: %s column %s", key, itemKey, query, column) 154 | _log2["end_at"] = time.Now() 155 | _log2["duration"] = time.Since(start3).Seconds() 156 | processLogs = append(processLogs, _log2) 157 | return nil 158 | } 159 | } else { 160 | _log2["success"] = false 161 | _log2["msg"] = fmt.Sprintf("%s -> %s -> failed to execute get md conf query: %s", key, itemKey, 
err) 162 | _log2["end_at"] = time.Now() 163 | _log2["duration"] = time.Since(start3).Seconds() 164 | processLogs = append(processLogs, _log2) 165 | return nil 166 | } 167 | // QUERIES TO RUN AT THE END 168 | if okAfter { 169 | start3 := time.Now() 170 | _log2 := map[string]any{ 171 | "name": fmt.Sprintf("%s->%s", key, itemKey), 172 | "description": itemMetadata["description"].(string), 173 | "start_at": start3, 174 | } 175 | err = etlx.ExecuteQuery(dbConn, afterSQL, item, "", "", nil) 176 | if err != nil { 177 | _log2["success"] = false 178 | _log2["msg"] = fmt.Sprintf("%s -> %s After error: %s", key, itemKey, err) 179 | _log2["end_at"] = time.Now() 180 | _log2["duration"] = time.Since(start3).Seconds() 181 | } else { 182 | _log2["success"] = true 183 | _log2["msg"] = fmt.Sprintf("%s -> %s After ", key, itemKey) 184 | _log2["end_at"] = time.Now() 185 | _log2["duration"] = time.Since(start3).Seconds() 186 | } 187 | processLogs = append(processLogs, _log2) 188 | } 189 | } else if path != nil && okPath { 190 | if ok, _ := fileExists(path.(string)); ok { 191 | err := etl.ConfigFromFile(path.(string)) 192 | if err != nil { 193 | _log2["success"] = false 194 | _log2["msg"] = fmt.Sprintf("Error parsing config: %s -> %s", path, err) 195 | _log2["end_at"] = time.Now() 196 | _log2["duration"] = time.Since(start3).Seconds() 197 | processLogs = append(processLogs, _log2) 198 | } 199 | } else { 200 | _log2["success"] = false 201 | _log2["msg"] = fmt.Sprintf("file doesn't exists: %s", path) 202 | _log2["end_at"] = time.Now() 203 | _log2["duration"] = time.Since(start3).Seconds() 204 | processLogs = append(processLogs, _log2) 205 | return nil 206 | } 207 | } 208 | //fmt.Println("LOADED ETLX CONF:", etl.Config) 209 | if len(etl.Config) == 1 && etl.Config["__order"] != nil { 210 | etlx.Config[itemKey] = map[string]any{} 211 | if okQuery && query != "" && mdConf != nil { 212 | //etlx.Config[itemKey].(map[string]any)[itemKey] = mdConf.(string) 213 | etlx.Config[itemKey] = mdConf.(string) 214 | } else if path != nil && okPath { 215 | data, err := os.ReadFile(path.(string)) 216 | if err != nil { 217 | fmt.Printf("LOAD RAW FILE: failed to read file: %s", err) 218 | } else { 219 | etlx.Config[itemKey] = string(data) 220 | } 221 | } 222 | } else { 223 | for newConfKey, value := range etl.Config { 224 | if newConfKey == "metadata" || newConfKey == "__order" || newConfKey == "order" { 225 | continue 226 | } 227 | if _, ok := etlx.Config[newConfKey]; !ok { 228 | etlx.Config[newConfKey] = value 229 | } else { 230 | fmt.Println(newConfKey, "Already exists!") 231 | } 232 | } 233 | } 234 | _log2["success"] = true 235 | _log2["msg"] = "Successfully loaded!" 
236 | _log2["end_at"] = time.Now() 237 | _log2["duration"] = time.Since(start3).Seconds() 238 | processLogs = append(processLogs, _log2) 239 | return nil 240 | } 241 | // Check if the input conf is nil or empty 242 | if conf == nil { 243 | conf = etlx.Config 244 | } 245 | // Process the MD KEY 246 | err := etlx.ProcessMDKey(key, conf, REQUIRESRunner) 247 | if err != nil { 248 | return processLogs, fmt.Errorf("%s failed: %v", key, err) 249 | } 250 | processLogs[0] = map[string]any{ 251 | "name": key, 252 | "description": mainDescription, 253 | "start_at": processLogs[0]["start_at"], 254 | "end_at": time.Now(), 255 | "duration": time.Since(start).Seconds(), 256 | } 257 | return processLogs, nil 258 | } 259 | -------------------------------------------------------------------------------- /examples/s3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "The [`httpfs`](https://duckdb.org/docs/extensions/httpfs/s3api, \"httpfs\") extension supports reading/writing/globbing files on object storage servers using the S3 API. S3 offers a standard API to read and write to remote files (while regular http servers, predating S3, do not offer a common write API). DuckDB conforms to the S3 API, that is now common among industry storage providers." 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The preferred way to configure and authenticate to S3 endpoints is to use secrets. Multiple secret providers are available" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# ETL" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "```yaml metadata\n", 29 | "name: S3_EXTRACT\n", 30 | "description: \"Example extrating from S3 to a local sqlite3 file\"\n", 31 | "connection: \"duckdb:\"\n", 32 | "active: true\n", 33 | "```" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## train_services" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "```yaml metadata\n", 48 | "name: train_services\n", 49 | "description: \"train_services\"\n", 50 | "table: train_services\n", 51 | "load_conn: \"duckdb:\"\n", 52 | "load_before_sql:\n", 53 | " - load_extentions\n", 54 | " - attach_db\n", 55 | "load_sql: load_query\n", 56 | "load_after_sql: detach_db\n", 57 | "drop_sql: drop_sql\n", 58 | "clean_sql: clean_sql\n", 59 | "rows_sql: nrows\n", 60 | "active: true\n", 61 | "```" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "```sql\n", 69 | "-- load_extentions\n", 70 | "INSTALL Sqlite;\n", 71 | "LOAD Sqlite;\n", 72 | "INSTALL httpfs;\n", 73 | "LOAD httpfs;\n", 74 | "```" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "```sql\n", 82 | "-- attach_db\n", 83 | "ATTACH 'examples/S3_EXTRACT.db' AS \"DB\" (TYPE SQLITE)\n", 84 | "```" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "```sql\n", 92 | "-- detach_db\n", 93 | "DETACH \"DB\";\n", 94 | "```" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "```sql\n", 102 | "-- load_query\n", 103 | "CREATE OR REPLACE TABLE \"DB\".\"
\" AS\n", 104 | "FROM 's3://duckdb-blobs/train_services.parquet';\n", 105 | "```" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "```sql\n", 113 | "-- drop_sql\n", 114 | "DROP TABLE IF EXISTS \"DB\".\"
\";\n", 115 | "```" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "```sql\n", 123 | "-- clean_sql\n", 124 | "DELETE FROM \"DB\".\"
\";\n", 125 | "```" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "```sql\n", 133 | "-- nrows\n", 134 | "SELECT COUNT(*) AS \"nrows\" FROM \"DB\".\"
\"\n", 135 | "```" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "## S3_EXTRACT" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "```yaml metadata\n", 150 | "name: S3_EXTRACT\n", 151 | "description: \"Example extrating from S3 to a local sqlite3 file\"\n", 152 | "table: S3_EXTRACT\n", 153 | "load_conn: \"duckdb:\"\n", 154 | "load_before_sql:\n", 155 | " - load_extentions\n", 156 | " - attach_db\n", 157 | " - create_S3_token\n", 158 | "load_sql: load_query\n", 159 | "load_after_sql: detach_db\n", 160 | "drop_sql: drop_sql\n", 161 | "clean_sql: clean_sql\n", 162 | "rows_sql: nrows\n", 163 | "active: true\n", 164 | "```" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "```sql\n", 172 | "-- load_extentions\n", 173 | "INSTALL httpfs;\n", 174 | "LOAD httpfs;\n", 175 | "```" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "```sql\n", 183 | "-- attach_db\n", 184 | "ATTACH 'examples/S3_EXTRACT.db' AS \"DB\" (TYPE SQLITE)\n", 185 | "```" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "Example with a [Minio](https://min.io/) local instance" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "```sql\n", 200 | "-- create_S3_token\n", 201 | "CREATE SECRET S3_token (\n", 202 | " TYPE S3,\n", 203 | " KEY_ID '@S3_KEY_ID',\n", 204 | " SECRET '@S3_SECRET',\n", 205 | " ENDPOINT '127.0.0.1:3000',\n", 206 | " URL_STYLE 'path'\n", 207 | ");\n", 208 | "```" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "```sql\n", 216 | "-- detach_db\n", 217 | "DETACH \"DB\";\n", 218 | "```" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "```sql\n", 226 | "-- load_query\n", 227 | "CREATE OR REPLACE TABLE \"DB\".\"
\" AS\n", 228 | "SELECT * \n", 229 | "FROM 's3://uploads/flights.csv';\n", 230 | "```" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "```sql\n", 238 | "-- drop_sql\n", 239 | "DROP TABLE IF EXISTS \"DB\".\"
\";\n", 240 | "```" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "```sql\n", 248 | "-- clean_sql\n", 249 | "DELETE FROM \"DB\".\"
\";\n", 250 | "```" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "```sql\n", 258 | "-- nrows\n", 259 | "SELECT COUNT(*) AS \"nrows\" FROM \"DB\".\"
\"\n", 260 | "```" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "```shell\n", 268 | "bin/etlx --config examples/s3.ipynb\n", 269 | "```" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "```shell\n", 277 | "bin/etlx --config examples/s3.ipynb\n", 278 | "```" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "# LOGS" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "```yaml metadata\n", 293 | "name: LOGS\n", 294 | "description: \"Example saving logs\"\n", 295 | "table: logs\n", 296 | "connection: \"duckdb:\"\n", 297 | "before_sql:\n", 298 | " - load_extentions\n", 299 | " - attach_db\n", 300 | " - get_dyn_queries[create_columns_missing]\n", 301 | "save_log_sql: load_query\n", 302 | "after_sql: detach_db\n", 303 | "active: true\n", 304 | "```" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "```sql\n", 312 | "-- load_extentions\n", 313 | "INSTALL Sqlite;\n", 314 | "LOAD Sqlite;\n", 315 | "```" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": {}, 321 | "source": [ 322 | "```sql\n", 323 | "-- attach_db\n", 324 | "ATTACH 'examples/S3_EXTRACT.db' AS \"DB\" (TYPE SQLITE)\n", 325 | "```" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "```sql\n", 333 | "-- detach_db\n", 334 | "DETACH \"DB\";\n", 335 | "```" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "```sql\n", 343 | "-- load_query\n", 344 | "CREATE OR REPLACE TABLE \"DB\".\"
\" AS\n", 345 | "SELECT * \n", 346 | "FROM '';\n", 347 | "```" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "```sql\n", 355 | "-- create_columns_missing\n", 356 | "WITH source_columns AS (\n", 357 | " SELECT column_name, column_type \n", 358 | " FROM (DESCRIBE SELECT * FROM read_json(''))\n", 359 | "),\n", 360 | "destination_columns AS (\n", 361 | " SELECT column_name, data_type as column_type\n", 362 | " FROM duckdb_columns \n", 363 | " WHERE table_name = '
'\n", 364 | "),\n", 365 | "missing_columns AS (\n", 366 | " SELECT s.column_name, s.column_type\n", 367 | " FROM source_columns s\n", 368 | " LEFT JOIN destination_columns d ON s.column_name = d.column_name\n", 369 | " WHERE d.column_name IS NULL\n", 370 | ")\n", 371 | "SELECT 'ALTER TABLE \"DB\".\"
\" ADD COLUMN \"' || column_name || '\" ' || column_type || ';' AS query\n", 372 | "FROM missing_columns" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [] 379 | } 380 | ], 381 | "metadata": { 382 | "kernelspec": { 383 | "display_name": "Python 3", 384 | "language": "python", 385 | "name": "python3" 386 | }, 387 | "language_info": { 388 | "codemirror_mode": { 389 | "name": "ipython", 390 | "version": 3 391 | }, 392 | "file_extension": ".py", 393 | "mimetype": "text/x-python", 394 | "name": "python", 395 | "nbconvert_exporter": "python", 396 | "pygments_lexer": "ipython3", 397 | "version": "3.11.7" 398 | } 399 | }, 400 | "nbformat": 4, 401 | "nbformat_minor": 2 402 | } 403 | -------------------------------------------------------------------------------- /internal/db/duckdb.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "time" 8 | 9 | "github.com/realdatadriven/etlx/internal/env" 10 | 11 | _ "github.com/duckdb/duckdb-go/v2" 12 | ) 13 | 14 | type DuckDB struct { 15 | *sql.DB 16 | } 17 | 18 | // ScanRowToMap converts a single row into a map[string]interface{}. 19 | func ScanRowToMap(rows *sql.Rows) (map[string]interface{}, error) { 20 | columns, err := rows.Columns() 21 | if err != nil { 22 | return nil, fmt.Errorf("failed to get columns: %w", err) 23 | } 24 | values := make([]interface{}, len(columns)) 25 | valuePointers := make([]interface{}, len(columns)) 26 | for i := range values { 27 | valuePointers[i] = &values[i] 28 | } 29 | if err := rows.Scan(valuePointers...); err != nil { 30 | return nil, fmt.Errorf("failed to scan row: %w", err) 31 | } 32 | rowMap := make(map[string]interface{}) 33 | for i, colName := range columns { 34 | rowMap[colName] = values[i] 35 | } 36 | return rowMap, nil 37 | } 38 | 39 | func NewDuckDB(dsn string) (*DuckDB, error) { 40 | //fmt.Printf("db DRIVER: %s DSN: %s\n", driverName, dsn) 41 | db, err := sql.Open("duckdb", dsn) 42 | if err != nil { 43 | return nil, err 44 | } 45 | defaultTimeoutDuckDB = time.Duration(env.GetInt("DUCKDB_DFLT_TIMEOUT", 15)) * time.Minute 46 | //fmt.Println(driverName, dsn) 47 | db.SetMaxOpenConns(25) 48 | db.SetMaxIdleConns(25) 49 | db.SetConnMaxIdleTime(defaultTimeoutDuckDB) 50 | db.SetConnMaxLifetime(2 * time.Hour) 51 | return &DuckDB{db}, nil 52 | } 53 | 54 | func (db *DuckDB) New(dsn string) (*DuckDB, error) { 55 | //fmt.Printf("db DRIVER: %s DSN: %s\n", driverName, dsn) 56 | _db, err := sql.Open("duckdb", dsn) 57 | if err != nil { 58 | return nil, err 59 | } 60 | //fmt.Println(driverName, dsn) 61 | _db.SetMaxOpenConns(25) 62 | _db.SetMaxIdleConns(25) 63 | _db.SetConnMaxIdleTime(defaultTimeoutDuckDB) 64 | _db.SetConnMaxLifetime(2 * time.Hour) 65 | return &DuckDB{_db}, nil 66 | } 67 | 68 | func (db *DuckDB) ExecuteQuery(query string, data ...interface{}) (int, error) { 69 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutDuckDB) 70 | defer cancel() 71 | result, err := db.ExecContext(ctx, query, data...) 72 | if err != nil { 73 | return 0, err 74 | } 75 | id, err := result.LastInsertId() 76 | if err != nil { 77 | return 0, err 78 | } 79 | return int(id), err 80 | } 81 | 82 | func (db *DuckDB) ExecuteQueryRowsAffected(query string, data ...interface{}) (int64, error) { 83 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutDuckDB) 84 | defer cancel() 85 | result, err := db.ExecContext(ctx, query, data...) 
86 | if err != nil { 87 | return 0, err 88 | } 89 | id, err := result.RowsAffected() 90 | if err != nil { 91 | return 0, err 92 | } 93 | return id, err 94 | } 95 | 96 | func (db *DuckDB) AllTables(params map[string]interface{}, extra_conf map[string]interface{}) (*[]map[string]interface{}, bool, error) { 97 | _query := `SELECT table_name as name FROM information_schema.tables` 98 | _query = `SHOW TABLES` 99 | // fmt.Println(_query) 100 | return db.QueryMultiRows(_query, []interface{}{}...) 101 | } 102 | 103 | func (db *DuckDB) TableSchema(params map[string]interface{}, table string, dbName string, extra_conf map[string]interface{}) (*[]map[string]interface{}, bool, error) { 104 | user_id := int(params["user"].(map[string]interface{})["user_id"].(float64)) 105 | /*_query := fmt.Sprintf(`SELECT ROW_NUMBER() OVER () - 1 AS cid 106 | , column_name AS name 107 | , data_type AS type 108 | , CASE is_nullable WHEN 'NO' THEN 1 ELSE 0 END AS notnull 109 | , column_default AS dflt_value 110 | , CASE 111 | WHEN column_name IN ( 112 | SELECT kcu.column_name 113 | FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu 114 | JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc 115 | ON kcu.constraint_name = tc.constraint_name 116 | WHERE tc.constraint_type = 'PRIMARY KEY' AND kcu.table_name = '%s' 117 | ) THEN 1 118 | ELSE 0 119 | END AS pk 120 | FROM information_schema.tables 121 | WHERE table_schema = 'public' 122 | AND table_name = '%s';`, table, table)*/ 123 | _query := fmt.Sprintf(`PRAGMA table_info("%s")`, table) 124 | //fmt.Println(table, _query) 125 | _aux_data := []map[string]interface{}{} 126 | _aux_data_fk := map[string]interface{}{} 127 | res, _, err := db.QueryMultiRows(_query, []interface{}{}...) 128 | if err != nil { 129 | return nil, false, err 130 | } 131 | _query = fmt.Sprintf(`WITH foreign_keys AS ( 132 | SELECT rc.constraint_name AS fk_name, 133 | rc.unique_constraint_name AS unique_name, 134 | kcu.table_name AS table, 135 | kcu.column_name AS "from", 136 | kcu.ordinal_position AS seq, 137 | kcu.table_name AS "to", 138 | kcu.column_name AS to_column, 139 | 'tc.delete_rule' AS on_delete, 140 | 'tc.update_rule' AS on_update 141 | FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS rc 142 | JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu ON rc.constraint_name = kcu.constraint_name 143 | JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc ON rc.constraint_name = tc.constraint_name 144 | WHERE kcu.table_name = '%s' 145 | ) 146 | SELECT ROW_NUMBER() OVER () - 1 AS id, 147 | seq, 148 | "table" AS parent_table, 149 | "from", 150 | "to", 151 | on_update, 152 | on_delete, 153 | 'NONE' AS match 154 | FROM foreign_keys;`, table) 155 | res_fk, _, err := db.QueryMultiRows(_query, []interface{}{}...) 
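	// The PRAGMA table_info rows are merged with the foreign-key lookup above
	// to build one normalized schema entry per column (pk, nullable, fk and
	// referred table/column, plus audit fields).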
156 | if err != nil { 157 | return nil, false, err 158 | } 159 | for _, row := range *res_fk { 160 | // fmt.Println(row) 161 | _aux_data_fk[row["from"].(string)] = map[string]interface{}{ 162 | "referred_table": row["table"].(string), 163 | "referred_column": row["to"].(string), 164 | } 165 | } 166 | for _, row := range *res { 167 | fk := false 168 | var referred_table string 169 | var referred_column string 170 | if _, exists := _aux_data_fk[row["name"].(string)]; exists { 171 | fk = true 172 | referred_table = _aux_data_fk[row["name"].(string)].(map[string]interface{})["referred_table"].(string) 173 | referred_column = _aux_data_fk[row["name"].(string)].(map[string]interface{})["referred_column"].(string) 174 | } 175 | pk := false 176 | if _pk, ok := row["pk"].(bool); ok { 177 | pk = _pk 178 | } else if _pk, ok := row["pk"].(int); ok { 179 | if _pk == 1 { 180 | pk = true 181 | } 182 | } 183 | nullable := false 184 | if notnull, ok := row["notnull"].(bool); ok { 185 | nullable = notnull 186 | } else if notnull, ok := row["notnull"].(int); ok { 187 | if notnull == 0 { 188 | nullable = true 189 | } 190 | } 191 | _aux_row := map[string]interface{}{ 192 | "db": dbName, 193 | "table": table, 194 | "field": row["name"].(string), 195 | "type": row["type"].(string), 196 | "comment": nil, 197 | "pk": pk, 198 | "autoincrement": nil, 199 | "nullable": nullable, 200 | "computed": nil, 201 | "default": nil, 202 | "fk": fk, 203 | "referred_table": referred_table, 204 | "referred_column": referred_column, 205 | "user_id": user_id, 206 | "created_at": time.Now(), 207 | "updated_at": time.Now(), 208 | "excluded": false, 209 | } 210 | // fmt.Println(1, row["name"].(string), _aux_row) 211 | _aux_data = append(_aux_data, _aux_row) 212 | } 213 | return &_aux_data, true, nil 214 | } 215 | 216 | func (db *DuckDB) QueryMultiRowsWithCols(query string, params ...interface{}) (*[]map[string]interface{}, []string, bool, error) { 217 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutDuckDB) 218 | defer cancel() 219 | var result []map[string]interface{} 220 | rows, err := db.QueryContext(ctx, query, params...) 221 | if err != nil { 222 | return nil, nil, false, err 223 | } 224 | defer rows.Close() 225 | columns, err := rows.Columns() 226 | if err != nil { 227 | fmt.Printf("failed to get columns: %s", err) 228 | } 229 | for rows.Next() { 230 | row, err := ScanRowToMap(rows) 231 | if err != nil { 232 | return nil, nil, false, fmt.Errorf("failed to scan row to map: %w", err) 233 | } 234 | result = append(result, row) 235 | } 236 | return &result, columns, true, err 237 | } 238 | 239 | func (db *DuckDB) QueryMultiRows(query string, params ...interface{}) (*[]map[string]interface{}, bool, error) { 240 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutDuckDB) 241 | defer cancel() 242 | var result []map[string]interface{} 243 | rows, err := db.QueryContext(ctx, query, params...) 
244 | if err != nil { 245 | //fmt.Println(1, err) 246 | return nil, false, err 247 | } 248 | defer rows.Close() 249 | //for rows.Next() { 250 | // row := map[string]interface{}{} 251 | for rows.Next() { 252 | row, err := ScanRowToMap(rows) 253 | if err != nil { 254 | return nil, false, fmt.Errorf("failed to scan row to map: %w", err) 255 | } 256 | result = append(result, row) 257 | } 258 | /*if err := rows.Scan(row); err != nil { 259 | return nil, false, err 260 | }*/ 261 | // result = append(result, row) 262 | //} 263 | return &result, true, err 264 | } 265 | 266 | func (db *DuckDB) QueryRows(ctx context.Context, query string, params ...interface{}) (*sql.Rows, error) { 267 | return db.QueryContext(ctx, query, params...) 268 | } 269 | 270 | func (db *DuckDB) QuerySingleRow(query string, params ...interface{}) (*map[string]interface{}, bool, error) { 271 | ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutDuckDB) 272 | defer cancel() 273 | result := map[string]interface{}{} 274 | rows, err := db.QueryContext(ctx, query, params...) 275 | if err != nil { 276 | return nil, false, err 277 | } 278 | defer rows.Close() 279 | if rows.Next() { 280 | result, err = ScanRowToMap(rows) 281 | if err != nil { 282 | return nil, false, fmt.Errorf("failed to scan row to map: %w", err) 283 | } 284 | } 285 | return &result, true, err 286 | } 287 | 288 | func (db *DuckDB) ExecuteNamedQuery(query string, data map[string]interface{}) (int, error) { 289 | return 0, fmt.Errorf("not implemented yet %s", "_") 290 | } 291 | 292 | func (db *DuckDB) ExecuteQueryPGInsertWithLastInsertId(query string, data ...interface{}) (int, error) { 293 | return 0, fmt.Errorf("not implemented yet %s", "_") 294 | } 295 | 296 | func (db *DuckDB) FromParams(params map[string]interface{}, extra_conf map[string]interface{}) (*DB, string, string, error) { 297 | return nil, "", "", fmt.Errorf("not implemented yet %s", "_") 298 | } 299 | 300 | func (db *DuckDB) GetDriverName() string { 301 | return "duckdb" 302 | } 303 | 304 | func (db *DuckDB) GetUserByNameOrEmail(email string) (map[string]interface{}, bool, error) { 305 | return nil, false, fmt.Errorf("not implemented yet %s", "_") 306 | } 307 | 308 | func (db *DuckDB) Query2CSV(query string, csv_path string, params ...interface{}) (bool, error) { 309 | return false, fmt.Errorf("not implemented yet %s", "_") 310 | } 311 | 312 | func (db *DuckDB) IsEmpty(value interface{}) bool { 313 | switch v := value.(type) { 314 | case nil: 315 | return true 316 | case string: 317 | return len(v) == 0 318 | case []interface{}: 319 | return len(v) == 0 320 | case map[interface{}]interface{}: 321 | return len(v) == 0 322 | default: 323 | return false 324 | } 325 | } 326 | -------------------------------------------------------------------------------- /examples/http.cs.md: -------------------------------------------------------------------------------- 1 | # ETL 2 | 3 | 4 | 5 | ```yaml metadata 6 | name: HTTP_EXTRACT 7 | description: "Example extrating from web to a local sqlite3 file" 8 | connection: "duckdb:" 9 | database: HTTP_EXTRACT.db 10 | active: true 11 | ``` 12 | 13 | ## VERSION 14 | 15 | ```yaml metadata 16 | name: VERSION 17 | description: "DDB Version" 18 | table: VERSION 19 | load_conn: "duckdb:" 20 | load_before_sql: "ATTACH 'database/HTTP_EXTRACT.db' AS DB (TYPE SQLITE)" 21 | load_sql: 'CREATE OR REPLACE TABLE DB."
" AS SELECT version() AS "VERSION";' 22 | load_after_sql: "DETACH DB;" 23 | rows_sql: 'SELECT COUNT(*) AS "nrows" FROM DB."
"' 24 | active: true 25 | ``` 26 | 27 | ## NYC_TAXI 28 | 29 | ```yaml metadata 30 | name: NYC_TAXI 31 | description: "Example extrating from web to a local sqlite3 file" 32 | table: NYC_TAXI 33 | load_conn: "duckdb:" 34 | load_before_sql: 35 | - load_extentions 36 | - attach_db 37 | load_sql: load_query 38 | load_after_sql: detach_db 39 | drop_sql: drop_sql 40 | clean_sql: clean_sql 41 | rows_sql: nrows 42 | active: false 43 | ``` 44 | 45 | ```sql 46 | -- load_extentions 47 | INSTALL sqlite; 48 | LOAD sqlite; 49 | ``` 50 | 51 | ```sql 52 | -- attach_db 53 | ATTACH 'database/HTTP_EXTRACT.db' AS "DB" (TYPE SQLITE) 54 | ``` 55 | 56 | ```sql 57 | -- detach_db 58 | DETACH "DB"; 59 | ``` 60 | 61 | ```sql 62 | -- load_query 63 | CREATE OR REPLACE TABLE "DB"."
" AS 64 | SELECT * 65 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2024-01.parquet'; 66 | ``` 67 | 68 | ```sql 69 | -- drop_sql 70 | DROP TABLE IF EXISTS "DB"."
"; 71 | ``` 72 | 73 | ```sql 74 | -- clean_sql 75 | DELETE FROM "DB"."
"; 76 | ``` 77 | 78 | ```sql 79 | -- nrows 80 | SELECT COUNT(*) AS "nrows" FROM "DB"."
" 81 | ``` 82 | 83 | # DATA_QUALITY 84 | 85 | ```yaml 86 | description: "Runs some queries to check quality / validate." 87 | active: false 88 | ``` 89 | 90 | ## Rule0001 91 | 92 | ```yaml 93 | name: Rule0001 94 | description: "Check if the field trip_distance from the NYC_TAXI is missing or zero" 95 | connection: "duckdb:" 96 | before_sql: 97 | - "LOAD sqlite" 98 | - "ATTACH 'database/HTTP_EXTRACT.db' AS \"DB\" (TYPE SQLITE)" 99 | query: quality_check_query 100 | fix_quality_err: fix_quality_err_query 101 | column: total_reg_with_err # Defaults to 'total'. 102 | check_only: true 103 | fix_only: false 104 | after_sql: "DETACH DB" 105 | active: true 106 | ``` 107 | 108 | ```sql 109 | -- quality_check_query 110 | SELECT COUNT(*) AS "total_reg_with_err" 111 | FROM "DB"."NYC_TAXI" 112 | WHERE "trip_distance" IS NULL 113 | OR "trip_distance" = 0; 114 | ``` 115 | 116 | ```sql 117 | -- fix_quality_err_query 118 | UPDATE "DB"."NYC_TAXI" 119 | SET "trip_distance" = "trip_distance" 120 | WHERE "trip_distance" IS NULL 121 | OR "trip_distance" = 0; 122 | ``` 123 | 124 | # MULTI_QUERIES 125 | 126 | ```yaml 127 | description: "Define multiple structured queries combined with UNION." 128 | connection: "duckdb:" 129 | before_sql: 130 | - "LOAD sqlite" 131 | - "ATTACH 'database/HTTP_EXTRACT.db' AS \"DB\" (TYPE SQLITE)" 132 | save_sql: save_mult_query_res 133 | save_on_err_patt: '(?i)table.+with.+name.+(\w+).+does.+not.+exist' 134 | save_on_err_sql: create_mult_query_res 135 | after_sql: "DETACH DB" 136 | union_key: "UNION ALL\n" # Defaults to UNION. 137 | active: false 138 | ``` 139 | 140 | ```sql 141 | -- save_mult_query_res 142 | INSERT INTO "DB"."MULTI_QUERY" BY NAME 143 | [[final_query]] 144 | ``` 145 | 146 | ```sql 147 | -- create_mult_query_res 148 | CREATE OR REPLACE TABLE "DB"."MULTI_QUERY" AS 149 | [[final_query]] 150 | ``` 151 | 152 | ## Row1 153 | 154 | ```yaml 155 | name: Row1 156 | description: "Row 1" 157 | query: row_query 158 | active: true 159 | ``` 160 | 161 | ```sql 162 | -- row_query 163 | SELECT '# number of rows' AS "variable", COUNT(*) AS "value" 164 | FROM "DB"."NYC_TAXI" 165 | ``` 166 | 167 | ## Row2 168 | 169 | ```yaml 170 | name: Row2 171 | description: "Row 2" 172 | query: row_query 173 | active: true 174 | ``` 175 | 176 | ```sql 177 | -- row_query 178 | SELECT 'total revenue' AS "variable", SUM("total_amount") AS "value" 179 | FROM "DB"."NYC_TAXI" 180 | ``` 181 | 182 | ## Row3 183 | 184 | ```yaml 185 | name: Row3 186 | description: "Row 3" 187 | query: row_query 188 | active: true 189 | ``` 190 | 191 | ```sql 192 | -- row_query 193 | SELECT * 194 | FROM ( 195 | SELECT "DOLocationID" AS "variable", SUM("total_amount") AS "value" 196 | FROM "DB"."NYC_TAXI" 197 | GROUP BY "DOLocationID" 198 | ORDER BY "DOLocationID" 199 | ) AS "T" 200 | ``` 201 | 202 | # EXPORTS 203 | 204 | Exports data to files. 
205 | 206 | ```yaml metadata 207 | name: DailyReports 208 | description: "Daily reports" 209 | connection: "duckdb:" 210 | path: "static/uploads/tmp" 211 | active: true 212 | ``` 213 | 214 | ## CSV_EXPORT 215 | 216 | ```yaml metadata 217 | name: CSV_EXPORT 218 | description: "Export data to CSV" 219 | connection: "duckdb:" 220 | before_sql: 221 | - "INSTALL sqlite" 222 | - "LOAD sqlite" 223 | - "ATTACH 'database/HTTP_EXTRACT.db' AS DB (TYPE SQLITE)" 224 | export_sql: export 225 | after_sql: "DETACH DB" 226 | path: 'nyc_taxy_YYYYMMDD.csv' 227 | tmp_prefix: 'tmp' 228 | active: false 229 | ``` 230 | 231 | ```sql 232 | -- export 233 | COPY ( 234 | SELECT * 235 | FROM "DB"."NYC_TAXI" 236 | WHERE "tpep_pickup_datetime"::DATETIME <= '{YYYY-MM-DD}' 237 | LIMIT 100 238 | ) TO '' (FORMAT 'csv', HEADER TRUE); 239 | ``` 240 | 241 | ## XLSX_EXPORT 242 | 243 | ```yaml metadata 244 | name: XLSX_EXPORT 245 | description: "Export data to Excel file" 246 | connection: "duckdb:" 247 | before_sql: 248 | - "INSTALL sqlite" 249 | - "LOAD sqlite" 250 | - "INSTALL excel" 251 | - "LOAD excel" 252 | - "ATTACH 'database/HTTP_EXTRACT.db' AS DB (TYPE SQLITE)" 253 | export_sql: xl_export 254 | after_sql: "DETACH DB" 255 | path: 'nyc_taxy_YYYYMMDD.xlsx' 256 | tmp_prefix: 'tmp' 257 | active: false 258 | ``` 259 | 260 | ```sql 261 | -- xl_export 262 | COPY ( 263 | SELECT * 264 | FROM "DB"."NYC_TAXI" 265 | WHERE "tpep_pickup_datetime"::DATETIME <= '{YYYY-MM-DD}' 266 | LIMIT 100 267 | ) TO '' (FORMAT XLSX, HEADER TRUE, SHEET 'NYC'); 268 | ``` 269 | 270 | ## XLSX_TMPL 271 | 272 | ```yaml metadata 273 | name: XLSX_TMPL 274 | description: "Export data to Excel template" 275 | connection: "duckdb:" 276 | before_sql: 277 | - "INSTALL sqlite" 278 | - "LOAD sqlite" 279 | - "ATTACH 'database/HTTP_EXTRACT.db' AS DB (TYPE SQLITE)" 280 | after_sql: "DETACH DB" 281 | tmp_prefix: 'tmp' 282 | template: "../nyc_taxy_YYYYMMDD.xlsx" 283 | path: "nyc_taxy_YYYYMMDD.xlsx" 284 | mapping: 285 | - sheet: resume 286 | range: A2 287 | sql: resume 288 | type: value 289 | key: total 290 | - sheet: detail 291 | range: A1 292 | sql: detail 293 | type: range 294 | header: true 295 | active: true 296 | ``` 297 | 298 | ```sql 299 | -- resume 300 | SELECT COUNT(*) AS "total" 301 | FROM "DB"."NYC_TAXI" 302 | WHERE "tpep_pickup_datetime"::DATETIME <= '{YYYY-MM-DD}' 303 | ``` 304 | 305 | ```sql 306 | -- detail2 307 | SELECT * 308 | FROM "DB"."NYC_TAXI" 309 | WHERE "tpep_pickup_datetime"::DATETIME <= '{YYYY-MM-DD}' 310 | LIMIT 100 311 | ``` 312 | 313 | ```sql 314 | -- detail 315 | pivot (select * from "DB"."NYC_TAXI") as t 316 | on strftime("tpep_pickup_datetime"::datetime, '%d') 317 | using sum(total_amount) AS total, count(*) AS total_trips 318 | group by PULocationID 319 | ``` 320 | 321 | 322 | ```sql 323 | -- data_to_export 324 | SELECT * 325 | FROM "DB"."NYC_TAXI" 326 | WHERE "tpep_pickup_datetime"::DATETIME <= '{YYYY-MM-DD}' 327 | LIMIT 100 328 | ``` 329 | 330 | # LOGS 331 | 332 | ```yaml metadata 333 | name: LOGS 334 | description: "Example saving logs" 335 | table: etlx_logs 336 | connection: "duckdb:" 337 | before_sql: 338 | - load_extentions 339 | - attach_db 340 | - 'USE DB;' 341 | save_log_sql: load_logs 342 | save_on_err_patt: '(?i)table.+does.+not.+exist|does.+not.+have.+column.+with.+name' 343 | save_on_err_sql: 344 | - create_logs 345 | - get_dyn_queries[create_columns_missing] 346 | - load_logs 347 | after_sql: 348 | - 'USE memory;' 349 | - detach_db 350 | tmp_dir: /tmp 351 | active: true 352 | ``` 353 | 354 | ```sql 355 | -- load_extentions 
356 | INSTALL Sqlite; 357 | LOAD Sqlite; 358 | INSTALL json; 359 | LOAD json; 360 | ``` 361 | 362 | ```sql 363 | -- attach_db 364 | ATTACH 'database/HTTP_EXTRACT.db' AS "DB" (TYPE SQLITE) 365 | ``` 366 | 367 | ```sql 368 | -- detach_db 369 | DETACH "DB"; 370 | ``` 371 | 372 | ```sql 373 | -- load_logs 374 | INSERT INTO "DB"."
" BY NAME 375 | SELECT * 376 | FROM read_json(''); 377 | ``` 378 | 379 | ```sql 380 | -- create_logs 381 | CREATE TABLE IF NOT EXISTS "DB"."
" AS 382 | SELECT * 383 | FROM read_json(''); 384 | ``` 385 | 386 | ```sql 387 | -- create_columns_missing 388 | WITH source_columns AS ( 389 | SELECT column_name, column_type 390 | FROM (DESCRIBE SELECT * FROM read_json('')) 391 | ), 392 | destination_columns AS ( 393 | SELECT column_name, data_type as column_type 394 | FROM duckdb_columns 395 | WHERE table_name = '
' 396 | ), 397 | missing_columns AS ( 398 | SELECT s.column_name, s.column_type 399 | FROM source_columns s 400 | LEFT JOIN destination_columns d ON s.column_name = d.column_name 401 | WHERE d.column_name IS NULL 402 | ) 403 | SELECT 'ALTER TABLE "DB"."
" ADD COLUMN "' || column_name || '" ' || column_type || ';' AS query 404 | FROM missing_columns; 405 | ``` 406 | 407 | # NOTIFY 408 | 409 | ```yaml metadata 410 | name: Notefication 411 | description: "Notefication" 412 | connection: "duckdb:" 413 | path: "examples" 414 | active: false 415 | ``` 416 | 417 | ## ETL_STATUS 418 | 419 | ```yaml metadata 420 | name: ETL_STATUS 421 | description: "ETL Satus" 422 | connection: "duckdb:" 423 | before_sql: 424 | - "INSTALL sqlite" 425 | - "LOAD sqlite" 426 | - "ATTACH 'database/HTTP_EXTRACT.db' AS DB (TYPE SQLITE)" 427 | data_sql: 428 | - logs 429 | after_sql: "DETACH DB" 430 | to: 431 | - real.datadriven@gmail.com 432 | cc: null 433 | bcc: null 434 | subject: 'ETLX YYYYMMDD' 435 | body: body_tml 436 | attachments_: 437 | - hf.md 438 | - http.md 439 | active: true 440 | ``` 441 | 442 | ```html body_tml 443 | Good Morning

444 | This email is generated by ETLX automatically!<br>
445 | {{ with .logs }}
446 | {{ if eq .success true }}
447 | <table>
448 | <tr>
449 | <th>Name</th>
450 | <th>Ref</th>
451 | <th>Start</th>
452 | <th>End</th>
453 | <th>Duration</th>
454 | <th>Success</th>
455 | <th>Message</th>
456 | </tr>
457 | {{ range .data }}
458 | <tr>
459 | <td>{{ .name }}</td>
460 | <td>{{ .ref }}</td>
461 | <td>{{ .start_at }}</td>
462 | <td>{{ .end_at }}</td>
463 | <td>{{ .duration }}</td>
464 | <td>{{ .success }}</td>
465 | <td>{{ .msg }}</td>
466 | </tr>
467 | {{ else }}
468 | <tr>
469 | <td>No items available</td>
470 | </tr>
471 | {{ end }}
472 | </table>
473 | {{ else }}
474 | <br>{{.msg}}<br>
475 | {{ end }}
476 | {{ else }}
477 | <br>Logs information missing.<br>
478 | {{ end }} 479 | ``` 480 | 481 | ```sql 482 | -- logs 483 | SELECT * 484 | FROM "DB"."etlx_logs" 485 | WHERE "ref" = '{YYYY-MM-DD}' 486 | ``` 487 | -------------------------------------------------------------------------------- /internal/etlx/run_notify.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "time" 8 | ) 9 | 10 | func (etlx *ETLX) RunNOTIFY(dateRef []time.Time, conf map[string]any, extraConf map[string]any, keys ...string) ([]map[string]any, error) { 11 | key := "NOTIFY" 12 | if len(keys) > 0 && keys[0] != "" { 13 | key = keys[0] 14 | } 15 | //fmt.Println(key, dateRef) 16 | var processLogs []map[string]any 17 | start := time.Now() 18 | processLogs = append(processLogs, map[string]any{ 19 | "name": key, 20 | "key": key, "start_at": start, 21 | }) 22 | mainDescription := "" 23 | // Define the runner as a simple function 24 | NOTIFYRunner := func(metadata map[string]any, itemKey string, item map[string]any) error { 25 | //fmt.Println(metadata, itemKey, item) 26 | // ACTIVE 27 | if active, okActive := metadata["active"]; okActive { 28 | if !active.(bool) { 29 | processLogs = append(processLogs, map[string]any{ 30 | "name": fmt.Sprintf("KEY %s", key), 31 | "description": metadata["description"].(string), 32 | "key": key, "item_key": itemKey, "start_at": time.Now(), 33 | "end_at": time.Now(), 34 | "success": true, 35 | "msg": "Deactivated", 36 | }) 37 | return fmt.Errorf("deactivated %s", "") 38 | } 39 | } 40 | // MAIN PATH 41 | mainPath, okMainPath := metadata["path"].(string) 42 | mainConn, _ := metadata["connection"].(string) 43 | mainDescription = metadata["description"].(string) 44 | itemMetadata, ok := item["metadata"].(map[string]any) 45 | if !ok { 46 | processLogs = append(processLogs, map[string]any{ 47 | "name": fmt.Sprintf("%s->%s", key, itemKey), 48 | "description": itemMetadata["description"].(string), 49 | "key": key, "item_key": itemKey, "start_at": time.Now(), 50 | "end_at": time.Now(), 51 | "success": true, 52 | "msg": "Missing metadata in item", 53 | }) 54 | return nil 55 | } 56 | // ACTIVE 57 | if active, okActive := itemMetadata["active"]; okActive { 58 | if !active.(bool) { 59 | processLogs = append(processLogs, map[string]any{ 60 | "name": fmt.Sprintf("%s->%s", key, itemKey), 61 | "description": itemMetadata["description"].(string), 62 | "key": key, "item_key": itemKey, "start_at": time.Now(), 63 | "end_at": time.Now(), 64 | "success": true, 65 | "msg": "Deactivated", 66 | }) 67 | return nil 68 | } 69 | } 70 | beforeSQL, okBefore := itemMetadata["before_sql"] 71 | dataSQL, okData := itemMetadata["data_sql"] 72 | afterSQL, okAfter := itemMetadata["after_sql"] 73 | conn, okCon := itemMetadata["connection"] 74 | if !okCon { 75 | conn = mainConn 76 | } 77 | dtRef, okDtRef := itemMetadata["date_ref"] 78 | if okDtRef && dtRef != "" { 79 | _dt, err := time.Parse("2006-01-02", dtRef.(string)) 80 | if err == nil { 81 | dateRef = append([]time.Time{}, _dt) 82 | } 83 | } else { 84 | if len(dateRef) > 0 { 85 | dtRef = dateRef[0].Format("2006-01-02") 86 | } 87 | } 88 | start3 := time.Now() 89 | _log2 := map[string]any{ 90 | "name": fmt.Sprintf("%s->%s", key, itemKey), 91 | "description": itemMetadata["description"].(string), 92 | "key": key, "item_key": itemKey, "start_at": start3, 93 | "ref": dtRef, 94 | } 95 | dbConn, err := etlx.GetDB(conn.(string)) 96 | if err != nil { 97 | _log2["success"] = false 98 | _log2["msg"] = fmt.Sprintf("%s -> %s ERR: 
connecting to %s in : %s", key, itemKey, conn, err) 99 | _log2["end_at"] = time.Now() 100 | _log2["duration"] = time.Since(start3).Seconds() 101 | processLogs = append(processLogs, _log2) 102 | return nil 103 | } 104 | defer dbConn.Close() 105 | _log2["success"] = true 106 | _log2["msg"] = fmt.Sprintf("%s -> %s CONN: connection to %s successfull", key, itemKey, conn) 107 | _log2["end_at"] = time.Now() 108 | _log2["duration"] = time.Since(start3).Seconds() 109 | processLogs = append(processLogs, _log2) 110 | // FILE 111 | table := itemMetadata["name"].(string) 112 | path, okPath := itemMetadata["path"].(string) 113 | if !okPath { 114 | if okMainPath { 115 | var pth any = mainPath 116 | itemMetadata["path"] = pth 117 | } 118 | } 119 | fname := fmt.Sprintf(`%s/%s_{YYYYMMDD}.csv`, os.TempDir(), table) 120 | if okPath && path != "" { 121 | fname = path 122 | if filepath.IsAbs(fname) { 123 | } else if filepath.IsLocal(fname) { 124 | fname = fmt.Sprintf(`%s/%s`, mainPath, fname) 125 | } else if filepath.Dir(fname) != "" && okMainPath && mainPath != "" { 126 | fname = fmt.Sprintf(`%s/%s`, mainPath, fname) 127 | } 128 | } else if okMainPath && mainPath != "" { 129 | fname = fmt.Sprintf(`%s/%s_{YYYYMMDD}.csv`, mainPath, table) 130 | } 131 | // QUERIES TO RUN AT beginning 132 | if okBefore { 133 | start3 := time.Now() 134 | _log2 := map[string]any{ 135 | "name": fmt.Sprintf("%s->%s", key, itemKey), 136 | "description": itemMetadata["description"].(string), 137 | "key": key, "item_key": itemKey, "start_at": start3, 138 | } 139 | err = etlx.ExecuteQuery(dbConn, beforeSQL, item, fname, "", dateRef) 140 | if err != nil { 141 | _log2["success"] = false 142 | _log2["msg"] = fmt.Sprintf("%s -> %s Before error: %s", key, itemKey, err) 143 | _log2["end_at"] = time.Now() 144 | _log2["duration"] = time.Since(start3).Seconds() 145 | } else { 146 | _log2["success"] = true 147 | _log2["msg"] = fmt.Sprintf("%s -> %s Before ", key, itemKey) 148 | _log2["end_at"] = time.Now() 149 | _log2["duration"] = time.Since(start3).Seconds() 150 | } 151 | processLogs = append(processLogs, _log2) 152 | } 153 | // CHECK CONDITION 154 | condition, okCondition := itemMetadata["condition"].(string) 155 | condMsg, okCondMsg := itemMetadata["condition_msg"].(string) 156 | failedCondition := false 157 | if okCondition && condition != "" { 158 | cond, err := etlx.ExecuteCondition(dbConn, condition, itemMetadata, fname, "", dateRef) 159 | if err != nil { 160 | _log2["success"] = false 161 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed %s", key, itemKey, err) 162 | _log2["end_at"] = time.Now() 163 | _log2["duration"] = time.Since(start3).Seconds() 164 | processLogs = append(processLogs, _log2) 165 | //return fmt.Errorf("%s", _log2["msg"]) 166 | failedCondition = true 167 | } else if !cond { 168 | _log2["success"] = false 169 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed the condition %s was not met!", key, itemKey, condition) 170 | _log2["end_at"] = time.Now() 171 | _log2["duration"] = time.Since(start3).Seconds() 172 | if okCondMsg && condMsg != "" { 173 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed %s", key, itemKey, etlx.SetQueryPlaceholders(condMsg, table, fname, dateRef)) 174 | } 175 | processLogs = append(processLogs, _log2) 176 | // return fmt.Errorf("%s", _log2["msg"]) 177 | failedCondition = true 178 | } 179 | } 180 | data := map[string]any{} 181 | // MAIN QUERIES 182 | if okData && !failedCondition { 183 | start3 := time.Now() 184 | _log2 := map[string]any{ 185 | "name": fmt.Sprintf("%s->%s", key, itemKey), 186 | 
"description": itemMetadata["description"].(string), 187 | "key": key, "item_key": itemKey, "start_at": start3, 188 | } 189 | switch _map := dataSQL.(type) { 190 | case string: 191 | sql := _map 192 | if _, ok := item[_map]; ok { 193 | sql = item[sql].(string) 194 | } 195 | sql = etlx.SetQueryPlaceholders(sql, table, fname, dateRef) 196 | rows, _, err := etlx.Query(dbConn, sql, item, fname, "", dateRef) 197 | if err != nil { 198 | data[_map] = map[string]any{ 199 | "success": false, 200 | "msg": fmt.Sprintf("failed to execute map query %s %s", _map, err), 201 | "data": []map[string]any{}, 202 | } 203 | } else { 204 | data[_map] = map[string]any{ 205 | "success": true, 206 | "data": *rows, 207 | } 208 | } 209 | case []any: 210 | for _, _sql := range dataSQL.([]any) { 211 | sql := _sql.(string) 212 | if _, ok := item[_sql.(string)]; ok { 213 | sql = item[_sql.(string)].(string) 214 | } 215 | sql = etlx.SetQueryPlaceholders(sql, table, fname, dateRef) 216 | rows, _, err := etlx.Query(dbConn, sql, item, fname, "", dateRef) 217 | if err != nil { 218 | data[_sql.(string)] = map[string]any{ 219 | "success": false, 220 | "msg": fmt.Sprintf("failed to execute map query %s %s", _map, err), 221 | "data": []map[string]any{}, 222 | } 223 | } else { 224 | data[_sql.(string)] = map[string]any{ 225 | "success": true, 226 | "data": *rows, 227 | } 228 | } 229 | } 230 | default: 231 | _log2["success"] = false 232 | _log2["msg"] = fmt.Sprintf("%s -> %s invalid queries data type: %T", key, itemKey, _map) 233 | _log2["end_at"] = time.Now() 234 | _log2["duration"] = time.Since(start3).Seconds() 235 | } 236 | if _, ok := itemMetadata["data"].(map[string]any); ok { 237 | for key, d := range data { 238 | itemMetadata["data"].(map[string]any)[key] = d 239 | } 240 | } else { 241 | itemMetadata["data"] = data 242 | } 243 | itemMetadata["subject"] = etlx.SetQueryPlaceholders(itemMetadata["subject"].(string), table, fname, dateRef) 244 | body, ok := item[itemMetadata["body"].(string)].(string) 245 | if ok { 246 | itemMetadata["body"] = body 247 | } 248 | //itemMetadata["body"] = etlx.SetQueryPlaceholders(itemMetadata["body"].(string), table, fname, dateRef) 249 | attachments, okAtt := itemMetadata["attachments"].([]any) 250 | atts := []any{} 251 | var aux_att any 252 | if okAtt { 253 | for _, att := range attachments { 254 | aux_att = etlx.SetQueryPlaceholders(att.(string), table, fname, dateRef) 255 | // fmt.Println("ATT:", aux_att) 256 | atts = append(atts, aux_att) 257 | } 258 | itemMetadata["attachments"] = atts 259 | } 260 | err := etlx.SendEmail(itemMetadata) 261 | if err != nil { 262 | _log2["success"] = false 263 | _log2["msg"] = fmt.Sprintf("%s -> %s err sending email: %s", key, itemKey, err) 264 | _log2["end_at"] = time.Now() 265 | _log2["duration"] = time.Since(start3).Seconds() 266 | } else { 267 | _log2["success"] = true 268 | _log2["msg"] = fmt.Sprintf("%s -> %s Notefication sent!", key, itemKey) 269 | _log2["end_at"] = time.Now() 270 | _log2["duration"] = time.Since(start3).Seconds() 271 | } 272 | //fmt.Println(key, _log2["msg"]) 273 | processLogs = append(processLogs, _log2) 274 | } 275 | // QUERIES TO RUN AT THE END 276 | if okAfter { 277 | start3 := time.Now() 278 | _log2 := map[string]any{ 279 | "name": fmt.Sprintf("%s->%s", key, itemKey), 280 | "description": itemMetadata["description"].(string), 281 | "key": key, "item_key": itemKey, "start_at": start3, 282 | } 283 | err = etlx.ExecuteQuery(dbConn, afterSQL, item, fname, "", dateRef) 284 | if err != nil { 285 | _log2["success"] = false 286 | 
_log2["msg"] = fmt.Sprintf("%s -> %s After error: %s", key, itemKey, err) 287 | _log2["end_at"] = time.Now() 288 | _log2["duration"] = time.Since(start3).Seconds() 289 | } else { 290 | _log2["success"] = true 291 | _log2["msg"] = fmt.Sprintf("%s -> %s After ", key, itemKey) 292 | _log2["end_at"] = time.Now() 293 | _log2["duration"] = time.Since(start3).Seconds() 294 | } 295 | processLogs = append(processLogs, _log2) 296 | } 297 | // fmt.Println(processLogs) 298 | return nil 299 | } 300 | // Check if the input conf is nil or empty 301 | if conf == nil { 302 | conf = etlx.Config 303 | } 304 | // Process the MD KEY 305 | err := etlx.ProcessMDKey(key, conf, NOTIFYRunner) 306 | if err != nil { 307 | return processLogs, fmt.Errorf("%s failed: %v", key, err) 308 | } 309 | processLogs[0] = map[string]any{ 310 | "name": key, 311 | "description": mainDescription, 312 | "key": key, "start_at": processLogs[0]["start_at"], 313 | "end_at": time.Now(), 314 | "duration": time.Since(start).Seconds(), 315 | } 316 | return processLogs, nil 317 | } 318 | -------------------------------------------------------------------------------- /cmd/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "log" 7 | "os" 8 | "strings" 9 | "time" 10 | 11 | "github.com/realdatadriven/etlx" 12 | ) 13 | 14 | func main() { 15 | etlx.LoadDotEnv() 16 | // Config file path 17 | filePath := flag.String("config", "config.md", "Config File") 18 | // date of reference 19 | date_ref := flag.String("date", time.Now().AddDate(0, 0, -1).Format("2006-01-02"), "Date Reference format YYYY-MM-DD") 20 | // to skip 21 | skip := flag.String("skip", "", "The keys to skip") 22 | // to skip 23 | only := flag.String("only", "", "The only keys to run") 24 | // to steps 25 | steps := flag.String("steps", "", "The steps to run") 26 | // extrat from a file 27 | file := flag.String("file", "", "The file to extract data from, the flag shoud be used in combination with the only appointing to the ETL key the data is meant to") 28 | // To clean / delete data (execute clean_sql on every item) 29 | clean := flag.Bool("clean", false, "To clean data (execute clean_sql on every item, conditioned by only and skip)") 30 | // To drop the table (execute drop_sql on every item condition by only and skip) 31 | drop := flag.Bool("drop", false, "To drop the table (execute drop_sql on every item, conditioned by only and skip)") 32 | // To get number of rows in the table (execute rows_sql on every item, conditioned by only and skip) 33 | rows := flag.Bool("rows", false, "To get number of rows in the table (execute rows_sql on every item, conditioned by only and skip)") 34 | flag.Parse() 35 | config := make(map[string]any) 36 | // Parse the file content 37 | etlxlib := &etlx.ETLX{Config: config} 38 | err := etlxlib.ConfigFromFile(*filePath) 39 | if err != nil { 40 | log.Fatalf("Error parsing Markdown: %v", err) 41 | } 42 | if _, ok := etlxlib.Config["REQUIRES"]; ok { 43 | _logs, err := etlxlib.LoadREQUIRES(nil) 44 | if err != nil { 45 | fmt.Printf("REQUIRES ERR: %v\n", err) 46 | } 47 | for _, _log := range _logs { 48 | fmt.Println(_log["start_at"], _log["end_at"], _log["duration"], _log["name"], _log["success"], _log["msg"], _log["rows"]) 49 | } 50 | } 51 | // Print the parsed configuration 52 | if os.Getenv("ETLX_DEBUG_QUERY") == "true" { 53 | etlxlib.PrintConfigAsJSON(etlxlib.Config) 54 | } 55 | /*/ Walk through the data and process each key-value pair 56 | 
etlxlib.Walk(etlxlib.Config, "", func(keyPath string, value any) { 57 | fmt.Printf("Key: %s, Value: %v\n", keyPath, value) 58 | if reflect.TypeOf(value).Kind() != reflect.Map { 59 | fmt.Printf("Key: %s, Value: %v\n", keyPath, value) 60 | } else { 61 | fmt.Printf("Entering: %s\n", keyPath) 62 | } 63 | })*/ 64 | var dateRef []time.Time 65 | _dt, _ := time.Parse("2006-01-02", *date_ref) 66 | dateRef = append(dateRef, _dt) 67 | // fmt.Println("date_ref:", *date_ref, dateRef) 68 | extraConf := map[string]any{ 69 | "clean": *clean, 70 | "drop": *drop, 71 | "rows": *rows, 72 | "file": *file, 73 | } 74 | if *only != "" { 75 | extraConf["only"] = strings.Split(*only, ",") 76 | } 77 | if *skip != "" { 78 | extraConf["skip"] = strings.Split(*skip, ",") 79 | } 80 | if *steps != "" { 81 | extraConf["steps"] = strings.Split(*steps, ",") 82 | } 83 | logs := []map[string]any{} 84 | // RUN ETL 85 | if _, ok := etlxlib.Config["ETL"]; ok { 86 | _logs, err := etlxlib.RunETL(dateRef, nil, extraConf) 87 | if err != nil { 88 | fmt.Printf("ETL ERR: %v\n", err) 89 | } else { 90 | // LOGS 91 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 92 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 93 | if err != nil { 94 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 95 | } 96 | } 97 | logs = append(logs, _logs...) 98 | } 99 | } 100 | // DATA_QUALITY 101 | if _, ok := etlxlib.Config["DATA_QUALITY"]; ok { 102 | _logs, err := etlxlib.RunDATA_QUALITY(dateRef, nil, extraConf) 103 | if err != nil { 104 | fmt.Printf("DATA_QUALITY ERR: %v\n", err) 105 | } else { 106 | // LOGS 107 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 108 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 109 | if err != nil { 110 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 111 | } 112 | } 113 | logs = append(logs, _logs...) 114 | } 115 | } 116 | // EXPORTS 117 | if _, ok := etlxlib.Config["EXPORTS"]; ok { 118 | _logs, err := etlxlib.RunEXPORTS(dateRef, nil, extraConf) 119 | if err != nil { 120 | fmt.Printf("EXPORTS ERR: %v\n", err) 121 | } else { 122 | // LOGS 123 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 124 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 125 | if err != nil { 126 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 127 | } 128 | } 129 | logs = append(logs, _logs...) 130 | } 131 | } 132 | // SCRIPTS 133 | if _, ok := etlxlib.Config["SCRIPTS"]; ok { 134 | _logs, err := etlxlib.RunSCRIPTS(dateRef, nil, extraConf) 135 | if err != nil { 136 | fmt.Printf("SCRIPTS ERR: %v\n", err) 137 | } else { 138 | // LOGS 139 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 140 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 141 | if err != nil { 142 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 143 | } 144 | } 145 | logs = append(logs, _logs...) 146 | } 147 | } 148 | // MULTI_QUERIES 149 | if _, ok := etlxlib.Config["MULTI_QUERIES"]; ok { 150 | _logs, _, err := etlxlib.RunMULTI_QUERIES(dateRef, nil, extraConf) 151 | if err != nil { 152 | fmt.Printf("MULTI_QUERIES ERR: %v\n", err) 153 | } else { 154 | // LOGS 155 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 156 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 157 | if err != nil { 158 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 159 | } 160 | } 161 | logs = append(logs, _logs...) 
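// Each top-level section above follows the same pattern: run the step, feed its
// logs to the incremental AUTO_LOGS key when that key is configured, and
// accumulate them in `logs` for the final LOGS stage below.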
162 | } 163 | } 164 | // ACTIONS 165 | if _, ok := etlxlib.Config["ACTIONS"]; ok { 166 | _logs, err := etlxlib.RunACTIONS(dateRef, nil, extraConf) 167 | if err != nil { 168 | fmt.Printf("ACTIONS ERR: %v\n", err) 169 | } else { 170 | // LOGS 171 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 172 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 173 | if err != nil { 174 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 175 | } 176 | } 177 | logs = append(logs, _logs...) 178 | } 179 | } 180 | // LOGS 181 | if _, ok := etlxlib.Config["LOGS"]; ok { 182 | _logs, err := etlxlib.RunLOGS(dateRef, nil, logs) 183 | if err != nil { 184 | fmt.Printf("LOGS ERR: %v\n", err) 185 | } else { 186 | // LOGS 187 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 188 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 189 | if err != nil { 190 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 191 | } 192 | } 193 | } 194 | } 195 | // NOTIFY 196 | if _, ok := etlxlib.Config["NOTIFY"]; ok { 197 | _logs, err := etlxlib.RunNOTIFY(dateRef, nil, extraConf) 198 | if err != nil { 199 | fmt.Printf("LOGS ERR: %v\n", err) 200 | } else { 201 | // LOGS 202 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 203 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 204 | if err != nil { 205 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 206 | } 207 | } 208 | } 209 | } 210 | 211 | _keys := []string{"NOTIFY", "LOGS", "SCRIPTS", "MULTI_QUERIES", "EXPORTS", "DATA_QUALITY", "ETL", "ACTIONS", "AUTO_LOGS", "REQUIRES"} 212 | __order, ok := etlxlib.Config["__order"].([]string) 213 | hasOrderedKeys := false 214 | if !ok { 215 | __order2, ok := etlxlib.Config["__order"].([]any) 216 | if ok { 217 | hasOrderedKeys = true 218 | __order = []string{} 219 | for _, key := range __order2 { 220 | __order = append(__order, key.(string)) 221 | } 222 | } 223 | } else { 224 | hasOrderedKeys = true 225 | } 226 | // fmt.Println("LEVEL 1 H:", __order, len(__order)) 227 | if !hasOrderedKeys { 228 | } else if len(__order) > 0 { 229 | //fmt.Print("LEVEL 1 H:", __order) 230 | for _, key := range __order { 231 | if !etlxlib.Contains(_keys, any(key)) { 232 | _key_conf, ok := etlxlib.Config[key].(map[string]any) 233 | if !ok { 234 | continue 235 | } 236 | _key_conf_metadata, ok := _key_conf["metadata"].(map[string]any) 237 | if !ok { 238 | continue 239 | } 240 | if runs_as, ok := _key_conf_metadata["runs_as"]; ok { 241 | fmt.Printf("%s RUN AS %s:\n", key, runs_as) 242 | if etlxlib.Contains(_keys, runs_as) { 243 | switch runs_as { 244 | case "ETL": 245 | _logs, err := etlxlib.RunETL(dateRef, nil, extraConf, key) 246 | if err != nil { 247 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err) 248 | } else { 249 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 250 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 251 | if err != nil { 252 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 253 | } 254 | } 255 | logs = append(logs, _logs...) 256 | } 257 | case "DATA_QUALITY": 258 | _logs, err := etlxlib.RunDATA_QUALITY(dateRef, nil, extraConf, key) 259 | if err != nil { 260 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err) 261 | } else { 262 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 263 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 264 | if err != nil { 265 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 266 | } 267 | } 268 | logs = append(logs, _logs...) 
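// Every runs_as case repeats the run -> AUTO_LOGS -> accumulate pattern used by
// the built-in sections above, just dispatched for a custom top-level key.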
269 | } 270 | case "MULTI_QUERIES": 271 | _logs, _, err := etlxlib.RunMULTI_QUERIES(dateRef, nil, extraConf, key) 272 | if err != nil { 273 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err) 274 | } else { 275 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 276 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 277 | if err != nil { 278 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 279 | } 280 | } 281 | logs = append(logs, _logs...) 282 | } 283 | case "EXPORTS": 284 | _logs, err := etlxlib.RunEXPORTS(dateRef, nil, extraConf, key) 285 | if err != nil { 286 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err) 287 | } else { 288 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 289 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 290 | if err != nil { 291 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 292 | } 293 | } 294 | logs = append(logs, _logs...) 295 | } 296 | case "NOTIFY": 297 | _logs, err := etlxlib.RunNOTIFY(dateRef, nil, extraConf, key) 298 | if err != nil { 299 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err) 300 | } else { 301 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 302 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 303 | if err != nil { 304 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 305 | } 306 | } 307 | logs = append(logs, _logs...) 308 | } 309 | case "ACTIONS": 310 | _logs, err := etlxlib.RunACTIONS(dateRef, nil, extraConf, key) 311 | if err != nil { 312 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err) 313 | } else { 314 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 315 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 316 | if err != nil { 317 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 318 | } 319 | } 320 | logs = append(logs, _logs...) 321 | } 322 | case "SCRIPTS": 323 | _logs, err := etlxlib.RunSCRIPTS(dateRef, nil, extraConf, key) 324 | if err != nil { 325 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err) 326 | } else { 327 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 328 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 329 | if err != nil { 330 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 331 | } 332 | } 333 | logs = append(logs, _logs...) 334 | } 335 | case "LOGS": 336 | _logs, err := etlxlib.RunLOGS(dateRef, nil, logs, key) 337 | if err != nil { 338 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err) 339 | } else { 340 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 341 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 342 | if err != nil { 343 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 344 | } 345 | } 346 | logs = append(logs, _logs...) 347 | } 348 | case "REQUIRES": 349 | _logs, err := etlxlib.LoadREQUIRES(nil, key) 350 | if err != nil { 351 | fmt.Printf("%s AS %s ERR: %v\n", key, runs_as, err) 352 | } else { 353 | if _, ok := etlxlib.Config["AUTO_LOGS"]; ok && len(_logs) > 0 { 354 | _, err := etlxlib.RunLOGS(dateRef, nil, _logs, "AUTO_LOGS") 355 | if err != nil { 356 | fmt.Printf("INCREMENTAL AUTOLOGS ERR: %v\n", err) 357 | } 358 | } 359 | logs = append(logs, _logs...) 
360 | } 361 | default: 362 | // 363 | } 364 | } 365 | } 366 | } 367 | } 368 | } 369 | } 370 | -------------------------------------------------------------------------------- /internal/etlx/run_multiples_queries.go: -------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | "strings" 7 | "time" 8 | ) 9 | 10 | func (etlx *ETLX) RunMULTI_QUERIES(dateRef []time.Time, conf map[string]any, extraConf map[string]any, keys ...string) ([]map[string]any, []map[string]any, error) { 11 | key := "MULTI_QUERIES" 12 | if len(keys) > 0 && keys[0] != "" { 13 | key = keys[0] 14 | } 15 | //fmt.Println(key, dateRef) 16 | var processData []map[string]any 17 | var processLogs []map[string]any 18 | start := time.Now() 19 | mem_alloc, mem_total_alloc, mem_sys, num_gc := etlx.RuntimeMemStats() 20 | processLogs = append(processLogs, map[string]any{ 21 | "name": key, 22 | "key": key, "start_at": start, 23 | "ref": nil, 24 | "mem_alloc_start": mem_alloc, 25 | "mem_total_alloc_start": mem_total_alloc, 26 | "mem_sys_start": mem_sys, 27 | "num_gc_start": num_gc, 28 | }) 29 | // Check if the input conf is nil or empty 30 | if conf == nil { 31 | conf = etlx.Config 32 | } 33 | data, ok := conf[key].(map[string]any) 34 | if !ok { 35 | return nil, nil, fmt.Errorf("missing or invalid %s section", key) 36 | } 37 | // Extract metadata 38 | metadata, ok := data["metadata"].(map[string]any) 39 | if !ok { 40 | return nil, nil, fmt.Errorf("missing metadata in %s section", key) 41 | } 42 | // ACTIVE 43 | if active, okActive := metadata["active"]; okActive { 44 | if !active.(bool) { 45 | processLogs = append(processLogs, map[string]any{ 46 | "name": fmt.Sprintf("KEY %s", key), 47 | "description": metadata["description"].(string), 48 | "key": key, 49 | "start_at": time.Now(), 50 | "end_at": time.Now(), 51 | "success": true, 52 | "msg": "Deactivated", 53 | }) 54 | return nil, nil, fmt.Errorf("%s deactivated", key) 55 | } 56 | } 57 | beforeSQL, okBefore := metadata["before_sql"] 58 | afterSQL, okAfter := metadata["after_sql"] 59 | saveSQL, okSave := metadata["save_sql"] 60 | errPatt, okErrPatt := metadata["save_on_err_patt"] 61 | errSQL, okErrSQL := metadata["save_on_err_sql"] 62 | dtRef, okDtRef := metadata["date_ref"] 63 | if okDtRef && dtRef != "" { 64 | _dt, err := time.Parse("2006-01-02", dtRef.(string)) 65 | if err == nil { 66 | dateRef = append([]time.Time{}, _dt) 67 | } 68 | } else { 69 | if len(dateRef) > 0 { 70 | dtRef = dateRef[0].Format("2006-01-02") 71 | } 72 | } 73 | if processLogs[0]["ref"] == nil { 74 | processLogs[0]["ref"] = dtRef 75 | } 76 | queries := []string{} 77 | order := []string{} 78 | __order, okOrder := data["__order"].([]any) 79 | if !okOrder { 80 | for key, _ := range data { 81 | order = append(order, key) 82 | } 83 | } else { 84 | for _, itemKey := range __order { 85 | order = append(order, itemKey.(string)) 86 | } 87 | } 88 | for _, itemKey := range order { 89 | if itemKey == "metadata" || itemKey == "__order" || itemKey == "order" { 90 | continue 91 | } 92 | item := data[itemKey] 93 | if _, isMap := item.(map[string]any); !isMap { 94 | //fmt.Println(itemKey, "NOT A MAP:", item) 95 | continue 96 | } 97 | /*if only, okOnly := extraConf["only"]; okOnly { 98 | if len(only.([]string)) == 0 { 99 | } else if !etlx.Contains(only.([]string), itemKey) { 100 | continue 101 | } 102 | }*/ 103 | if skip, okSkip := extraConf["skip"]; okSkip { 104 | if len(skip.([]string)) == 0 { 105 | } else if etlx.Contains(skip.([]string), itemKey) 
{ 106 | continue 107 | } 108 | } 109 | itemMetadata, ok := item.(map[string]any)["metadata"] 110 | if !ok { 111 | continue 112 | } 113 | // ACTIVE 114 | if active, okActive := itemMetadata.(map[string]any)["active"]; okActive { 115 | if !active.(bool) { 116 | continue 117 | } 118 | } 119 | query, okQuery := itemMetadata.(map[string]any)["query"] 120 | if query != nil && okQuery { 121 | sql := query.(string) 122 | query, ok := item.(map[string]any)[sql].(string) 123 | _, queryDoc := etlx.Config[sql] 124 | if !ok && queryDoc { 125 | query = sql 126 | _sql, _, _, err := etlx.QueryBuilder(nil, sql) 127 | if err != nil { 128 | fmt.Printf("QUERY DOC ERR ON KEY %s: %v\n", queries, err) 129 | _q, _e := etlx.Config[sql].(string) 130 | //fmt.Println(sql, "IS A LOADED SQL STR QUERY?", _q, _e) 131 | if _e { 132 | query = _q 133 | } 134 | } else { 135 | query = _sql 136 | } 137 | } 138 | sql = etlx.SetQueryPlaceholders(query, "", "", dateRef) 139 | queries = append(queries, sql) 140 | } 141 | } 142 | conn, okCon := metadata["connection"] 143 | if !okCon { 144 | return nil, nil, fmt.Errorf("%s err no connection defined", key) 145 | } 146 | start3 := time.Now() 147 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 148 | _log2 := map[string]any{ 149 | "name": key, 150 | "description": metadata["description"].(string), 151 | "key": key, "start_at": start3, 152 | "ref": dtRef, 153 | "mem_alloc_start": mem_alloc, 154 | "mem_total_alloc_start": mem_total_alloc, 155 | "mem_sys_start": mem_sys, 156 | "num_gc_start": num_gc, 157 | } 158 | dbConn, err := etlx.GetDB(conn.(string)) 159 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 160 | _log2["mem_alloc_end"] = mem_alloc 161 | _log2["mem_total_alloc_end"] = mem_total_alloc 162 | _log2["mem_sys_end"] = mem_sys 163 | _log2["num_gc_end"] = num_gc 164 | if err != nil { 165 | _log2["success"] = false 166 | _log2["msg"] = fmt.Sprintf("%s ERR: connecting to %s in : %s", key, conn, err) 167 | _log2["end_at"] = time.Now() 168 | _log2["duration"] = time.Since(start3).Seconds() 169 | processLogs = append(processLogs, _log2) 170 | return nil, nil, fmt.Errorf("%s ERR: connecting to %s in : %s", key, conn, err) 171 | } 172 | defer dbConn.Close() 173 | _log2["success"] = true 174 | _log2["msg"] = fmt.Sprintf("%s CONN: connection to %s successfull", key, conn) 175 | _log2["end_at"] = time.Now() 176 | _log2["duration"] = time.Since(start3).Seconds() 177 | processLogs = append(processLogs, _log2) 178 | // QUERIES TO RUN AT beginning 179 | if okBefore { 180 | start3 := time.Now() 181 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 182 | _log2 = map[string]any{ 183 | "name": key, 184 | "description": metadata["description"].(string), 185 | "key": key, "start_at": start3, 186 | "ref": dtRef, 187 | "mem_alloc_start": mem_alloc, 188 | "mem_total_alloc_start": mem_total_alloc, 189 | "mem_sys_start": mem_sys, 190 | "num_gc_start": num_gc, 191 | } 192 | err = etlx.ExecuteQuery(dbConn, beforeSQL, data, "", "", dateRef) 193 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 194 | if err != nil { 195 | _log2["success"] = false 196 | _log2["msg"] = fmt.Sprintf("%s Before error: %s", key, err) 197 | _log2["end_at"] = time.Now() 198 | _log2["duration"] = time.Since(start3).Seconds() 199 | } else { 200 | _log2["success"] = true 201 | _log2["msg"] = fmt.Sprintf("%s Before ", key) 202 | _log2["end_at"] = time.Now() 203 | _log2["duration"] = time.Since(start3).Seconds() 204 | } 205 | _log2["mem_alloc_end"] = 
mem_alloc 206 | _log2["mem_total_alloc_end"] = mem_total_alloc 207 | _log2["mem_sys_end"] = mem_sys 208 | _log2["num_gc_end"] = num_gc 209 | processLogs = append(processLogs, _log2) 210 | } 211 | // MAIN QUERY 212 | unionKey, ok := metadata["union_key"].(string) 213 | if !ok { 214 | unionKey = "UNION\n" 215 | } 216 | sql := strings.Join(queries, unionKey) 217 | // fmt.Println(key, sql) 218 | start3 = time.Now() 219 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 220 | _log2 = map[string]any{ 221 | "name": key, 222 | "description": metadata["description"].(string), 223 | "key": key, "start_at": start3, 224 | "ref": dtRef, 225 | "mem_alloc_start": mem_alloc, 226 | "mem_total_alloc_start": mem_total_alloc, 227 | "mem_sys_start": mem_sys, 228 | "num_gc_start": num_gc, 229 | } 230 | // CHECK CONDITION 231 | condition, okCondition := metadata["condition"].(string) 232 | condMsg, okCondMsg := metadata["condition_msg"].(string) 233 | failedCondition := false 234 | if okCondition && condition != "" { 235 | cond, err := etlx.ExecuteCondition(dbConn, condition, metadata, "", "", dateRef) 236 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 237 | _log2["mem_alloc_end"] = mem_alloc 238 | _log2["mem_total_alloc_end"] = mem_total_alloc 239 | _log2["mem_sys_end"] = mem_sys 240 | _log2["num_gc_end"] = num_gc 241 | if err != nil { 242 | _log2["success"] = false 243 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed %s", key, "", err) 244 | _log2["end_at"] = time.Now() 245 | _log2["duration"] = time.Since(start3).Seconds() 246 | processLogs = append(processLogs, _log2) 247 | //return fmt.Errorf("%s", _log2["msg"]) 248 | failedCondition = true 249 | } else if !cond { 250 | _log2["success"] = false 251 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed the condition %s was not met!", key, "", condition) 252 | _log2["end_at"] = time.Now() 253 | _log2["duration"] = time.Since(start3).Seconds() 254 | if okCondMsg && condMsg != "" { 255 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed %s", key, "", etlx.SetQueryPlaceholders(condMsg, "", "", dateRef)) 256 | } 257 | processLogs = append(processLogs, _log2) 258 | // return fmt.Errorf("%s", _log2["msg"]) 259 | failedCondition = true 260 | } 261 | } 262 | if saveSQL != "" && okSave && !failedCondition { 263 | data["final_query"] = sql // PUT THE QUERY GENERATED IN THE SCOPE 264 | // fmt.Println(data[saveSQL.(string)]) 265 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 266 | _log2["mem_alloc_start"] = mem_alloc 267 | _log2["mem_total_alloc_start"] = mem_total_alloc 268 | _log2["mem_sys_start"] = mem_sys 269 | _log2["num_gc_start"] = num_gc 270 | err = etlx.ExecuteQuery(dbConn, saveSQL, data, "", "", dateRef) 271 | if err != nil { 272 | _err_by_pass := false 273 | if okErrPatt && errPatt != nil && okErrSQL && errSQL != nil { 274 | //fmt.Println(onErrPatt.(string), onErrSQL.(string)) 275 | re, regex_err := regexp.Compile(errPatt.(string)) 276 | if regex_err != nil { 277 | _log2["success"] = false 278 | _log2["msg"] = fmt.Sprintf("%s ERR: fallback regex matching the error failed to compile: %s", key, regex_err) 279 | _log2["end_at"] = time.Now() 280 | _log2["duration"] = time.Since(start3).Seconds() 281 | } else if re.MatchString(string(err.Error())) { 282 | err = etlx.ExecuteQuery(dbConn, errSQL, data, "", "", dateRef) 283 | if err != nil { 284 | _log2["success"] = false 285 | _log2["msg"] = fmt.Sprintf("%s ERR: main: %s", key, err) 286 | _log2["end_at"] = time.Now() 287 | _log2["duration"] = 
time.Since(start3).Seconds() 288 | } else { 289 | _err_by_pass = true 290 | } 291 | } 292 | } 293 | if !_err_by_pass { 294 | //return nil, fmt.Errorf("%s ERR: main: %s", key, err) 295 | _log2["success"] = false 296 | _log2["msg"] = fmt.Sprintf("%s ERR: main: %s", key, err) 297 | _log2["end_at"] = time.Now() 298 | _log2["duration"] = time.Since(start3).Seconds() 299 | } else { 300 | _log2["success"] = true 301 | _log2["msg"] = fmt.Sprintf("%s main ", key) 302 | _log2["end_at"] = time.Now() 303 | _log2["duration"] = time.Since(start3).Seconds() 304 | } 305 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 306 | _log2["mem_alloc_end"] = mem_alloc 307 | _log2["mem_total_alloc_end"] = mem_total_alloc 308 | _log2["mem_sys_end"] = mem_sys 309 | _log2["num_gc_end"] = num_gc 310 | } else { 311 | _log2["success"] = true 312 | _log2["msg"] = fmt.Sprintf("%s main ", key) 313 | _log2["end_at"] = time.Now() 314 | _log2["duration"] = time.Since(start3).Seconds() 315 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 316 | _log2["mem_alloc_end"] = mem_alloc 317 | _log2["mem_total_alloc_end"] = mem_total_alloc 318 | _log2["mem_sys_end"] = mem_sys 319 | _log2["num_gc_end"] = num_gc 320 | } 321 | processLogs = append(processLogs, _log2) 322 | } else if !failedCondition { 323 | rows, _, err := etlx.Query(dbConn, sql, data, "", "", dateRef) 324 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 325 | if err != nil { 326 | _log2["success"] = false 327 | _log2["msg"] = fmt.Sprintf("%s After error: %s", key, err) 328 | _log2["end_at"] = time.Now() 329 | _log2["duration"] = time.Since(start3).Seconds() 330 | } else { 331 | processData = *rows 332 | _log2["success"] = true 333 | _log2["msg"] = fmt.Sprintf("%s After ", key) 334 | _log2["end_at"] = time.Now() 335 | _log2["duration"] = time.Since(start3).Seconds() 336 | } 337 | _log2["mem_alloc_end"] = mem_alloc 338 | _log2["mem_total_alloc_end"] = mem_total_alloc 339 | _log2["mem_sys_end"] = mem_sys 340 | _log2["num_gc_end"] = num_gc 341 | processLogs = append(processLogs, _log2) 342 | } 343 | // QUERIES TO RUN AT THE END 344 | if okAfter { 345 | start3 := time.Now() 346 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 347 | _log2 = map[string]any{ 348 | "name": key, 349 | "description": metadata["description"].(string), 350 | "key": key, "start_at": start3, 351 | "ref": dtRef, 352 | "mem_alloc_start": mem_alloc, 353 | "mem_total_alloc_start": mem_total_alloc, 354 | "mem_sys_start": mem_sys, 355 | "num_gc_start": num_gc, 356 | } 357 | err = etlx.ExecuteQuery(dbConn, afterSQL, data, "", "", dateRef) 358 | if err != nil { 359 | _log2["success"] = false 360 | _log2["msg"] = fmt.Sprintf("%s After error: %s", key, err) 361 | _log2["end_at"] = time.Now() 362 | _log2["duration"] = time.Since(start3).Seconds() 363 | } else { 364 | _log2["success"] = true 365 | _log2["msg"] = fmt.Sprintf("%s After ", key) 366 | _log2["end_at"] = time.Now() 367 | _log2["duration"] = time.Since(start3).Seconds() 368 | } 369 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 370 | _log2["mem_alloc_end"] = mem_alloc 371 | _log2["mem_total_alloc_end"] = mem_total_alloc 372 | _log2["mem_sys_end"] = mem_sys 373 | _log2["num_gc_end"] = num_gc 374 | processLogs = append(processLogs, _log2) 375 | } 376 | return processLogs, processData, nil 377 | } 378 | -------------------------------------------------------------------------------- /internal/etlx/run_scripts.go: 
-------------------------------------------------------------------------------- 1 | package etlxlib 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "regexp" 8 | "time" 9 | ) 10 | 11 | func (etlx *ETLX) RunSCRIPTS(dateRef []time.Time, conf map[string]any, extraConf map[string]any, keys ...string) ([]map[string]any, error) { 12 | key := "SCRIPTS" 13 | if len(keys) > 0 && keys[0] != "" { 14 | key = keys[0] 15 | } 16 | //fmt.Println(key, dateRef) 17 | var processLogs []map[string]any 18 | start := time.Now() 19 | mem_alloc, mem_total_alloc, mem_sys, num_gc := etlx.RuntimeMemStats() 20 | processLogs = append(processLogs, map[string]any{ 21 | "name": key, 22 | "key": key, "start_at": start, 23 | "ref": nil, 24 | "mem_alloc_start": mem_alloc, 25 | "mem_total_alloc_start": mem_total_alloc, 26 | "mem_sys_start": mem_sys, 27 | "num_gc_start": num_gc, 28 | }) 29 | mainDescription := "" 30 | // Define the runner as a simple function 31 | SCRIPTSRunner := func(metadata map[string]any, itemKey string, item map[string]any) error { 32 | //fmt.Println(metadata, itemKey, item) 33 | // ACTIVE 34 | if active, okActive := metadata["active"]; okActive { 35 | if !active.(bool) { 36 | processLogs = append(processLogs, map[string]any{ 37 | "name": fmt.Sprintf("KEY %s", key), 38 | "description": metadata["description"].(string), 39 | "key": key, "item_key": itemKey, "start_at": time.Now(), 40 | "end_at": time.Now(), 41 | "success": true, 42 | "msg": "Deactivated", 43 | }) 44 | return fmt.Errorf("deactivated %s", "") 45 | } 46 | } 47 | // MAIN PATH 48 | mainPath, okMainPath := metadata["path"].(string) 49 | if okMainPath { 50 | pth := etlx.ReplaceQueryStringDate(mainPath, dateRef) 51 | //fmt.Println("MAIN PATH", pth) 52 | if ok, _ := pathExists(pth); !ok { 53 | err := os.Mkdir(pth, 0755) 54 | if err != nil { 55 | return fmt.Errorf("%s ERR: trying to create the script path %s -> %s", key, pth, err) 56 | } 57 | } 58 | } else { 59 | 60 | } 61 | mainConn, _ := metadata["connection"].(string) 62 | mainDescription = metadata["description"].(string) 63 | itemMetadata, ok := item["metadata"].(map[string]any) 64 | if !ok { 65 | processLogs = append(processLogs, map[string]any{ 66 | "name": fmt.Sprintf("%s->%s", key, itemKey), 67 | "description": itemMetadata["description"].(string), 68 | "key": key, "item_key": itemKey, "start_at": time.Now(), 69 | "end_at": time.Now(), 70 | "success": true, 71 | "msg": "Missing metadata in item", 72 | }) 73 | return nil 74 | } 75 | // ACTIVE 76 | if active, okActive := itemMetadata["active"]; okActive { 77 | if !active.(bool) { 78 | processLogs = append(processLogs, map[string]any{ 79 | "name": fmt.Sprintf("%s->%s", key, itemKey), 80 | "description": itemMetadata["description"].(string), 81 | "key": key, "item_key": itemKey, "start_at": time.Now(), 82 | "end_at": time.Now(), 83 | "success": true, 84 | "msg": "Deactivated", 85 | }) 86 | return nil 87 | } 88 | } 89 | beforeSQL, okBefore := itemMetadata["before_sql"] 90 | scriptSQL, okScript := itemMetadata["script_sql"] 91 | afterSQL, okAfter := itemMetadata["after_sql"] 92 | errPatt, okErrPatt := itemMetadata["on_err_patt"] 93 | errSQL, okErrSQL := itemMetadata["on_err_sql"] 94 | conn, okCon := itemMetadata["connection"] 95 | if !okCon { 96 | conn = mainConn 97 | } 98 | dtRef, okDtRef := itemMetadata["date_ref"] 99 | if okDtRef && dtRef != "" { 100 | _dt, err := time.Parse("2006-01-02", dtRef.(string)) 101 | if err == nil { 102 | dateRef = append([]time.Time{}, _dt) 103 | } 104 | } else { 105 | if len(dateRef) > 0 { 106 | dtRef 
= dateRef[0].Format("2006-01-02") 107 | } 108 | } 109 | if processLogs[0]["ref"] == nil { 110 | processLogs[0]["ref"] = dtRef 111 | } 112 | start3 := time.Now() 113 | mem_alloc, mem_total_alloc, mem_sys, num_gc := etlx.RuntimeMemStats() 114 | _log2 := map[string]any{ 115 | "name": fmt.Sprintf("%s->%s", key, itemKey), 116 | "description": itemMetadata["description"].(string), 117 | "key": key, "item_key": itemKey, "start_at": start3, 118 | "ref": dtRef, 119 | "mem_alloc_start": mem_alloc, 120 | "mem_total_alloc_start": mem_total_alloc, 121 | "mem_sys_start": mem_sys, 122 | "num_gc_start": num_gc, 123 | } 124 | dbConn, err := etlx.GetDB(conn.(string)) 125 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 126 | if err != nil { 127 | _log2["success"] = false 128 | _log2["msg"] = fmt.Sprintf("%s -> %s ERR: connecting to %s in : %s", key, itemKey, conn, err) 129 | _log2["end_at"] = time.Now() 130 | _log2["duration"] = time.Since(start3).Seconds() 131 | _log2["mem_alloc_end"] = mem_alloc 132 | _log2["mem_total_alloc_end"] = mem_total_alloc 133 | _log2["mem_sys_end"] = mem_sys 134 | _log2["num_gc_end"] = num_gc 135 | processLogs = append(processLogs, _log2) 136 | return nil 137 | } 138 | defer dbConn.Close() 139 | _log2["success"] = true 140 | _log2["msg"] = fmt.Sprintf("%s -> %s CONN: connection to %s successfull", key, itemKey, conn) 141 | _log2["end_at"] = time.Now() 142 | _log2["duration"] = time.Since(start3).Seconds() 143 | _log2["mem_alloc_end"] = mem_alloc 144 | _log2["mem_total_alloc_end"] = mem_total_alloc 145 | _log2["mem_sys_end"] = mem_sys 146 | _log2["num_gc_end"] = num_gc 147 | processLogs = append(processLogs, _log2) 148 | // FILE 149 | table := itemMetadata["name"].(string) 150 | path, okPath := itemMetadata["path"].(string) 151 | if !okPath { 152 | path, okPath = itemMetadata["fname"].(string) 153 | if !okPath { 154 | path, okPath = itemMetadata["file"].(string) 155 | } 156 | } 157 | fname := fmt.Sprintf(`%s/%s_{YYYYMMDD}.csv`, os.TempDir(), table) 158 | if okPath && path != "" { 159 | fname = path 160 | if filepath.IsAbs(fname) { 161 | } else if filepath.IsLocal(fname) { 162 | fname = fmt.Sprintf(`%s/%s`, mainPath, fname) 163 | } else if filepath.Dir(fname) != "" && okMainPath && mainPath != "" { 164 | fname = fmt.Sprintf(`%s/%s`, mainPath, fname) 165 | } 166 | } else if okMainPath && mainPath != "" { 167 | fname = fmt.Sprintf(`%s/%s_{YYYYMMDD}.csv`, mainPath, table) 168 | } 169 | // QUERIES TO RUN AT beginning 170 | if okBefore { 171 | start3 := time.Now() 172 | _log2 := map[string]any{ 173 | "name": fmt.Sprintf("%s->%s", key, itemKey), 174 | "description": itemMetadata["description"].(string), 175 | "key": key, "item_key": itemKey, "start_at": start3, 176 | "ref": dtRef, 177 | "mem_alloc_start": mem_alloc, 178 | "mem_total_alloc_start": mem_total_alloc, 179 | "mem_sys_start": mem_sys, 180 | "num_gc_start": num_gc, 181 | } 182 | err = etlx.ExecuteQuery(dbConn, beforeSQL, item, fname, "", dateRef) 183 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 184 | if err != nil { 185 | _log2["success"] = false 186 | _log2["msg"] = fmt.Sprintf("%s -> %s Before error: %s", key, itemKey, err) 187 | _log2["end_at"] = time.Now() 188 | _log2["duration"] = time.Since(start3).Seconds() 189 | } else { 190 | _log2["success"] = true 191 | _log2["msg"] = fmt.Sprintf("%s -> %s Before ", key, itemKey) 192 | _log2["end_at"] = time.Now() 193 | _log2["duration"] = time.Since(start3).Seconds() 194 | } 195 | _log2["mem_alloc_end"] = mem_alloc 196 | 
_log2["mem_total_alloc_end"] = mem_total_alloc 197 | _log2["mem_sys_end"] = mem_sys 198 | _log2["num_gc_end"] = num_gc 199 | processLogs = append(processLogs, _log2) 200 | } 201 | // CHECK CONDITION 202 | condition, okCondition := itemMetadata["condition"].(string) 203 | condMsg, okCondMsg := itemMetadata["condition_msg"].(string) 204 | failedCondition := false 205 | if okCondition && condition != "" { 206 | cond, err := etlx.ExecuteCondition(dbConn, condition, itemMetadata, fname, "", dateRef) 207 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 208 | if err != nil { 209 | _log2["success"] = false 210 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed %s", key, itemKey, err) 211 | _log2["end_at"] = time.Now() 212 | _log2["duration"] = time.Since(start3).Seconds() 213 | _log2["mem_alloc_end"] = mem_alloc 214 | _log2["mem_total_alloc_end"] = mem_total_alloc 215 | _log2["mem_sys_end"] = mem_sys 216 | _log2["num_gc_end"] = num_gc 217 | processLogs = append(processLogs, _log2) 218 | //return fmt.Errorf("%s", _log2["msg"]) 219 | failedCondition = true 220 | } else if !cond { 221 | _log2["success"] = false 222 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed the condition %s was not met!", key, itemKey, condition) 223 | _log2["end_at"] = time.Now() 224 | _log2["duration"] = time.Since(start3).Seconds() 225 | _log2["mem_alloc_end"] = mem_alloc 226 | _log2["mem_total_alloc_end"] = mem_total_alloc 227 | _log2["mem_sys_end"] = mem_sys 228 | _log2["num_gc_end"] = num_gc 229 | if okCondMsg && condMsg != "" { 230 | _log2["msg"] = fmt.Sprintf("%s -> %s COND: failed %s", key, itemKey, etlx.SetQueryPlaceholders(condMsg, table, fname, dateRef)) 231 | } 232 | processLogs = append(processLogs, _log2) 233 | // return fmt.Errorf("%s", _log2["msg"]) 234 | failedCondition = true 235 | } 236 | } 237 | // MAIN QUERIES 238 | if okScript && !failedCondition { 239 | start3 := time.Now() 240 | mem_alloc, mem_total_alloc, mem_sys, num_gc := etlx.RuntimeMemStats() 241 | _log2 := map[string]any{ 242 | "name": fmt.Sprintf("%s->%s", key, itemKey), 243 | "description": itemMetadata["description"].(string), 244 | "key": key, "item_key": itemKey, "start_at": start3, 245 | "ref": dtRef, 246 | "mem_alloc_start": mem_alloc, 247 | "mem_total_alloc_start": mem_total_alloc, 248 | "mem_sys_start": mem_sys, 249 | "num_gc_start": num_gc, 250 | } 251 | err = etlx.ExecuteQuery(dbConn, scriptSQL, item, fname, "", dateRef) 252 | if err != nil { 253 | _err_by_pass := false 254 | if okErrPatt && errPatt != nil && okErrSQL && errSQL != nil { 255 | //fmt.Println(onErrPatt.(string), onErrSQL.(string)) 256 | re, regex_err := regexp.Compile(errPatt.(string)) 257 | if regex_err != nil { 258 | _log2["success"] = false 259 | _log2["msg"] = fmt.Errorf("%s ERR: fallback regex matching the error failed to compile: %s", key, regex_err) 260 | _log2["end_at"] = time.Now() 261 | _log2["duration"] = time.Since(start3).Seconds() 262 | } else if re.MatchString(string(err.Error())) { 263 | err = etlx.ExecuteQuery(dbConn, errSQL, item, fname, "", dateRef) 264 | if err != nil { 265 | _log2["success"] = false 266 | _log2["msg"] = fmt.Errorf("%s ERR: main: %s", key, err) 267 | _log2["end_at"] = time.Now() 268 | _log2["duration"] = time.Since(start3).Seconds() 269 | } else { 270 | _err_by_pass = true 271 | } 272 | } 273 | } 274 | if !_err_by_pass { 275 | _log2["success"] = false 276 | _log2["msg"] = fmt.Sprintf("%s -> %s error: %s", key, itemKey, err) 277 | _log2["end_at"] = time.Now() 278 | _log2["duration"] = 
time.Since(start3).Seconds() 279 | } else { 280 | _log2["success"] = true 281 | _log2["msg"] = fmt.Sprintf("%s -> %s Success", key, itemKey) 282 | _log2["end_at"] = time.Now() 283 | _log2["duration"] = time.Since(start3).Seconds() 284 | } 285 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 286 | _log2["mem_alloc_end"] = mem_alloc 287 | _log2["mem_total_alloc_end"] = mem_total_alloc 288 | _log2["mem_sys_end"] = mem_sys 289 | _log2["num_gc_end"] = num_gc 290 | } else { 291 | _log2["success"] = true 292 | _log2["msg"] = fmt.Sprintf("%s -> %s Success", key, itemKey) 293 | _log2["end_at"] = time.Now() 294 | _log2["duration"] = time.Since(start3).Seconds() 295 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 296 | _log2["mem_alloc_end"] = mem_alloc 297 | _log2["mem_total_alloc_end"] = mem_total_alloc 298 | _log2["mem_sys_end"] = mem_sys 299 | _log2["num_gc_end"] = num_gc 300 | } 301 | processLogs = append(processLogs, _log2) 302 | } 303 | // QUERIES TO RUN AT THE END 304 | if okAfter { 305 | start3 := time.Now() 306 | mem_alloc, mem_total_alloc, mem_sys, num_gc := etlx.RuntimeMemStats() 307 | _log2 := map[string]any{ 308 | "name": fmt.Sprintf("%s->%s", key, itemKey), 309 | "description": itemMetadata["description"].(string), 310 | "key": key, "item_key": itemKey, "start_at": start3, 311 | "ref": dtRef, 312 | "mem_alloc_start": mem_alloc, 313 | "mem_total_alloc_start": mem_total_alloc, 314 | "mem_sys_start": mem_sys, 315 | "num_gc_start": num_gc, 316 | } 317 | err = etlx.ExecuteQuery(dbConn, afterSQL, item, fname, "", dateRef) 318 | if err != nil { 319 | _log2["success"] = false 320 | _log2["msg"] = fmt.Sprintf("%s -> %s After error: %s", key, itemKey, err) 321 | _log2["end_at"] = time.Now() 322 | _log2["duration"] = time.Since(start3).Seconds() 323 | } else { 324 | _log2["success"] = true 325 | _log2["msg"] = fmt.Sprintf("%s -> %s After ", key, itemKey) 326 | _log2["end_at"] = time.Now() 327 | _log2["duration"] = time.Since(start3).Seconds() 328 | } 329 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 330 | _log2["mem_alloc_end"] = mem_alloc 331 | _log2["mem_total_alloc_end"] = mem_total_alloc 332 | _log2["mem_sys_end"] = mem_sys 333 | _log2["num_gc_end"] = num_gc 334 | processLogs = append(processLogs, _log2) 335 | } 336 | return nil 337 | } 338 | // Check if the input conf is nil or empty 339 | if conf == nil { 340 | conf = etlx.Config 341 | } 342 | // Process the MD KEY 343 | err := etlx.ProcessMDKey(key, conf, SCRIPTSRunner) 344 | mem_alloc, mem_total_alloc, mem_sys, num_gc = etlx.RuntimeMemStats() 345 | if err != nil { 346 | return processLogs, fmt.Errorf("%s failed: %v", key, err) 347 | } 348 | processLogs[0] = map[string]any{ 349 | "name": key, 350 | "description": mainDescription, 351 | "key": key, "start_at": processLogs[0]["start_at"], 352 | "end_at": time.Now(), 353 | "duration": time.Since(start).Seconds(), 354 | "ref": processLogs[0]["ref"], 355 | "mem_alloc_start": processLogs[0]["mem_alloc_start"], 356 | "mem_total_alloc_start": processLogs[0]["mem_total_alloc_start"], 357 | "mem_sys_start": processLogs[0]["mem_sys_start"], 358 | "num_gc_start": processLogs[0]["num_gc_start"], 359 | } 360 | return processLogs, nil 361 | } 362 | --------------------------------------------------------------------------------