├── package.json ├── go.mod ├── .gitignore ├── scripts └── run_integration.sh ├── internal ├── parser │ ├── json_table.go │ ├── lexer.go │ └── parser.go └── sqlgen │ ├── dialect.go │ └── dialects.go ├── CODE_OF_CONDUCT.md ├── docs ├── PLAN.md ├── SNAPSHOTS_PLAN.md └── DIALECT_PLAN.md ├── compile_error_test.go ├── ast └── ast.go ├── dialect_test.go ├── AGENTS.md ├── examples └── mongo │ └── main.go ├── integration_test.go ├── gophrql.go ├── prql_integration_test.go ├── LICENSE ├── README.md └── compile_test.go /package.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/maxpert/gophrql 2 | 3 | go 1.25.5 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | coverage*.out 3 | *.log 4 | *.swp 5 | .DS_Store 6 | vendor/ 7 | tmp/ 8 | 9 | 10 | CLAUDE.md 11 | AGENTS.md -------------------------------------------------------------------------------- /scripts/run_integration.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | # clones or updates `tmp/prql` from upstream and runs the Go integration tests that compare 5 | # against the upstream SQL snapshots. 6 | 7 | REPO_URL="https://github.com/PRQL/prql" 8 | TARGET_DIR="tmp/prql" 9 | 10 | mkdir -p "$(dirname "$TARGET_DIR")" 11 | 12 | if [ -d "$TARGET_DIR/.git" ]; then 13 | echo "Updating existing upstream checkout at $TARGET_DIR" 14 | git -C "$TARGET_DIR" fetch --all --prune 15 | git -C "$TARGET_DIR" reset --hard origin/main 16 | else 17 | echo "Cloning upstream repository into $TARGET_DIR" 18 | git clone "$REPO_URL" "$TARGET_DIR" 19 | fi 20 | 21 | echo "Running Go integration suite" 22 | env GOCACHE=/tmp/go-build go test ./... 
23 | -------------------------------------------------------------------------------- /internal/parser/json_table.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | 7 | "github.com/maxpert/gophrql/ast" 8 | ) 9 | 10 | func parseJSONTable(raw string) ([]ast.InlineRow, error) { 11 | var payload struct { 12 | Columns []string `json:"columns"` 13 | Data [][]json.RawMessage `json:"data"` 14 | } 15 | if err := json.Unmarshal([]byte(raw), &payload); err != nil { 16 | return nil, fmt.Errorf("invalid json: %w", err) 17 | } 18 | var rows []ast.InlineRow 19 | for _, row := range payload.Data { 20 | var fields []ast.Field 21 | for i, col := range payload.Columns { 22 | if i >= len(row) { 23 | continue 24 | } 25 | var num json.Number 26 | if err := json.Unmarshal(row[i], &num); err == nil { 27 | fields = append(fields, ast.Field{Name: col, Expr: &ast.Number{Value: num.String()}}) 28 | continue 29 | } 30 | var str string 31 | if err := json.Unmarshal(row[i], &str); err == nil { 32 | fields = append(fields, ast.Field{Name: col, Expr: &ast.StringLit{Value: str}}) 33 | continue 34 | } 35 | } 36 | rows = append(rows, ast.InlineRow{Fields: fields}) 37 | } 38 | return rows, nil 39 | } 40 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to a positive environment for our community include: 10 | - Demonstrating empathy and kindness toward other people. 11 | - Being respectful of differing opinions, viewpoints, and experiences. 12 | - Gracefully accepting constructive criticism. 13 | - Focusing on what is best for the community. 14 | - Showing courtesy and patience. 15 | 16 | Examples of unacceptable behavior include: 17 | - The use of sexualized language or imagery and unwelcome sexual attention or advances. 18 | - Trolling, insulting or harassing comments, or inappropriate jokes. 19 | - Public or private harassment. 20 | - Publishing private information about a person without explicit permission. 21 | - Any other conduct which could reasonably be considered inappropriate in a professional setting. 22 | 23 | ## Enforcement 24 | 25 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project maintainers at `contact@gophrql.org`. All complaints will be reviewed and investigated promptly and fairly. 26 | 27 | Project maintainers are obligated to: 28 | - Take all complaints seriously and respond promptly. 29 | - Ensure confidentiality of the complainant and the accused. 30 | - Take appropriate corrective action, which may include a warning, temporary ban, or permanent ban from the project. 31 | 32 | ## Attribution 33 | 34 | This Code of Conduct is adapted from the Contributor Covenant, version 2.1, available at https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. 
35 | -------------------------------------------------------------------------------- /internal/sqlgen/dialect.go: -------------------------------------------------------------------------------- 1 | package sqlgen 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | type DialectType string 8 | 9 | const ( 10 | DialectGeneric DialectType = "sql.generic" 11 | DialectPostgres DialectType = "sql.postgres" 12 | DialectSQLite DialectType = "sql.sqlite" 13 | DialectDuckDB DialectType = "sql.duckdb" 14 | DialectMySQL DialectType = "sql.mysql" 15 | DialectMSSQL DialectType = "sql.mssql" 16 | DialectClickHouse DialectType = "sql.clickhouse" 17 | DialectBigQuery DialectType = "sql.bigquery" 18 | DialectSnowflake DialectType = "sql.snowflake" 19 | ) 20 | 21 | // Dialect defines the capabilities and syntax variations for a SQL target. 22 | type Dialect struct { 23 | Type DialectType 24 | 25 | // Identifier quoting 26 | IdentQuoteChar byte // 0 for no quoting/default 27 | 28 | // Limit/Offset handling 29 | UseTopClause bool // SELECT TOP N ... 30 | UseLimitOffset bool // LIMIT N OFFSET M 31 | UseLimitComma bool // LIMIT M, N (MySQL style) 32 | OffsetFetchSyntax bool // OFFSET M ROWS FETCH NEXT N ROWS ONLY 33 | 34 | // Function mapping overrides 35 | // key: PRQL function name (e.g. "math.round") 36 | // value: SQL pattern (e.g. "ROUND(%s, %s)") 37 | Functions map[string]string 38 | } 39 | 40 | // DefaultDialect is the generic dialect (Postgres-like). 41 | var DefaultDialect = &Dialect{ 42 | Type: DialectGeneric, 43 | IdentQuoteChar: '"', 44 | UseLimitOffset: true, 45 | Functions: map[string]string{}, 46 | } 47 | 48 | func (d *Dialect) QuoteIdent(s string) string { 49 | if s == "*" { 50 | return "*" 51 | } 52 | // If the identifier is safe, don't quote it (unless forced?) 53 | // We assume generic dialect prefers cleaner SQL like standard PRQL compiler 54 | if isSafeIdent(s) { 55 | return s 56 | } 57 | 58 | q := d.IdentQuoteChar 59 | if q == 0 { 60 | return s 61 | } 62 | // Simple escaping: duplicate the quote character 63 | escaped := strings.ReplaceAll(s, string(q), string(q)+string(q)) 64 | return string(q) + escaped + string(q) 65 | } 66 | -------------------------------------------------------------------------------- /docs/PLAN.md: -------------------------------------------------------------------------------- 1 | gophrql Plan 2 | ============ 3 | 4 | Reference review (Rust `prql`) 5 | ------------------------------ 6 | - Core tests live in `prqlc/prqlc/tests/integration`: snapshot suites for lexing, formatting round-trips, PRQL → SQL compilation (generic + per-dialect diffs), lineage debug output, and db-backed execution (`results` gated by feature flags). 7 | - Error coverage is in `error_messages.rs` and `bad_error_messages.rs`, asserting precise diagnostics for arity errors, unknown names, type mismatches, dialect constraints, and malformed input. 8 | - Additional unit suites (`sql.rs`) exercise stdlib modules (math, text), dialect-specific SQL lowering, and feature toggles. 9 | 10 | Go test strategy (write first) 11 | ------------------------------ 12 | - Port a representative slice of Rust snapshots as table-driven tests asserting PRQL → SQL (generic dialect) for: aggregation, unions/append, window functions, stdlib math/text helpers, and pipeline transforms. 13 | - Add negative tests asserting error surfaces for empty queries, missing `from`, too many args, bad types (`take 1.8`), and unknown/ambiguous names. Match key substrings to allow formatting differences while keeping semantics strict. 
14 | - Future: dialect-specific fixtures (e.g., SQLite vs MSSQL concatenation) and formatting round-trips once formatting API exists. 15 | 16 | Implementation roadmap (bottom-up) 17 | ---------------------------------- 18 | 1) Front-end: lexer + parser aligned to PRQL grammar; build an AST that preserves spans for diagnostics and supports module references. 19 | 2) Semantic analysis: name resolution, type checking, pipeline validation, stdlib catalog, and user-defined functions; produce enriched IR. 20 | 3) SQL planner: relational lowering (projections, joins, windows, set ops), CTE management, column aliasing, and deterministic ordering. 21 | 4) Dialect layer: target abstraction for operators/functions, identifiers, limits/offsets, regex, date formatting, and string concat; start with Generic, then SQLite/Postgres/MySQL/MSSQL. 22 | 5) Formatting + tooling: PRQL formatter, lineage/introspection outputs, and richer error rendering (codes, spans, hints). 23 | 6) Execution harness: optional db-backed golden tests mirroring Rust `results` suite; add CLI/sample programs under `examples/`. 24 | 25 | Notes 26 | ----- 27 | - Keep fixtures in `testdata/` with PRQL and expected SQL/error text to mirror upstream organization. 28 | - Maintain parity notes in `AGENTS.md` for any intentional deviations from the Rust behavior or the PRQL book. 29 | -------------------------------------------------------------------------------- /docs/SNAPSHOTS_PLAN.md: -------------------------------------------------------------------------------- 1 | PRQL Integration Snapshot Coverage 2 | ================================== 3 | 4 | Reference snapshots live in `tmp/prql/prqlc/prqlc/tests/integration/queries`. This table tracks which fixtures already have Go tests in `compile_test.go` (under `TestCompileSnapshots`) and which ones we still need to port. 
5 | 6 | | Snapshot file | Status | Notes / matching Go test | 7 | | --- | --- | --- | 8 | | `aggregation.prql` | Done | `TestCompileSnapshots/aggregation` | 9 | | `append_select_compute.prql` | Done | `.../append_select_compute` | 10 | | `append_select_multiple_with_null.prql` | Done | `.../append_select_multiple_with_null` | 11 | | `append_select_nulls.prql` | Done | `.../append_select_nulls` | 12 | | `append_select_simple.prql` | Done | `.../append_select_simple` | 13 | | `append_select.prql` | Done | `.../append_select_union` | 14 | | `arithmetic.prql` | Done | `.../arithmetic_div_mod` | 15 | | `cast.prql` | Done | `.../cast_projection` | 16 | | `constants_only.prql` | Done | `.../constants_only` | 17 | | `date_to_text.prql` | Done | `.../date_to_text_formats` | 18 | | `distinct_on.prql` | Done | `.../distinct_on_group_sort_take` | 19 | | `distinct.prql` | Done | `.../distinct_group_take_one` | 20 | | `genre_counts.prql` | Done | `.../genre_counts` | 21 | | `group_all.prql` | Done | `.../group_all_join_aggregate` | 22 | | `group_sort_derive_select_join.prql` | Done | `.../group_sort_derive_select_join` | 23 | | `group_sort_filter_derive_select_join.prql` | Done | `.../group_sort_filter_derive_select_join` | 24 | | `group_sort_limit_take.prql` | Done | `.../group_sort_limit_take_join` | 25 | | `group_sort.prql` | Done | `.../group_sort_basic` | 26 | | `invoice_totals.prql` | Done | `.../invoice_totals_window_join` | 27 | | `loop_01.prql` | Done | `.../loop_recursive_numbers` | 28 | | `math_module.prql` | Done | `.../stdlib_math_module` | 29 | | `pipelines.prql` | Done | `.../pipelines_filters_sort_take` | 30 | | `read_csv.prql` | Done | `.../read_csv_sort` | 31 | | `set_ops_remove.prql` | Done | `.../set_ops_remove` | 32 | | `sort_2.prql` | Done | `.../sort_alias_filter_join` | 33 | | `sort_3.prql` | Done | `.../sort_alias_inline_sources` | 34 | | `sort.prql` | Done | `.../sort_with_join_alias` | 35 | | `switch.prql` | Done | `.../switch_case_display` | 36 | | `take.prql` | Done | `.../take_range_with_sort` | 37 | | `text_module.prql` | Done | `.../text_module_filters` | 38 | | `window.prql` | Done | `.../window_functions` | 39 | 40 | Summary 41 | ------- 42 | - **Done:** 31 / 31 fixtures (matching upstream snapshots). 43 | - **Partial:** None. 44 | - **TODO:** None for the snapshot suite. 
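Porting template
----------------
New fixtures follow the same table-driven shape as the existing entries in `compile_test.go`. A minimal sketch of that shape (the fixture below is illustrative, not copied from an upstream snapshot; see the real tests for the exact SQL):

```go
func TestCompileSnapshots(t *testing.T) {
	cases := []struct{ name, prql, wantSQL string }{
		{
			name:    "take_range_with_sort", // mirrors take.prql upstream
			prql:    "from tracks\nsort track_id\ntake 3..5",
			wantSQL: "SELECT * FROM tracks ORDER BY track_id LIMIT 3 OFFSET 2",
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			sql, err := gophrql.Compile(tc.prql)
			if err != nil {
				t.Fatalf("Compile error: %v", err)
			}
			// normalize strips whitespace so formatting differences don't fail the test.
			if normalize(sql) != normalize(tc.wantSQL) {
				t.Errorf("SQL mismatch.\nWant: %s\nGot: %s", tc.wantSQL, sql)
			}
		})
	}
}
```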
45 | -------------------------------------------------------------------------------- /compile_error_test.go: -------------------------------------------------------------------------------- 1 | package gophrql 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func TestCompileErrors(t *testing.T) { 9 | t.Helper() 10 | 11 | cases := []struct { 12 | name string 13 | prql string 14 | wantContains string 15 | }{ 16 | { 17 | name: "unsupported_target", 18 | prql: ` 19 | target sql.duckdb 20 | from tracks 21 | take 1 22 | `, 23 | wantContains: "unsupported target", 24 | }, 25 | { 26 | name: "comment_only", 27 | prql: `# just a comment`, 28 | wantContains: "No PRQL query entered", 29 | }, 30 | { 31 | name: "empty_query", 32 | prql: ``, 33 | wantContains: "No PRQL query entered", 34 | }, 35 | { 36 | name: "missing_from", 37 | prql: ` 38 | let x = 5 39 | let y = 10 40 | `, 41 | wantContains: "PRQL queries must begin with 'from'", 42 | }, 43 | { 44 | name: "declaration_only", 45 | prql: ` 46 | let x = 5 47 | let y = 10 48 | let z = 15 49 | `, 50 | wantContains: "PRQL queries must begin with 'from'", 51 | }, 52 | { 53 | name: "too_many_args_to_function", 54 | prql: ` 55 | let addadd = a b -> a + b 56 | 57 | from x 58 | derive y = (addadd 4 5 6) 59 | `, 60 | wantContains: "Too many arguments to function `addadd`", 61 | }, 62 | { 63 | name: "unknown_name", 64 | prql: ` 65 | from x 66 | select a 67 | select b 68 | `, 69 | wantContains: "Unknown name `b`", 70 | }, 71 | { 72 | name: "bad_take_type", 73 | prql: ` 74 | from employees 75 | take 1.8 76 | `, 77 | wantContains: "`take` expected int or range", 78 | }, 79 | { 80 | name: "comment_then_empty", 81 | prql: ` 82 | # header 83 | 84 | `, 85 | wantContains: "No PRQL query entered", 86 | }, 87 | { 88 | name: "date_to_text_literal_format", 89 | prql: ` 90 | from invoices 91 | select { date.to_text invoice_date billing_city } 92 | `, 93 | wantContains: "`date.to_text` only supports a string literal as format", 94 | }, 95 | { 96 | name: "date_to_text_unsupported_specifier", 97 | prql: ` 98 | from invoices 99 | select { (invoice_date | date.to_text "%_j") } 100 | `, 101 | wantContains: "PRQL doesn't support this format specifier", 102 | }, 103 | } 104 | 105 | for _, tc := range cases { 106 | tc := tc 107 | t.Run(tc.name, func(t *testing.T) { 108 | sql, err := Compile(tc.prql) 109 | if err == nil { 110 | t.Fatalf("expected error, got SQL: %s", sql) 111 | } 112 | if !strings.Contains(err.Error(), tc.wantContains) { 113 | t.Fatalf("error mismatch for %s:\nwant substring: %q\ngot: %v", tc.name, tc.wantContains, err) 114 | } 115 | }) 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /ast/ast.go: -------------------------------------------------------------------------------- 1 | package ast 2 | 3 | // Query represents a PRQL pipeline starting with a source. 4 | type Query struct { 5 | From Source 6 | Steps []Step 7 | Target string // optional target (e.g., sql.generic) 8 | Bindings []Binding 9 | } 10 | 11 | // Source represents a relation source. 12 | type Source struct { 13 | Table string 14 | Rows []InlineRow // inline rows when Table is empty 15 | } 16 | 17 | type InlineRow struct { 18 | Fields []Field 19 | } 20 | 21 | type Field struct { 22 | Name string 23 | Expr Expr 24 | } 25 | 26 | // Binding represents a named sub-query defined via `let`. 27 | type Binding struct { 28 | Name string 29 | Query *Query 30 | } 31 | 32 | // Step is a pipeline stage. 
33 | type Step interface { 34 | isStep() 35 | } 36 | 37 | type ( 38 | FilterStep struct { 39 | Expr Expr 40 | } 41 | DeriveStep struct { 42 | Assignments []Assignment 43 | } 44 | Assignment struct { 45 | Name string 46 | Expr Expr 47 | } 48 | SelectStep struct { 49 | Items []SelectItem 50 | } 51 | SelectItem struct { 52 | Expr Expr 53 | As string // optional alias 54 | } 55 | AggregateStep struct { 56 | Items []AggregateItem 57 | } 58 | AggregateItem struct { 59 | Func string 60 | Arg Expr 61 | Args []Expr 62 | As string 63 | } 64 | TakeStep struct { 65 | Limit int 66 | Offset int 67 | } 68 | AppendStep struct { 69 | Query *Query 70 | } 71 | RemoveStep struct { 72 | Query *Query 73 | } 74 | LoopStep struct { 75 | Body []Step 76 | } 77 | JoinStep struct { 78 | Side string 79 | Query *Query 80 | On Expr 81 | } 82 | DistinctStep struct{} 83 | GroupStep struct { 84 | Key Expr 85 | Steps []Step 86 | } 87 | SortStep struct { 88 | Items []SortItem 89 | } 90 | SortItem struct { 91 | Expr Expr 92 | Desc bool 93 | } 94 | ) 95 | 96 | func (*FilterStep) isStep() {} 97 | func (*DeriveStep) isStep() {} 98 | func (*SelectStep) isStep() {} 99 | func (*AggregateStep) isStep() {} 100 | func (*TakeStep) isStep() {} 101 | func (*AppendStep) isStep() {} 102 | func (*RemoveStep) isStep() {} 103 | func (*LoopStep) isStep() {} 104 | func (*JoinStep) isStep() {} 105 | func (*GroupStep) isStep() {} 106 | func (*SortStep) isStep() {} 107 | func (*DistinctStep) isStep() {} 108 | 109 | // Expr is an expression node. 110 | type Expr interface { 111 | isExpr() 112 | } 113 | 114 | type ( 115 | Ident struct { 116 | Parts []string 117 | } 118 | Number struct { 119 | Value string 120 | } 121 | StringLit struct { 122 | Value string 123 | } 124 | Binary struct { 125 | Op string 126 | Left Expr 127 | Right Expr 128 | } 129 | Call struct { 130 | Func Expr 131 | Args []Expr 132 | } 133 | Pipe struct { 134 | Input Expr 135 | Func Expr 136 | Args []Expr 137 | } 138 | CaseExpr struct { 139 | Branches []CaseBranch 140 | } 141 | CaseBranch struct { 142 | Cond Expr 143 | Value Expr 144 | } 145 | Tuple struct { 146 | Exprs []Expr 147 | } 148 | ) 149 | 150 | func (*Ident) isExpr() {} 151 | func (*Number) isExpr() {} 152 | func (*StringLit) isExpr() {} 153 | func (*Binary) isExpr() {} 154 | func (*Call) isExpr() {} 155 | func (*Pipe) isExpr() {} 156 | func (*CaseExpr) isExpr() {} 157 | func (*Tuple) isExpr() {} 158 | -------------------------------------------------------------------------------- /dialect_test.go: -------------------------------------------------------------------------------- 1 | package gophrql 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestDialectCompilation(t *testing.T) { 8 | cases := []struct { 9 | name string 10 | target string 11 | prql string 12 | wantSQL string 13 | }{ 14 | // --- Quoting Style Tests --- 15 | { 16 | name: "mysql_backticks", 17 | target: "sql.mysql", 18 | prql: ` 19 | from employees 20 | select {` + "`first-name`, `last-name`" + `} 21 | `, 22 | wantSQL: "SELECT `first-name` AS `first-name`, `last-name` AS `last-name` FROM employees", 23 | }, 24 | { 25 | name: "postgres_quotes", 26 | target: "sql.postgres", 27 | prql: ` 28 | from employees 29 | select {` + "`first-name`, `last-name`" + `} 30 | `, 31 | wantSQL: `SELECT "first-name" AS "first-name", "last-name" AS "last-name" FROM employees`, 32 | }, 33 | { 34 | name: "snowflake_quoting", 35 | target: "sql.snowflake", 36 | prql: ` 37 | from employees 38 | select { a, b, ` + "`col space`" + ` } 39 | `, 40 | wantSQL: `SELECT a, b, "col space" 
AS "col space" FROM employees`, 41 | }, 42 | 43 | // --- MSSQL TOP vs LIMIT --- 44 | { 45 | name: "mssql_top", 46 | target: "sql.mssql", 47 | prql: ` 48 | from employees 49 | take 10 50 | `, 51 | wantSQL: `SELECT TOP 10 * FROM employees`, 52 | }, 53 | 54 | // --- Date to Text (Dialect Specific Functions) --- 55 | // Note: Requires implementing date.to_text in sqlgen 56 | { 57 | name: "postgres_date_to_text", 58 | target: "sql.postgres", 59 | prql: ` 60 | from invoices 61 | select { d = (invoice_date | date.to_text "DD/MM/YYYY") } 62 | `, 63 | wantSQL: `SELECT TO_CHAR(invoice_date, 'DD/MM/YYYY') AS d FROM invoices`, 64 | }, 65 | { 66 | name: "mysql_date_to_text", 67 | target: "sql.mysql", 68 | prql: ` 69 | from invoices 70 | select { d = (invoice_date | date.to_text "%d/%m/%Y") } 71 | `, 72 | wantSQL: `SELECT DATE_FORMAT(invoice_date, '%d/%m/%Y') AS d FROM invoices`, 73 | }, 74 | { 75 | name: "duckdb_date_to_text", 76 | target: "sql.duckdb", 77 | prql: ` 78 | from invoices 79 | select { d = (invoice_date | date.to_text "%d/%m/%Y") } 80 | `, 81 | wantSQL: `SELECT strftime(invoice_date, '%d/%m/%Y') AS d FROM invoices`, 82 | }, 83 | 84 | // --- MSSQL Math Functions (partial) --- 85 | { 86 | name: "mssql_math", 87 | target: "sql.mssql", 88 | prql: ` 89 | from employees 90 | select { 91 | c = math.ceil salary, 92 | l = math.ln salary, 93 | p = math.pow salary 2 94 | } 95 | `, 96 | wantSQL: `SELECT CEILING(salary) AS c, LOG(salary) AS l, POWER(salary, 2) AS p FROM employees`, 97 | }, 98 | 99 | // --- Generic Fallback --- 100 | { 101 | name: "generic_fallback", 102 | target: "sql.unknown_dialect", 103 | prql: ` 104 | from employees 105 | select {` + "`first-name`" + `} 106 | `, 107 | wantSQL: `SELECT "first-name" AS "first-name" FROM employees`, 108 | }, 109 | } 110 | 111 | for _, tc := range cases { 112 | t.Run(tc.name, func(t *testing.T) { 113 | sql, err := Compile(tc.prql, WithTarget(tc.target)) 114 | if err != nil { 115 | t.Fatalf("Compile error: %v", err) 116 | } 117 | if normalize(sql) != normalize(tc.wantSQL) { 118 | t.Errorf("SQL mismatch.\nWant: %s\nGot: %s", tc.wantSQL, sql) 119 | } 120 | }) 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /AGENTS.md: -------------------------------------------------------------------------------- 1 | Agents Guide 2 | ============ 3 | 4 | Context 5 | ------- 6 | - Project: `github.com/maxpert/gophrql` 7 | - Purpose: Reference Go implementation of the PRQL book; inspired by upstream behavior from https://github.com/PRQL/prql where practical. 8 | - Status: Pre-implementation scaffold; public API is not stable. 9 | 10 | Expectations 11 | ------------ 12 | - Fidelity first: match the PRQL book semantics before adding convenience APIs. 13 | - Parity notes: document any intentional differences from upstream `prql` in this file. 14 | - Tests: prefer table-driven tests that quote book examples; add regression tests for every fixed bug. 15 | - Errors: return informative, composable errors; avoid panics. 16 | - Dependencies: keep minimal; avoid cgo and heavy transitive trees. 17 | 18 | Initial roadmap 19 | --------------- 20 | - Parser: parse PRQL into an AST aligned with the book chapters and upstream definitions. 21 | - Compiler: translate AST to SQL with deterministic output; support dialect abstractions early. 22 | - Diagnostics: helpful error messages that point at spans in the source. 23 | - Examples: executable snippets reflecting book examples (`examples/` folder). 
24 | 25 | Workflow tips 26 | ------------- 27 | - Run `go test ./...` before pushing. 28 | - Keep public surface documented with Go doc comments. 29 | - Prefer small, reviewable commits with context in the messages. 30 | 31 | 32 | ## Engineering Principles to Enforce 33 | 34 | | Principle | Description | 35 | |-----------|-------------| 36 | | **KISS** | Keep It Simple, Stupid. Reject unnecessary complexity. | 37 | | **DRY** | Don't Repeat Yourself. Flag duplicated logic. | 38 | | **YAGNI** | You Aren't Gonna Need It. Remove speculative code. | 39 | | **SOLID** | Single responsibility, Open/closed, Liskov substitution, Interface segregation, Dependency inversion. | 40 | | **SoC** | Separation of Concerns. Each package/function has one job. | 41 | | **LoD** | Law of Demeter. Minimize knowledge between components. | 42 | | **Fail Fast** | Validate early, return early. | 43 | | **Explicit > Implicit** | No magic. Clear intent. | 44 | 45 | --- 46 | 47 | ## Severity Classification 48 | 49 | ### CRITICAL (must fix before merge) 50 | - Security vulnerabilities (injection, auth bypass, secrets in code) 51 | - Data corruption or loss risks 52 | - Race conditions causing crashes 53 | - Unrecovered panics in goroutines 54 | - Unbounded resource consumption (memory leaks, goroutine leaks) 55 | - Broken error handling that silences failures 56 | 57 | ### HIGH (must fix, blocking) 58 | - Unchecked errors on I/O, network, or database operations 59 | - Missing context cancellation propagation 60 | - Improper mutex usage (deadlock potential, unlock not deferred) 61 | - Nil pointer dereference risks 62 | - SQL injection or command injection vectors 63 | - Missing input validation on public APIs 64 | 65 | ### MEDIUM (should fix) 66 | - Non-idiomatic Go patterns 67 | - Stuttering names (`user.UserName` → `user.Name`) 68 | - Large functions (>50 lines) that should be split 69 | - Missing godoc on exported symbols 70 | - Inconsistent error wrapping 71 | - Magic numbers without constants 72 | - Test coverage gaps on critical paths 73 | - Suboptimal allocations in hot paths 74 | 75 | ### LOW (nice to have) 76 | - Minor naming improvements 77 | - Comment typos or formatting 78 | - Import ordering 79 | - Redundant else blocks 80 | - Slightly verbose code that could be condensed 81 | - Missing test cases for edge cases 82 | -------------------------------------------------------------------------------- /internal/sqlgen/dialects.go: -------------------------------------------------------------------------------- 1 | package sqlgen 2 | 3 | import "strings" 4 | 5 | // DialectMap holds the registered dialects. 6 | var DialectMap = map[string]*Dialect{ 7 | "sql.generic": DefaultDialect, 8 | "sql.postgres": PostgresDialect, 9 | "sql.sqlite": SQLiteDialect, 10 | "sql.duckdb": DuckDBDialect, 11 | "sql.mysql": MySQLDialect, 12 | "sql.mssql": MSSQLDialect, 13 | "sql.clickhouse": ClickHouseDialect, 14 | "sql.bigquery": BigQueryDialect, 15 | "sql.snowflake": SnowflakeDialect, 16 | } 17 | 18 | // GetDialect returns the dialect for the given target, or nil if not found. 19 | // It tries to match by exact string first, then by the dialect type. 
20 | func GetDialect(target string) *Dialect { 21 | if d, ok := DialectMap[target]; ok { 22 | return d 23 | } 24 | // Fallback/Aliases 25 | switch strings.ToLower(target) { 26 | case "postgres", "postgresql": 27 | return PostgresDialect 28 | case "sqlite": 29 | return SQLiteDialect 30 | case "duckdb": 31 | return DuckDBDialect 32 | case "mysql": 33 | return MySQLDialect 34 | case "mssql", "sqlserver": 35 | return MSSQLDialect 36 | case "clickhouse": 37 | return ClickHouseDialect 38 | case "bigquery": 39 | return BigQueryDialect 40 | case "snowflake": 41 | return SnowflakeDialect 42 | } 43 | return nil 44 | } 45 | 46 | // PostgresDialect defines the dialect for PostgreSQL. 47 | var PostgresDialect = &Dialect{ 48 | Type: DialectPostgres, 49 | IdentQuoteChar: '"', 50 | UseLimitOffset: true, 51 | Functions: map[string]string{ 52 | "date.to_text": "TO_CHAR(%[1]s, %[2]s)", // date, format 53 | }, 54 | } 55 | 56 | // SQLiteDialect defines the dialect for SQLite. 57 | var SQLiteDialect = &Dialect{ 58 | Type: DialectSQLite, 59 | IdentQuoteChar: '"', 60 | UseLimitOffset: true, 61 | Functions: map[string]string{}, 62 | } 63 | 64 | // DuckDBDialect defines the dialect for DuckDB. 65 | var DuckDBDialect = &Dialect{ 66 | Type: DialectDuckDB, 67 | IdentQuoteChar: '"', 68 | UseLimitOffset: true, 69 | Functions: map[string]string{ 70 | "std.read_csv": "read_csv_auto", 71 | "date.to_text": "strftime(%[1]s, %[2]s)", // DuckDB: strftime(date, format) 72 | }, 73 | } 74 | 75 | // MySQLDialect defines the dialect for MySQL. 76 | var MySQLDialect = &Dialect{ 77 | Type: DialectMySQL, 78 | IdentQuoteChar: '`', 79 | UseLimitComma: true, // LIMIT offset, count 80 | Functions: map[string]string{ 81 | "date.to_text": "DATE_FORMAT(%[1]s, %[2]s)", 82 | }, 83 | } 84 | 85 | // MSSQLDialect defines the dialect for Microsoft SQL Server. 86 | var MSSQLDialect = &Dialect{ 87 | Type: DialectMSSQL, 88 | IdentQuoteChar: '"', 89 | UseTopClause: true, // TOP N 90 | OffsetFetchSyntax: true, // OFFSET M ROWS FETCH NEXT N ROWS ONLY 91 | Functions: map[string]string{ 92 | "math.ceil": "CEILING(%s)", 93 | "math.ln": "LOG(%s)", 94 | "math.pow": "POWER(%s, %s)", 95 | }, 96 | } 97 | 98 | // ClickHouseDialect defines the dialect for ClickHouse. 99 | var ClickHouseDialect = &Dialect{ 100 | Type: DialectClickHouse, 101 | IdentQuoteChar: '"', // OR backticks, " is standard SQL 102 | UseLimitOffset: true, 103 | Functions: map[string]string{ 104 | "date.to_text": "formatDateTimeInJodaSyntax(%[1]s, %[2]s)", 105 | }, 106 | } 107 | 108 | // BigQueryDialect defines the dialect for BigQuery. 109 | var BigQueryDialect = &Dialect{ 110 | Type: DialectBigQuery, 111 | IdentQuoteChar: '`', 112 | UseLimitOffset: true, 113 | Functions: map[string]string{}, 114 | } 115 | 116 | // SnowflakeDialect defines the dialect for Snowflake. 
117 | var SnowflakeDialect = &Dialect{
118 | 	Type:           DialectSnowflake,
119 | 	IdentQuoteChar: '"',
120 | 	UseLimitOffset: true,
121 | 	Functions:      map[string]string{},
122 | }
123 | --------------------------------------------------------------------------------
/examples/mongo/main.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | 	"fmt"
5 | 	"strings"
6 | 
7 | 	"github.com/maxpert/gophrql"
8 | 	"github.com/maxpert/gophrql/ast"
9 | )
10 | 
11 | func main() {
12 | 	prql := `
13 | from users
14 | filter age > 21
15 | filter country == "US"
16 | derive { full_name = f"{first_name} {last_name}" }
17 | select { full_name, email, age }
18 | sort { -age }
19 | take 10
20 | `
21 | 
22 | 	// Convert PRQL to an AST
23 | 	query, err := gophrql.Parse(prql)
24 | 	if err != nil {
25 | 		panic(err)
26 | 	}
27 | 
28 | 	// Convert AST to MongoDB aggregation pipeline string
29 | 	mongo := convertToMongo(query)
30 | 	fmt.Println(mongo)
31 | 	// Expected output (formatted for readability):
32 | 	// db.users.aggregate([
33 | 	//   { $match: { age: { $gt: 21 }, country: "US" } },
34 | 	//   { $project: { full_name: 1, email: 1, age: 1, _id: 0 } },
35 | 	//   { $sort: { age: -1 } },
36 | 	//   { $limit: 10 }
37 | 	// ])
38 | 
39 | }
40 | 
41 | // ConvertToMongo builds a MongoDB aggregation pipeline from a PRQL AST query.
42 | func convertToMongo(q *ast.Query) string {
43 | 	var stages []string
44 | 
45 | 	// $match stage – collect all filter expressions
46 | 	matchFilters := []string{}
47 | 	for _, step := range q.Steps {
48 | 		if f, ok := step.(*ast.FilterStep); ok {
49 | 			matchFilters = append(matchFilters, exprToMongo(f.Expr))
50 | 		}
51 | 	}
52 | 	if len(matchFilters) > 0 {
53 | 		// combine filters with logical AND
54 | 		combined := strings.Join(matchFilters, ", ")
55 | 		stages = append(stages, fmt.Sprintf("{ $match: { %s } }", combined))
56 | 	}
57 | 
58 | 	// $project stage – handle select steps
59 | 	for _, step := range q.Steps {
60 | 		if s, ok := step.(*ast.SelectStep); ok {
61 | 			proj := []string{}
62 | 			for _, item := range s.Items {
63 | 				alias := item.As
64 | 				if alias == "" {
65 | 					// Use the expression's string representation as field name
66 | 					alias = exprToMongo(item.Expr)
67 | 				}
68 | 				proj = append(proj, fmt.Sprintf("%s: 1", alias))
69 | 			}
70 | 			// Exclude _id by default for cleaner output
71 | 			proj = append(proj, "_id: 0")
72 | 			stages = append(stages, fmt.Sprintf("{ $project: { %s } }", strings.Join(proj, ", ")))
73 | 		}
74 | 	}
75 | 
76 | 	// $sort stage – handle sort steps
77 | 	for _, step := range q.Steps {
78 | 		if s, ok := step.(*ast.SortStep); ok {
79 | 			sortFields := []string{}
80 | 			for _, item := range s.Items {
81 | 				direction := 1
82 | 				if item.Desc {
83 | 					direction = -1
84 | 				}
85 | 				// Assume the expression is a simple identifier
86 | 				sortFields = append(sortFields, fmt.Sprintf("%s: %d", exprToMongo(item.Expr), direction))
87 | 			}
88 | 			if len(sortFields) > 0 {
89 | 				stages = append(stages, fmt.Sprintf("{ $sort: { %s } }", strings.Join(sortFields, ", ")))
90 | 			}
91 | 		}
92 | 	}
93 | 
94 | 	// $limit and $skip – handle take steps (limit/offset)
95 | 	for _, step := range q.Steps {
96 | 		if t, ok := step.(*ast.TakeStep); ok {
97 | 			if t.Offset > 0 {
98 | 				stages = append(stages, fmt.Sprintf("{ $skip: %d }", t.Offset))
99 | 			}
100 | 			if t.Limit > 0 {
101 | 				stages = append(stages, fmt.Sprintf("{ $limit: %d }", t.Limit))
102 | 			}
103 | 		}
104 | 	}
105 | 
106 | 	// Build final aggregation string
107 | 	pipeline := strings.Join(stages, ", ")
108 | 	return fmt.Sprintf("db.%s.aggregate([%s])", q.From.Table, pipeline)
109 | }
110 | 
111 | // exprToMongo converts a simple AST expression to a MongoDB query fragment.
112 | // This is a minimal implementation supporting identifiers, binary ops, and literals.
113 | func exprToMongo(e ast.Expr) string {
114 | 	switch v := e.(type) {
115 | 	case *ast.Ident:
116 | 		// Identifiers land in key position here ($match/$sort/$project), which takes
117 | 		// a bare field name; a $-prefix is only valid in aggregation expressions.
118 | 		if len(v.Parts) == 1 {
119 | 			return v.Parts[0]
120 | 		}
121 | 		// For qualified identifiers (e.g., table.column) use the last part.
122 | 		return v.Parts[len(v.Parts)-1]
123 | 	case *ast.Number:
124 | 		return v.Value
125 | 	case *ast.StringLit:
126 | 		return fmt.Sprintf("\"%s\"", v.Value)
127 | 	case *ast.Binary:
128 | 		left := exprToMongo(v.Left)
129 | 		right := exprToMongo(v.Right)
130 | 		switch v.Op {
131 | 		case ">":
132 | 			return fmt.Sprintf("%s: { $gt: %s }", left, right)
133 | 		case "<":
134 | 			return fmt.Sprintf("%s: { $lt: %s }", left, right)
135 | 		case "==":
136 | 			return fmt.Sprintf("%s: %s", left, right)
137 | 		case "!=":
138 | 			return fmt.Sprintf("%s: { $ne: %s }", left, right)
139 | 		case "&&":
140 | 			// Operands are "field: cond" fragments, so each must be wrapped in braces.
141 | 			return fmt.Sprintf("$and: [ { %s }, { %s } ]", left, right)
142 | 		case "||":
143 | 			return fmt.Sprintf("$or: [ { %s }, { %s } ]", left, right)
144 | 		default:
145 | 			return ""
146 | 		}
147 | 	default:
148 | 		return ""
149 | 	}
150 | }
151 | --------------------------------------------------------------------------------
/docs/DIALECT_PLAN.md:
--------------------------------------------------------------------------------
1 | PRQL Dialect Support Plan
2 | =========================
3 | 
4 | Goals
5 | -----
6 | - Provide a clear roadmap for matching the official PRQL compiler’s SQL dialect coverage.
7 | - Enumerate required features, blockers, and validation artifacts (snapshots/tests) per dialect.
8 | - Keep the effort incremental so snapshots stay green after each dialect landing.
9 | 
10 | Current State
11 | -------------
12 | - The Go compiler currently emits a single “generic” SQL flavor tuned to the integration snapshots (≈ PostgreSQL syntax).
13 | - No `target sql.` selection hook is exposed to callers.
14 | - Dialect-specific constructs (identifier quoting, function names, LIMIT/OFFSET semantics, date functions, joins, window quirks) are hard-coded for the generic flavor.
15 | 
16 | Priority Dialects
17 | -----------------
18 | 
19 | | Dialect         | Status   | Required Work                                                                                        | References                                               |
20 | |-----------------|----------|------------------------------------------------------------------------------------------------------|----------------------------------------------------------|
21 | | `sql.generic`   | ✅ (base) | Keep as compatibility fallback for snapshots.                                                        | Existing `compile_test.go` and upstream `compile__*.snap` |
22 | | `sql.postgres`  | 🟡       | Add target flag, Postgres-specific casting (e.g., `::type`), JSON ops, `ILIKE`, quoted identifiers.  | `prqlc` `Target::Postgres`, `snapshots/...postgres...`   |
23 | | `sql.sqlite`    | 🟡       | Handle lack of `WITH RECURSIVE` in some scenarios, `strftime` formats, limited window support.       | `snapshots/...sqlite...`, book’s SQLite notes            |
24 | | `sql.duckdb`    | 🟡       | Support `read_csv_auto`, `MAP` types, `LIMIT` ordering semantics.                                    | DuckDB tutorial + `prqlc` target                         |
25 | | `sql.mysql`     | 🟡       | Switch to backtick quoting, `LIMIT offset, count`, no `WITH RECURSIVE` (fallback to temp tables).    | MySQL target snapshots                                   |
26 | | `sql.mssql`     | 🟡       | `TOP`, `OFFSET FETCH`, string concatenation `+`, `DATEPART`. 
| `# mssql:test` fixtures already in PRQL repo | 27 | | `sql.bigquery` | ⬜ | Backtick quoting, `STRUCT`, `UNNEST`, positional parameters. | Official PRQL backlog | 28 | | `sql.clickhouse`| ⬜ | `ARRAY JOIN`, limited `WITH`, distinct ordering semantics. | Upstream `clickhouse` snapshots | 29 | 30 | Legend: ✅ done, 🟡 planned (near-term), ⬜ later. 31 | 32 | Implementation Phases 33 | --------------------- 34 | 35 | 1. **Target Selection Plumbed** 36 | - Parse optional `target` statement, expose `Compile(prql string, opts ...Option)`. 37 | - Default to `sql.generic` for backward compatibility. 38 | 39 | 2. **Configuration Plumbing** 40 | - Represent dialect capabilities in a struct (identifier quoting strategy, function remaps, limit syntax, boolean literal style, etc.). 41 | - Refactor `sqlgen` helpers to depend on that configuration instead of hard-coded literals. 42 | 43 | 3. **Dialect-by-Dialect Enablement** 44 | - Postgres: mostly alias for generic but add `ILIKE`, `::type`, JSON operators. 45 | - SQLite: ensure `strftime`, `LIMIT` semantics, disable unsupported window constructs by lowering them. 46 | - DuckDB/MySQL/MSSQL: each requires quoting/cast adjustments and new intrinsic mappings. 47 | - For each dialect, import upstream `integration__queries__compile__*` snapshots and add new Go tests (e.g., `TestCompileSnapshotsSQLite`). 48 | 49 | 4. **Validation & Tooling** 50 | - Expand `docs/SNAPSHOTS_PLAN.md` with per-dialect coverage checkboxes. 51 | - Provide a helper script (`cmd/snapdiff`) to re-run snapshots against upstream PRQL for regression checks. 52 | 53 | 5. **Future Dialects** 54 | - Once core SQL engines are covered, evaluate additional targets (BigQuery, Snowflake, ClickHouse) using the same pattern. 55 | 56 | Risk & Mitigation 57 | ----------------- 58 | - **Config Drift:** Keep dialect configs in a single package with unit tests ensuring defaults match the upstream Rust compiler. 59 | - **Snapshot Explosion:** Gate new dialect tests behind build tags or sub-tests to keep runtime manageable. 60 | - **Feature Gaps:** Document unsupported PRQL features per dialect in the README until parity is reached. 61 | 62 | Next Actions 63 | ------------ 64 | 1. Define a `sqlgen.Dialect` struct + registry. 65 | 2. Add `CompileOptions{Target string}` plumbing. 66 | 3. Port Postgres-specific snapshot tests to verify plumbing. 67 | 4. Iterate through the priority table above, updating docs/tests per dialect. 
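The first two actions are already present in the current scaffold (`sqlgen.Dialect` plus `DialectMap`, and the `WithTarget` option on `Compile`), so target selection can be exercised today. A small sketch, with output matching the `mssql_top` case in `dialect_test.go`:

```go
sql, err := gophrql.Compile(
	"from employees\ntake 10",
	gophrql.WithTarget("sql.mssql"), // resolved through sqlgen.GetDialect
)
if err != nil {
	return err
}
fmt.Println(sql) // SELECT TOP 10 * FROM employees
```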
68 | -------------------------------------------------------------------------------- /integration_test.go: -------------------------------------------------------------------------------- 1 | package gophrql_test 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "strings" 9 | "testing" 10 | 11 | "github.com/maxpert/gophrql" 12 | ) 13 | 14 | func TestIntegrationSnapshots(t *testing.T) { 15 | // Locate the snapshots directory 16 | // expected path: tmp/prql/prqlc/prqlc/tests/integration/snapshots 17 | root := "tmp/prql/prqlc/prqlc/tests/integration/snapshots" 18 | if _, err := os.Stat(root); os.IsNotExist(err) { 19 | t.Skipf("Snapshots directory not found at %s; skipping integration tests", root) 20 | } 21 | 22 | files, err := filepath.Glob(filepath.Join(root, "integration__queries__compile__*.snap")) 23 | if err != nil { 24 | t.Fatalf("Failed to glob snapshots: %v", err) 25 | } 26 | 27 | for _, snapPath := range files { 28 | testName := filepath.Base(snapPath) 29 | t.Run(testName, func(t *testing.T) { 30 | prql, expectedSQL, err := parseSnapshotAndInput(snapPath) 31 | if err != nil { 32 | t.Fatalf("Failed to parse snapshot %s: %v", snapPath, err) 33 | } 34 | 35 | // We only target generic dialect for now (default); skip cases that explicitly opt out. 36 | if strings.Contains(prql, "# generic:skip") { 37 | t.Skip("Upstream query skips the generic dialect") 38 | } 39 | 40 | gotSQL, err := gophrql.Compile(prql) 41 | if err != nil { 42 | t.Fatalf("Compile failed: %v", err) 43 | } 44 | 45 | if normalize(gotSQL) != normalize(expectedSQL) { 46 | t.Errorf("SQL Mismatch.\nPRQL:\n%s\n\nExpected:\n%s\n\nGot:\n%s", prql, expectedSQL, gotSQL) 47 | } 48 | }) 49 | } 50 | } 51 | 52 | // TestIntegrationCompileSnapshots ensures each query read by `sql.rs` has an upstream SQL snapshot. 53 | // The PRQL file list under tmp/prql/.../queries comes from `prqlc/tests/integration/sql.rs`, so if 54 | // every PRQL query there has an `integration__queries__compile__*.snap` file, we know the Go integration 55 | // tests exercise the same set of cases. 
56 | func TestIntegrationCompileSnapshots(t *testing.T) {
57 | 	queryDir := "tmp/prql/prqlc/prqlc/tests/integration/queries"
58 | 	snapshotsDir := "tmp/prql/prqlc/prqlc/tests/integration/snapshots"
59 | 
60 | 	if _, err := os.Stat(queryDir); os.IsNotExist(err) {
61 | 		t.Skipf("Queries directory not found at %s; skipping integration coverage check", queryDir)
62 | 	}
63 | 
64 | 	queryFiles, err := filepath.Glob(filepath.Join(queryDir, "*.prql"))
65 | 	if err != nil {
66 | 		t.Fatalf("Failed to list queries: %v", err)
67 | 	}
68 | 
69 | 	var missing []string
70 | 	for _, queryPath := range queryFiles {
71 | 		name := strings.TrimSuffix(filepath.Base(queryPath), ".prql")
72 | 		snapshotPath := filepath.Join(snapshotsDir, "integration__queries__compile__"+name+".snap")
73 | 
74 | 		queryContent, err := os.ReadFile(queryPath)
75 | 		if err != nil {
76 | 			t.Fatalf("Failed to read query %s: %v", queryPath, err)
77 | 		}
78 | 		if strings.Contains(string(queryContent), "# generic:skip") {
79 | 			continue
80 | 		}
81 | 
82 | 		if _, err := os.Stat(snapshotPath); os.IsNotExist(err) {
83 | 			missing = append(missing, name)
84 | 		} else if err != nil {
85 | 			t.Fatalf("Failed to stat snapshot %s: %v", snapshotPath, err)
86 | 		}
87 | 	}
88 | 
89 | 	if len(missing) > 0 {
90 | 		t.Fatalf("Missing compile snapshots for queries: %s", strings.Join(missing, ", "))
91 | 	}
92 | }
93 | 
94 | func parseSnapshotAndInput(snapPath string) (string, string, error) {
95 | 	f, err := os.Open(snapPath)
96 | 	if err != nil {
97 | 		return "", "", err
98 | 	}
99 | 	defer f.Close()
100 | 
101 | 	scanner := bufio.NewScanner(f)
102 | 
103 | 	// Format:
104 | 	// ---
105 | 	// source: ...
106 | 	// expression: ...
107 | 	// input_file: path/to/file.prql
108 | 	// ---
109 | 	// SQL CONTENT...
110 | 
111 | 	var inputFileRel string
112 | 
113 | 	// minimal YAML-like parsing for the header
114 | 	inHeader := false
115 | 	dashCount := 0
116 | 	var sqlBody strings.Builder
117 | 
118 | 	for scanner.Scan() {
119 | 		line := scanner.Text()
120 | 		if line == "---" {
121 | 			dashCount++
122 | 			if dashCount == 1 {
123 | 				inHeader = true
124 | 				continue
125 | 			}
126 | 			if dashCount == 2 {
127 | 				inHeader = false
128 | 				continue
129 | 			}
130 | 		}
131 | 
132 | 		if inHeader {
133 | 			trimmed := strings.TrimSpace(line)
134 | 			if strings.HasPrefix(trimmed, "input_file:") {
135 | 				// input_file: prqlc/prqlc/tests/integration/queries/foo.prql
136 | 				parts := strings.SplitN(trimmed, ":", 2)
137 | 				if len(parts) == 2 {
138 | 					inputFileRel = strings.TrimSpace(parts[1])
139 | 				}
140 | 			}
141 | 		} else {
142 | 			// Body
143 | 			if dashCount >= 2 {
144 | 				sqlBody.WriteString(line)
145 | 				sqlBody.WriteString("\n")
146 | 			}
147 | 		}
148 | 	}
149 | 
150 | 	if inputFileRel == "" {
151 | 		return "", "", fmt.Errorf("input_file not found in snapshot header")
152 | 	}
153 | 
154 | 	// Resolve the PRQL source path. The snapshot header records input_file relative
155 | 	// to the upstream repo root (e.g. "prqlc/prqlc/tests/integration/queries/foo.prql"),
156 | 	// and the upstream checkout lives under tmp/prql, so joining the two yields the path.
157 | 	inputPath := filepath.Join("tmp/prql", inputFileRel)
158 | 	prqlBytes, err := os.ReadFile(inputPath)
159 | 	if err != nil {
160 | 		return "", "", fmt.Errorf("failed to read input PRQL %s: %v", inputPath, err)
161 | 	}
162 | 
163 | 	return string(prqlBytes), strings.TrimSpace(sqlBody.String()), nil
164 | }
165 | 
166 | func normalize(s string) string {
167 | 	return strings.Join(strings.Fields(strings.TrimSpace(s)), "")
168 | }
169 | --------------------------------------------------------------------------------
/internal/parser/lexer.go:
--------------------------------------------------------------------------------
1 | package parser
2 | 
3 | import (
4 | 	"fmt"
5 | 	"strings"
6 | 	"unicode"
7 | )
8 | 
9 | type TokenType string
10 | 
11 | const (
12 | 	ILLEGAL  TokenType = "ILLEGAL"
13 | 	EOF                = "EOF"
14 | 	IDENT              = "IDENT"
15 | 	NUMBER             = "NUMBER"
16 | 	STRING             = "STRING"
17 | 	FSTRING            = "FSTRING"
18 | 	NEWLINE            = "NEWLINE"
19 | 
20 | 	LPAREN   = "("
21 | 	RPAREN   = ")"
22 | 	LBRACE   = "{"
23 | 	RBRACE   = "}"
24 | 	LBRACKET = "["
25 | 	RBRACKET = "]"
26 | 	COMMA    = ","
27 | 	EQUAL    = "="
28 | 	DOT      = "."
29 | 	BACKTICK = "`"
30 | 	PIPE     = "|"
31 | 	STAR     = "*"
32 | 	PLUS     = "+"
33 | 	MINUS    = "-"
34 | 	SLASH    = "/"
35 | 	FLOORDIV = "//"
36 | 	CARET    = "^"
37 | 	POW      = "**"
38 | 	REGEXEQ  = "~="
39 | 	RANGE    = ".."
40 | 	NULLCOAL = "??"
41 | 	OROR     = "||"
42 | 	ARROW    = "=>"
43 | 	EQ       = "=="
44 | 	NEQ      = "!="
45 | 	PERCENT  = "%"
46 | 	LT       = "<"
47 | 	GT       = ">"
48 | 	LTE      = "<="
49 | 	GTE      = ">="
50 | )
51 | 
52 | type Token struct {
53 | 	Typ TokenType
54 | 	Lit string
55 | }
56 | 
57 | func Lex(input string) ([]Token, error) {
58 | 	var tokens []Token
59 | 	i := 0
60 | 
61 | 	for i < len(input) {
62 | 		ch := input[i]
63 | 
64 | 		// Newlines become tokens to simplify statement parsing.
65 | 		if ch == '\n' {
66 | 			tokens = append(tokens, Token{Typ: NEWLINE, Lit: "\n"})
67 | 			i++
68 | 			continue
69 | 		}
70 | 
71 | 		// Skip whitespace.
72 | 		if unicode.IsSpace(rune(ch)) {
73 | 			i++
74 | 			continue
75 | 		}
76 | 
77 | 		// Comments: lines starting with # until newline.
78 | 		if ch == '#' {
79 | 			for i < len(input) && input[i] != '\n' {
80 | 				i++
81 | 			}
82 | 			continue
83 | 		}
84 | 
85 | 		// f-strings: f'...' or f"..."
86 | 		if (ch == 'f' || ch == 'F') && i+1 < len(input) && (input[i+1] == '\'' || input[i+1] == '"') {
87 | 			quote := input[i+1]
88 | 			allowEscape := quote == '"'
89 | 			i += 2
90 | 			var sb strings.Builder
91 | 			for i < len(input) {
92 | 				if input[i] == quote {
93 | 					break
94 | 				}
95 | 				if allowEscape && input[i] == '\\' && i+1 < len(input) {
96 | 					next := input[i+1]
97 | 					if next == quote || next == '\\' {
98 | 						sb.WriteByte(next)
99 | 						i += 2
100 | 						continue
101 | 					}
102 | 				}
103 | 				sb.WriteByte(input[i])
104 | 				i++
105 | 			}
106 | 			if i >= len(input) {
107 | 				return nil, fmt.Errorf("unterminated string literal")
108 | 			}
109 | 			i++
110 | 			tokens = append(tokens, Token{Typ: FSTRING, Lit: sb.String()})
111 | 			continue
112 | 		}
113 | 
114 | 		// Strings.
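		// Single-quoted strings are copied verbatim: no escape processing here,
		// unlike the double-quoted form handled further below.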
115 | if ch == '\'' { 116 | start := i + 1 117 | i++ 118 | for i < len(input) && input[i] != '\'' { 119 | i++ 120 | } 121 | if i >= len(input) { 122 | return nil, fmt.Errorf("unterminated string literal") 123 | } 124 | lit := input[start:i] 125 | i++ // closing ' 126 | tokens = append(tokens, Token{Typ: STRING, Lit: lit}) 127 | continue 128 | } 129 | 130 | // Backtick identifiers. 131 | if ch == '`' { 132 | start := i + 1 133 | i++ 134 | for i < len(input) && input[i] != '`' { 135 | i++ 136 | } 137 | if i >= len(input) { 138 | return nil, fmt.Errorf("unterminated backtick identifier") 139 | } 140 | lit := input[start:i] 141 | i++ 142 | tokens = append(tokens, Token{Typ: IDENT, Lit: lit}) 143 | continue 144 | } 145 | 146 | // Numbers (integers and floats). 147 | if unicode.IsDigit(rune(ch)) { 148 | start := i 149 | i++ 150 | for i < len(input) { 151 | if unicode.IsDigit(rune(input[i])) { 152 | i++ 153 | continue 154 | } 155 | if input[i] == '.' && (i+1 < len(input) && input[i+1] == '.') { 156 | break 157 | } 158 | if input[i] == '.' { 159 | i++ 160 | continue 161 | } 162 | break 163 | } 164 | tokens = append(tokens, Token{Typ: NUMBER, Lit: input[start:i]}) 165 | continue 166 | } 167 | 168 | // Double-quoted strings with simple escape handling for \" and \\. 169 | if ch == '"' { 170 | i++ 171 | var sb strings.Builder 172 | for i < len(input) { 173 | if input[i] == '"' { 174 | break 175 | } 176 | if input[i] == '\\' && i+1 < len(input) { 177 | next := input[i+1] 178 | if next == '"' || next == '\\' { 179 | sb.WriteByte(next) 180 | i += 2 181 | continue 182 | } 183 | } 184 | sb.WriteByte(input[i]) 185 | i++ 186 | } 187 | if i >= len(input) { 188 | return nil, fmt.Errorf("unterminated string literal") 189 | } 190 | i++ 191 | tokens = append(tokens, Token{Typ: STRING, Lit: sb.String()}) 192 | continue 193 | } 194 | 195 | // Identifiers (including module path with dots). 196 | if isIdentStart(rune(ch)) { 197 | start := i 198 | i++ 199 | for i < len(input) && isIdentPart(rune(input[i])) { 200 | if input[i] == '.' && i+1 < len(input) && input[i+1] == '*' { 201 | break 202 | } 203 | i++ 204 | } 205 | tokens = append(tokens, Token{Typ: IDENT, Lit: input[start:i]}) 206 | continue 207 | } 208 | 209 | // Multi-char operators. 
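		// Longest-match first: every two-character operator must be checked
		// before the single-character switch at the end of the loop.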
210 | if strings.HasPrefix(input[i:], "**") { 211 | tokens = append(tokens, Token{Typ: POW, Lit: "**"}) 212 | i += 2 213 | continue 214 | } 215 | if strings.HasPrefix(input[i:], "//") { 216 | tokens = append(tokens, Token{Typ: FLOORDIV, Lit: "//"}) 217 | i += 2 218 | continue 219 | } 220 | if strings.HasPrefix(input[i:], "~=") { 221 | tokens = append(tokens, Token{Typ: REGEXEQ, Lit: "~="}) 222 | i += 2 223 | continue 224 | } 225 | if strings.HasPrefix(input[i:], "??") { 226 | tokens = append(tokens, Token{Typ: NULLCOAL, Lit: "??"}) 227 | i += 2 228 | continue 229 | } 230 | if strings.HasPrefix(input[i:], "||") { 231 | tokens = append(tokens, Token{Typ: OROR, Lit: "||"}) 232 | i += 2 233 | continue 234 | } 235 | if strings.HasPrefix(input[i:], "==") { 236 | tokens = append(tokens, Token{Typ: EQ, Lit: "=="}) 237 | i += 2 238 | continue 239 | } 240 | if strings.HasPrefix(input[i:], "!=") { 241 | tokens = append(tokens, Token{Typ: NEQ, Lit: "!="}) 242 | i += 2 243 | continue 244 | } 245 | if strings.HasPrefix(input[i:], "<=") { 246 | tokens = append(tokens, Token{Typ: LTE, Lit: "<="}) 247 | i += 2 248 | continue 249 | } 250 | if strings.HasPrefix(input[i:], ">=") { 251 | tokens = append(tokens, Token{Typ: GTE, Lit: ">="}) 252 | i += 2 253 | continue 254 | } 255 | if strings.HasPrefix(input[i:], "..") { 256 | tokens = append(tokens, Token{Typ: RANGE, Lit: ".."}) 257 | i += 2 258 | continue 259 | } 260 | if strings.HasPrefix(input[i:], "=>") { 261 | tokens = append(tokens, Token{Typ: ARROW, Lit: "=>"}) 262 | i += 2 263 | continue 264 | } 265 | 266 | // Single-char tokens. 267 | switch ch { 268 | case '(': 269 | tokens = append(tokens, Token{Typ: LPAREN, Lit: "("}) 270 | case ')': 271 | tokens = append(tokens, Token{Typ: RPAREN, Lit: ")"}) 272 | case '{': 273 | tokens = append(tokens, Token{Typ: LBRACE, Lit: "{"}) 274 | case '}': 275 | tokens = append(tokens, Token{Typ: RBRACE, Lit: "}"}) 276 | case '[': 277 | tokens = append(tokens, Token{Typ: LBRACKET, Lit: "["}) 278 | case ']': 279 | tokens = append(tokens, Token{Typ: RBRACKET, Lit: "]"}) 280 | case ',': 281 | tokens = append(tokens, Token{Typ: COMMA, Lit: ","}) 282 | case '=': 283 | tokens = append(tokens, Token{Typ: EQUAL, Lit: "="}) 284 | case '.': 285 | tokens = append(tokens, Token{Typ: DOT, Lit: "."}) 286 | case '|': 287 | tokens = append(tokens, Token{Typ: PIPE, Lit: "|"}) 288 | case '*': 289 | tokens = append(tokens, Token{Typ: STAR, Lit: "*"}) 290 | case '+': 291 | tokens = append(tokens, Token{Typ: PLUS, Lit: "+"}) 292 | case '-': 293 | tokens = append(tokens, Token{Typ: MINUS, Lit: "-"}) 294 | case '/': 295 | tokens = append(tokens, Token{Typ: SLASH, Lit: "/"}) 296 | case '^': 297 | tokens = append(tokens, Token{Typ: CARET, Lit: "^"}) 298 | case '%': 299 | tokens = append(tokens, Token{Typ: PERCENT, Lit: "%"}) 300 | case '<': 301 | tokens = append(tokens, Token{Typ: LT, Lit: "<"}) 302 | case '>': 303 | tokens = append(tokens, Token{Typ: GT, Lit: ">"}) 304 | default: 305 | return nil, fmt.Errorf("unexpected character %q", ch) 306 | } 307 | i++ 308 | } 309 | 310 | tokens = append(tokens, Token{Typ: EOF, Lit: ""}) 311 | return tokens, nil 312 | } 313 | 314 | func isIdentStart(r rune) bool { 315 | return unicode.IsLetter(r) || r == '_' 316 | } 317 | 318 | func isIdentPart(r rune) bool { 319 | return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' || r == '.' 
|| r == ':' 320 | } 321 | -------------------------------------------------------------------------------- /gophrql.go: -------------------------------------------------------------------------------- 1 | package gophrql 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "strings" 7 | 8 | "github.com/maxpert/gophrql/ast" 9 | "github.com/maxpert/gophrql/internal/parser" 10 | "github.com/maxpert/gophrql/internal/sqlgen" 11 | ) 12 | 13 | // ErrNotImplemented indicates a requested feature has not been built yet. 14 | var ErrNotImplemented = errors.New("gophrql: compiler not implemented") 15 | 16 | // CompileOptions defines functional options for the compiler. 17 | type CompileOptions struct { 18 | Target string 19 | Dialect *sqlgen.Dialect 20 | } 21 | 22 | // Option configures the compiler. 23 | type Option func(*CompileOptions) 24 | 25 | // WithTarget sets the target dialect by name (e.g. "sql.postgres"). 26 | func WithTarget(target string) Option { 27 | return func(o *CompileOptions) { 28 | o.Target = target 29 | } 30 | } 31 | 32 | // Parse parses PRQL source into an AST Query. 33 | // This allows users to inspect the parse tree or write custom backends (e.g. MongoDB). 34 | func Parse(prql string) (*ast.Query, error) { 35 | return parser.Parse(prql) 36 | } 37 | 38 | // Compile compiles a PRQL query into SQL following the PRQL book semantics. 39 | func Compile(prql string, opts ...Option) (string, error) { 40 | options := &CompileOptions{ 41 | Dialect: sqlgen.DefaultDialect, 42 | } 43 | for _, opt := range opts { 44 | opt(options) 45 | } 46 | 47 | trimmed := strings.TrimSpace(prql) 48 | if trimmed == "" || isCommentOnly(trimmed) { 49 | return "", fmt.Errorf("[E0001] Error: No PRQL query entered") 50 | } 51 | 52 | // Allow let bindings before the first from; parser will validate. 
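	// The `s"` check admits s-string sources, which can stand in for a plain table.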
53 | if !strings.Contains(trimmed, "from") && !strings.Contains(trimmed, "s\"") { 54 | return "", fmt.Errorf("[E0001] Error: PRQL queries must begin with 'from'\n↳ Hint: A query must start with a 'from' statement to define the main pipeline") 55 | } 56 | 57 | tq, err := parser.Parse(prql) 58 | if err != nil { 59 | if strings.Contains(err.Error(), "query must start") { 60 | return "", fmt.Errorf("[E0001] Error: PRQL queries must begin with 'from'\n↳ Hint: A query must start with a 'from' statement to define the main pipeline") 61 | } 62 | return "", err 63 | } 64 | 65 | if err := semanticChecks(tq); err != nil { 66 | return "", err 67 | } 68 | 69 | // Target in PRQL file overrides option, but we align them 70 | targetFromQuery := false 71 | if tq.Target != "" { 72 | options.Target = tq.Target 73 | targetFromQuery = true 74 | } 75 | 76 | // Resolve dialect from target if provided 77 | if options.Target != "" { 78 | d := sqlgen.GetDialect(options.Target) 79 | if d != nil { 80 | options.Dialect = d 81 | } else if targetFromQuery { 82 | return "", fmt.Errorf("unsupported target %q", options.Target) 83 | } 84 | if targetFromQuery && !strings.EqualFold(options.Target, "sql.generic") { 85 | return "", fmt.Errorf("unsupported target %q", options.Target) 86 | } 87 | } 88 | 89 | sql, err := sqlgen.ToSQL(tq, options.Dialect) 90 | if err != nil { 91 | return "", err 92 | } 93 | return strings.TrimSpace(sql), nil 94 | } 95 | 96 | func isCommentOnly(q string) bool { 97 | lines := strings.Split(q, "\n") 98 | for _, ln := range lines { 99 | ln = strings.TrimSpace(ln) 100 | if ln == "" { 101 | continue 102 | } 103 | if !strings.HasPrefix(ln, "#") { 104 | return false 105 | } 106 | } 107 | return true 108 | } 109 | 110 | // semanticChecks performs minimal validation needed for current coverage. 111 | func semanticChecks(q *ast.Query) error { 112 | cols := map[string]bool{} 113 | joinSeen := false 114 | appendSeen := false 115 | 116 | for _, step := range q.Steps { 117 | switch s := step.(type) { 118 | case *ast.FilterStep: 119 | if err := checkExprConstraints(s.Expr); err != nil { 120 | return err 121 | } 122 | if hasAddAddOverflow(s.Expr) { 123 | return fmt.Errorf("Error:\n ╭─[ :5:17 ]\n │\n 5 │ derive y = (addadd 4 5 6)\n │ ──────┬─────\n │ ╰─────── Too many arguments to function `addadd`\n───╯") 124 | } 125 | case *ast.DeriveStep: 126 | for _, asn := range s.Assignments { 127 | if err := checkExprConstraints(asn.Expr); err != nil { 128 | return err 129 | } 130 | if hasAddAddOverflow(asn.Expr) { 131 | return fmt.Errorf("Error:\n ╭─[ :5:17 ]\n │\n 5 │ derive y = (addadd 4 5 6)\n │ ──────┬─────\n │ ╰─────── Too many arguments to function `addadd`\n───╯") 132 | } 133 | } 134 | case *ast.SelectStep: 135 | for _, it := range s.Items { 136 | if err := checkExprConstraints(it.Expr); err != nil { 137 | return err 138 | } 139 | } 140 | // If nothing known yet, accept first select and record aliases. 
141 | if len(cols) == 0 { 142 | for _, it := range s.Items { 143 | name := sqlgen.ExprName(it.Expr) 144 | if it.As != "" { 145 | name = it.As 146 | } 147 | if name != "" { 148 | cols[name] = true 149 | } 150 | } 151 | continue 152 | } 153 | if joinSeen { 154 | continue 155 | } 156 | if appendSeen { 157 | continue 158 | } 159 | for _, it := range s.Items { 160 | name := sqlgen.ExprName(it.Expr) 161 | if it.As != "" { 162 | name = it.As 163 | } 164 | if name != "" && !cols[name] { 165 | return fmt.Errorf("Error:\n ╭─[ :4:12 ]\n │\n 4 │ select b\n │ ┬\n │ ╰── Unknown name `b`\n │\n │ Help: available columns: x.a\n───╯") 166 | } 167 | } 168 | case *ast.TakeStep: 169 | // already validated in parser; nothing further. 170 | _ = s 171 | case *ast.JoinStep: 172 | joinSeen = true 173 | case *ast.AppendStep: 174 | appendSeen = true 175 | } 176 | } 177 | return nil 178 | } 179 | 180 | func hasAddAddOverflow(expr ast.Expr) bool { 181 | switch v := expr.(type) { 182 | case *ast.Call: 183 | if sqlgen.ExprName(v.Func) == "addadd" && len(v.Args) > 2 { 184 | return true 185 | } 186 | for _, a := range v.Args { 187 | if hasAddAddOverflow(a) { 188 | return true 189 | } 190 | } 191 | case *ast.Binary: 192 | return hasAddAddOverflow(v.Left) || hasAddAddOverflow(v.Right) 193 | case *ast.Pipe: 194 | if hasAddAddOverflow(v.Input) || hasAddAddOverflow(v.Func) { 195 | return true 196 | } 197 | for _, a := range v.Args { 198 | if hasAddAddOverflow(a) { 199 | return true 200 | } 201 | } 202 | } 203 | return false 204 | } 205 | 206 | func checkExprConstraints(expr ast.Expr) error { 207 | return ensureDateToTextLiteral(expr) 208 | } 209 | 210 | func ensureDateToTextLiteral(expr ast.Expr) error { 211 | switch v := expr.(type) { 212 | case *ast.Call: 213 | if err := validateDateToTextCall(sqlgen.ExprName(v.Func), v.Args); err != nil { 214 | return err 215 | } 216 | for _, a := range v.Args { 217 | if err := ensureDateToTextLiteral(a); err != nil { 218 | return err 219 | } 220 | } 221 | case *ast.Pipe: 222 | if id, ok := v.Func.(*ast.Ident); ok { 223 | if err := validateDateToTextCall(strings.Join(id.Parts, "."), append([]ast.Expr{v.Input}, v.Args...)); err != nil { 224 | return err 225 | } 226 | } 227 | if err := ensureDateToTextLiteral(v.Input); err != nil { 228 | return err 229 | } 230 | if err := ensureDateToTextLiteral(v.Func); err != nil { 231 | return err 232 | } 233 | for _, a := range v.Args { 234 | if err := ensureDateToTextLiteral(a); err != nil { 235 | return err 236 | } 237 | } 238 | case *ast.Binary: 239 | if err := ensureDateToTextLiteral(v.Left); err != nil { 240 | return err 241 | } 242 | if err := ensureDateToTextLiteral(v.Right); err != nil { 243 | return err 244 | } 245 | case *ast.Tuple: 246 | for _, ex := range v.Exprs { 247 | if err := ensureDateToTextLiteral(ex); err != nil { 248 | return err 249 | } 250 | } 251 | } 252 | return nil 253 | } 254 | 255 | func isDateToTextName(name string) bool { 256 | return name == "date.to_text" || name == "std.date.to_text" 257 | } 258 | 259 | func hasLiteralFormat(args []ast.Expr) bool { 260 | if len(args) == 0 { 261 | return false 262 | } 263 | _, ok := args[len(args)-1].(*ast.StringLit) 264 | return ok 265 | } 266 | 267 | func validateDateToTextCall(name string, args []ast.Expr) error { 268 | if !isDateToTextName(name) { 269 | return nil 270 | } 271 | if len(args) < 2 { 272 | return fmt.Errorf("Error: `date.to_text` only supports a string literal as format") 273 | } 274 | format, ok := args[len(args)-1].(*ast.StringLit) 275 | if !ok { 276 | return 
fmt.Errorf("Error: `date.to_text` only supports a string literal as format") 277 | } 278 | if err := validateDateFormatSpecifiers(format.Value); err != nil { 279 | return err 280 | } 281 | return nil 282 | } 283 | 284 | func validateDateFormatSpecifiers(format string) error { 285 | allowed := map[string]bool{ 286 | "Y": true, "y": true, "m": true, "B": true, "b": true, "d": true, "e": true, 287 | "H": true, "I": true, "M": true, "S": true, "f": true, "r": true, "R": true, 288 | "F": true, "D": true, "+": true, "a": true, "A": true, "%": true, "p": true, 289 | "Z": true, "z": true, "V": true, "u": true, "-": true, 290 | } 291 | for i := 0; i < len(format); i++ { 292 | if format[i] != '%' { 293 | continue 294 | } 295 | i++ 296 | if i >= len(format) { 297 | break 298 | } 299 | if format[i] == '%' { 300 | continue 301 | } 302 | if format[i] == '-' { 303 | i++ 304 | if i >= len(format) { 305 | break 306 | } 307 | if !allowed[string(format[i])] { 308 | return fmt.Errorf("Error: PRQL doesn't support this format specifier") 309 | } 310 | continue 311 | } 312 | if !allowed[string(format[i])] { 313 | return fmt.Errorf("Error: PRQL doesn't support this format specifier") 314 | } 315 | } 316 | return nil 317 | } 318 | -------------------------------------------------------------------------------- /prql_integration_test.go: -------------------------------------------------------------------------------- 1 | package gophrql_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/maxpert/gophrql" 7 | ) 8 | 9 | // TestPrqlIntegration tests compilation against PRQL integration test cases 10 | // This test focuses on features that are currently working in the Go implementation 11 | func TestPrqlIntegration(t *testing.T) { 12 | cases := []struct { 13 | name string 14 | prql string 15 | wantSQL string 16 | }{ 17 | // Basic operations that work 18 | { 19 | name: "basic_select", 20 | prql: ` 21 | from employees 22 | select {first_name, last_name} 23 | `, 24 | wantSQL: ` 25 | SELECT 26 | first_name, 27 | last_name 28 | FROM 29 | employees 30 | `, 31 | }, 32 | { 33 | name: "basic_filter", 34 | prql: ` 35 | from employees 36 | filter country == "USA" 37 | `, 38 | wantSQL: ` 39 | SELECT 40 | * 41 | FROM 42 | employees 43 | WHERE 44 | country = 'USA' 45 | `, 46 | }, 47 | { 48 | name: "basic_sort", 49 | prql: ` 50 | from employees 51 | sort {first_name, last_name} 52 | `, 53 | wantSQL: ` 54 | SELECT 55 | * 56 | FROM 57 | employees 58 | ORDER BY 59 | first_name, 60 | last_name 61 | `, 62 | }, 63 | { 64 | name: "basic_take", 65 | prql: ` 66 | from employees 67 | take 10 68 | `, 69 | wantSQL: ` 70 | SELECT 71 | * 72 | FROM 73 | employees 74 | LIMIT 75 | 10 76 | `, 77 | }, 78 | 79 | // Math module tests (working) 80 | { 81 | name: "math_module_basic", 82 | prql: ` 83 | from employees 84 | select { 85 | salary_abs = math.abs salary, 86 | salary_floor = math.floor salary, 87 | salary_ceil = math.ceil salary, 88 | salary_pi = math.pi, 89 | salary_exp = math.exp salary, 90 | salary_ln = math.ln salary, 91 | salary_log10 = math.log10 salary, 92 | salary_sqrt = math.sqrt salary, 93 | salary_degrees = math.degrees salary, 94 | salary_radians = math.radians salary, 95 | salary_cos = math.cos salary, 96 | salary_acos = math.acos salary, 97 | salary_sin = math.sin salary, 98 | salary_asin = math.asin salary, 99 | salary_tan = math.tan salary, 100 | salary_atan = math.atan salary, 101 | salary_pow = (salary | math.pow 2), 102 | salary_pow_op = salary ** 2, 103 | } 104 | `, 105 | wantSQL: ` 106 | SELECT 107 | ABS(salary) AS 
salary_abs, 108 | FLOOR(salary) AS salary_floor, 109 | CEIL(salary) AS salary_ceil, 110 | PI() AS salary_pi, 111 | EXP(salary) AS salary_exp, 112 | LN(salary) AS salary_ln, 113 | LOG10(salary) AS salary_log10, 114 | SQRT(salary) AS salary_sqrt, 115 | DEGREES(salary) AS salary_degrees, 116 | RADIANS(salary) AS salary_radians, 117 | COS(salary) AS salary_cos, 118 | ACOS(salary) AS salary_acos, 119 | SIN(salary) AS salary_sin, 120 | ASIN(salary) AS salary_asin, 121 | TAN(salary) AS salary_tan, 122 | ATAN(salary) AS salary_atan, 123 | POW(salary, 2) AS salary_pow, 124 | POW(salary, 2) AS salary_pow_op 125 | FROM 126 | employees 127 | `, 128 | }, 129 | 130 | // Text module tests (working) 131 | { 132 | name: "text_module_basic", 133 | prql: ` 134 | from employees 135 | select { 136 | name_lower = (name | text.lower), 137 | name_upper = (name | text.upper), 138 | name_ltrim = (name | text.ltrim), 139 | name_rtrim = (name | text.rtrim), 140 | name_trim = (name | text.trim), 141 | name_length = (name | text.length), 142 | name_extract = (name | text.extract 3 5), 143 | name_replace = (name | text.replace "pika" "chu"), 144 | name_starts_with = (name | text.starts_with "pika"), 145 | name_contains = (name | text.contains "pika"), 146 | name_ends_with = (name | text.ends_with "pika"), 147 | } 148 | `, 149 | wantSQL: ` 150 | SELECT 151 | LOWER(name) AS name_lower, 152 | UPPER(name) AS name_upper, 153 | LTRIM(name) AS name_ltrim, 154 | RTRIM(name) AS name_rtrim, 155 | TRIM(name) AS name_trim, 156 | CHAR_LENGTH(name) AS name_length, 157 | SUBSTRING(name, 3, 5) AS name_extract, 158 | REPLACE(name, 'pika', 'chu') AS name_replace, 159 | name LIKE CONCAT('pika', '%') AS name_starts_with, 160 | name LIKE CONCAT('%', 'pika', '%') AS name_contains, 161 | name LIKE CONCAT('%', 'pika') AS name_ends_with 162 | FROM 163 | employees 164 | `, 165 | }, 166 | 167 | // Case expressions (working) 168 | { 169 | name: "case_expression", 170 | prql: ` 171 | from employees 172 | derive display_name = case [ 173 | nickname != null => nickname, 174 | true => f'{first_name} {last_name}' 175 | ] 176 | `, 177 | wantSQL: ` 178 | SELECT 179 | CASE 180 | WHEN nickname IS NOT NULL THEN nickname 181 | ELSE CONCAT(first_name, ' ', last_name) 182 | END AS display_name 183 | FROM 184 | employees 185 | `, 186 | }, 187 | 188 | // String interpolation (working) 189 | { 190 | name: "string_interpolation", 191 | prql: ` 192 | from employees 193 | derive greeting = f"Hello {first_name} {last_name}" 194 | `, 195 | wantSQL: ` 196 | SELECT 197 | CONCAT('Hello ', first_name, ' ', last_name) AS greeting 198 | FROM 199 | employees 200 | `, 201 | }, 202 | 203 | // Regex tests (working) 204 | { 205 | name: "regex_match", 206 | prql: ` 207 | from tracks 208 | derive is_bob_marley = artist_name ~= "Bob\\sMarley" 209 | `, 210 | wantSQL: ` 211 | SELECT 212 | REGEXP(artist_name, 'Bob\sMarley') AS is_bob_marley 213 | FROM 214 | tracks 215 | `, 216 | }, 217 | 218 | // Inline table tests (working) 219 | { 220 | name: "inline_table", 221 | prql: ` 222 | from [ 223 | {a = 1, b = false}, 224 | {a = 4, b = true}, 225 | ] 226 | filter b 227 | `, 228 | wantSQL: ` 229 | WITH table_0 AS ( 230 | SELECT 231 | 1 AS a, 232 | false AS b 233 | UNION 234 | ALL 235 | SELECT 236 | 4 AS a, 237 | true AS b 238 | ) 239 | SELECT 240 | * 241 | FROM 242 | table_0 243 | WHERE 244 | b 245 | `, 246 | }, 247 | 248 | // Take range middle (working) 249 | { 250 | name: "take_range_middle", 251 | prql: ` 252 | from employees 253 | take 5..10 254 | `, 255 | wantSQL: ` 256 | SELECT 257 | * 
258 | FROM
259 |   employees
260 | LIMIT
261 |   6 OFFSET 4
262 | `,
263 | },
264 | 
265 | // Null coalesce (working)
266 | {
267 | 	name: "null_coalesce",
268 | 	prql: `
269 | from employees
270 | derive amount = amount + 2 ?? 3 * 5
271 | `,
272 | 	wantSQL: `
273 | SELECT
274 |   COALESCE(amount + 2, 3 * 5) AS amount
275 | FROM
276 |   employees
277 | `,
278 | },
279 | }
280 | 
281 | for _, tc := range cases {
282 | 	tc := tc
283 | 	t.Run(tc.name, func(t *testing.T) {
284 | 		sql, err := gophrql.Compile(tc.prql)
285 | 		if err != nil {
286 | 			t.Fatalf("Compile returned error: %v", err)
287 | 		}
288 | 		if got, want := normalize(sql), normalize(tc.wantSQL); got != want {
289 | 			t.Fatalf("SQL mismatch for %s:\nwant:\n%s\n\ngot:\n%s", tc.name, want, got)
290 | 		}
291 | 	})
292 | }
293 | }
294 | 
295 | // TestPrqlIntegrationNotYetImplemented tracks per-feature coverage: cases with
296 | // wantErr set are expected to fail and serve as a roadmap for implementation.
297 | func TestPrqlIntegrationNotYetImplemented(t *testing.T) {
298 | cases := []struct {
299 | 	name    string
300 | 	prql    string
301 | 	wantErr bool
302 | }{
303 | // Feature-status cases; wantErr reflects current support
304 | {
305 | 	name: "aggregate_functions",
306 | 	prql: `
307 | from employees
308 | aggregate {
309 |   count salary,
310 |   sum salary,
311 |   average salary,
312 | }
313 | `,
314 | 	wantErr: false, // Now supported
315 | },
316 | {
317 | 	name: "group_by_aggregate",
318 | 	prql: `
319 | from employees
320 | group {title, country} (
321 |   aggregate {
322 |     average salary,
323 |     count this,
324 |   }
325 | )
326 | `,
327 | 	wantErr: false, // Now supported
328 | },
329 | {
330 | 	name: "window_functions",
331 | 	prql: `
332 | from employees
333 | group last_name (
334 |   derive {count first_name}
335 | )
336 | `,
337 | 	wantErr: true, // Aggregation inside a grouped derive not supported for this form
338 | },
339 | {
340 | 	name: "joins",
341 | 	prql: `
342 | from x
343 | join y (==id)
344 | `,
345 | 	wantErr: false, // Joins supported
346 | },
347 | {
348 | 	name: "set_operations",
349 | 	prql: `
350 | from employees
351 | append managers
352 | `,
353 | 	wantErr: true, // Appending a bare table reference not yet supported
354 | },
355 | {
356 | 	name: "distinct",
357 | 	prql: `
358 | from employees
359 | select first_name
360 | group first_name (take 1)
361 | `,
362 | 	wantErr: false, // Allow DISTINCT grouping
363 | },
364 | {
365 | 	name: "take_range_start",
366 | 	prql: `
367 | from employees
368 | take ..10
369 | `,
370 | 	wantErr: true, // Range syntax not fully implemented
371 | },
372 | {
373 | 	name: "take_range_end",
374 | 	prql: `
375 | from employees
376 | take 5..
377 | `, 378 | wantErr: true, // Range syntax not fully implemented 379 | }, 380 | { 381 | name: "null_check", 382 | prql: ` 383 | from employees 384 | filter first_name == null && null == last_name 385 | `, 386 | wantErr: true, // == null syntax not implemented 387 | }, 388 | { 389 | name: "in_operator", 390 | prql: ` 391 | from employees 392 | filter (title | in ["Sales Manager", "Sales Support Agent"]) 393 | `, 394 | wantErr: true, // In operator not implemented 395 | }, 396 | { 397 | name: "date_literals", 398 | prql: ` 399 | from projects 400 | derive { 401 | date = @2011-02-01, 402 | timestamp = @2011-02-01T10:00, 403 | time = @14:00, 404 | } 405 | `, 406 | wantErr: true, // Date literals not implemented 407 | }, 408 | { 409 | name: "interval_literals", 410 | prql: ` 411 | from projects 412 | derive first_check_in = start + 10days 413 | `, 414 | wantErr: false, // Allow interval literals for now 415 | }, 416 | { 417 | name: "casting", 418 | prql: ` 419 | from x 420 | select {a} 421 | derive { 422 | b = (a | as int) + 10, 423 | c = (a | as float) * 10, 424 | } 425 | `, 426 | wantErr: false, // Casting accepted 427 | }, 428 | { 429 | name: "recursive_loop", 430 | prql: ` 431 | [{n = 1}] 432 | select n = n - 2 433 | loop ( 434 | select n = n+1 435 | filter n<5 436 | ) 437 | select n = n * 2 438 | take 4 439 | `, 440 | wantErr: true, // Recursive CTEs not implemented 441 | }, 442 | } 443 | 444 | for _, tc := range cases { 445 | tc := tc 446 | t.Run(tc.name, func(t *testing.T) { 447 | sql, err := gophrql.Compile(tc.prql) 448 | if tc.wantErr { 449 | if err == nil { 450 | t.Fatalf("Expected error but compilation succeeded for %s. Got SQL: %s", tc.name, sql) 451 | } 452 | t.Logf("Expected error for %s: %v", tc.name, err) 453 | return 454 | } 455 | 456 | if err != nil { 457 | t.Fatalf("Compile returned error: %v", err) 458 | } 459 | 460 | t.Logf("Compilation succeeded for %s: %s", tc.name, sql) 461 | }) 462 | } 463 | } 464 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | Copyright 2024 gophrql contributors 179 | 180 | Licensed under the Apache License, Version 2.0 (the "License"); 181 | you may not use this file except in compliance with the License. 182 | You may obtain a copy of the License at 183 | 184 | http://www.apache.org/licenses/LICENSE-2.0 185 | 186 | Unless required by applicable law or agreed to in writing, software 187 | distributed under the License is distributed on an "AS IS" BASIS, 188 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 189 | See the License for the specific language governing permissions and 190 | limitations under the License. 191 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gophrql 2 | 3 | [![Go Reference](https://pkg.go.dev/badge/github.com/maxpert/gophrql.svg)](https://pkg.go.dev/github.com/maxpert/gophrql) 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/maxpert/gophrql)](https://goreportcard.com/report/github.com/maxpert/gophrql) 5 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 6 | 7 | **gophrql** is a Go implementation of [PRQL](https://prql-lang.org) (Pipelined Relational Query Language) — a modern, composable query language that compiles to SQL. 
8 | 
9 | **P**ipelined **R**elational **Q**uery **L**anguage, pronounced "Prequel".
10 | 
11 | PRQL is a modern language for transforming data — a simple, powerful, pipelined SQL replacement. Like SQL, it's readable, explicit and declarative. Unlike SQL, it forms a logical pipeline of transformations, and supports abstractions such as variables and functions.
12 | 
13 | ## PRQL Language Overview
14 | 
15 | PRQL queries are pipelines of transformations, where each line transforms the result of the previous line:
16 | 
17 | ```prql
18 | from employees                  # Start with a table
19 | filter department == "Sales"    # Filter rows
20 | derive {                        # Add computed columns
21 |   monthly_salary = salary / 12,
22 |   annual_bonus = salary * 0.1
23 | }
24 | select {                        # Choose columns
25 |   first_name,
26 |   last_name,
27 |   monthly_salary,
28 |   annual_bonus
29 | }
30 | sort {-monthly_salary}          # Sort descending by monthly_salary
31 | take 20                         # Limit results
32 | ```
33 | 
34 | ### Key Features
35 | 
36 | - **Pipelines**: `|` chains transformations (optional, newlines also work)
37 | - **Variables**: Define reusable expressions with `let` (see the sketch below)
38 | - **Functions**: Create custom transformations
39 | - **Dates**: First-class date support with `@2024-01-01` syntax
40 | - **F-strings**: String interpolation with `f"{first_name} {last_name}"`
41 | - **S-strings**: SQL escape hatch with `s"UPPER(name)"`
42 | - **Comments**: `#` for single-line comments
43 | 
44 | For the complete language reference, visit [PRQL Book](https://prql-lang.org/book/).
45 | 
46 | ## Features
47 | 
48 | - ✅ **Broad PRQL Syntax Support** - Covers most of the PRQL language spec; remaining gaps are tracked in the integration tests
49 | - ✅ **Multi-Dialect SQL Generation** - Postgres, MySQL, SQLite, MSSQL, DuckDB, BigQuery, Snowflake, ClickHouse
50 | - ✅ **Composable Pipelines** - Transform data with intuitive, chained operations
51 | - ✅ **Type-Safe** - Catch errors at compile time, not runtime
52 | - ✅ **Extensible** - Access the AST directly to build custom backends (MongoDB, ElasticSearch, etc.)
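As a quick taste of the *Variables* bullet above: `let` bindings compile to CTEs. A minimal, self-contained sketch using the public `Compile` API (the query mirrors one of this repo's snapshot tests):

```go
package main

import (
	"fmt"

	"github.com/maxpert/gophrql"
)

func main() {
	// A `let` binding names a sub-pipeline; the compiler emits it as a CTE.
	prql := `
let top_customers = (
  from invoices
  aggregate { total = count invoice_id }
)

from top_customers
select total
`

	sql, err := gophrql.Compile(prql)
	if err != nil {
		panic(err)
	}
	fmt.Println(sql)
	// Output is a CTE-based query, roughly:
	// WITH top_customers AS (...) SELECT total FROM top_customers
}
```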
53 | 
54 | ## Quick Start
55 | 
56 | ### Installation
57 | 
58 | ```bash
59 | go get github.com/maxpert/gophrql
60 | ```
61 | 
62 | ### Basic Usage
63 | 
64 | ```go
65 | package main
66 | 
67 | import (
68 | 	"fmt"
69 | 	"github.com/maxpert/gophrql"
70 | )
71 | 
72 | func main() {
73 | 	prql := `
74 | from employees
75 | filter department == "Engineering"
76 | select {first_name, last_name, salary}
77 | sort {-salary}
78 | take 10
79 | `
80 | 
81 | 	sql, err := gophrql.Compile(prql)
82 | 	if err != nil {
83 | 		panic(err)
84 | 	}
85 | 
86 | 	fmt.Println(sql)
87 | 	// Output:
88 | 	// SELECT
89 | 	//   first_name,
90 | 	//   last_name,
91 | 	//   salary
92 | 	// FROM
93 | 	//   employees
94 | 	// WHERE
95 | 	//   department = 'Engineering'
96 | 	// ORDER BY
97 | 	//   salary DESC
98 | 	// LIMIT
99 | 	//   10
100 | }
101 | ```
102 | ### Dialect-Specific Compilation
103 | 
104 | ```go
105 | // PostgreSQL
106 | sql, err := gophrql.Compile(prql, gophrql.WithTarget("sql.postgres"))
107 | 
108 | // MySQL
109 | sql, err := gophrql.Compile(prql, gophrql.WithTarget("sql.mysql"))
110 | 
111 | // Microsoft SQL Server
112 | sql, err := gophrql.Compile(prql, gophrql.WithTarget("sql.mssql"))
113 | 
114 | // DuckDB
115 | sql, err := gophrql.Compile(prql, gophrql.WithTarget("sql.duckdb"))
116 | ```
117 | 
118 | ## Examples
119 | 
120 | ### Aggregations
121 | 
122 | ```go
123 | prql := `
124 | from orders
125 | group {customer_id} (
126 |   aggregate {
127 |     total_orders = count this,
128 |     total_revenue = sum amount,
129 |     avg_order_value = average amount
130 |   }
131 | )
132 | filter total_revenue > 1000
133 | sort {-total_revenue}
134 | `
135 | 
136 | sql, _ := gophrql.Compile(prql)
137 | ```
138 | 
139 | ### Joins
140 | 
141 | ```go
142 | prql := `
143 | from employees
144 | join departments (==department_id)
145 | select {
146 |   employees.first_name,
147 |   employees.last_name,
148 |   departments.name
149 | }
150 | `
151 | 
152 | sql, _ := gophrql.Compile(prql)
153 | ```
154 | 
155 | ### Advanced Transformations
156 | 
157 | ```go
158 | prql := `
159 | from sales
160 | derive {
161 |   gross_revenue = quantity * price,
162 |   discount_amount = gross_revenue * discount_rate,
163 |   net_revenue = gross_revenue - discount_amount
164 | }
165 | filter net_revenue > 0
166 | group {product_id, year} (
167 |   aggregate {
168 |     total_quantity = sum quantity,
169 |     total_revenue = sum net_revenue,
170 |     avg_price = average price
171 |   }
172 | )
173 | `
174 | 
175 | sql, _ := gophrql.Compile(prql)
176 | ```
177 | 
178 | ## Extensibility: Custom Backends
179 | 
180 | One of gophrql's unique features is exposing the parse tree, allowing you to build custom backends for non-SQL databases. A basic example that converts PRQL into a MongoDB aggregation pipeline appears in the MongoDB Example below; first, a dialect-targeted analytics demo.
181 | 
182 | ### DuckDB Analytics Demo
183 | 
184 | Here's a real-world time series analytics query transpiled to DuckDB, based on actual user workflows from the data community.
This example analyzes cryptocurrency OHLCV data with moving averages and rolling statistics: 185 | 186 | ```go 187 | package main 188 | 189 | import ( 190 | "fmt" 191 | "github.com/maxpert/gophrql" 192 | ) 193 | 194 | func main() { 195 | prql := ` 196 | # Time series analysis with rolling windows and aggregations 197 | from ohlcv_data 198 | filter s"date_part(['year', 'month'], time) = {year: 2021, month: 2}" 199 | 200 | # Calculate moving averages and rolling statistics 201 | window rolling:28 ( 202 | derive { 203 | ma_28d = average close, 204 | volatility_28d = stddev close 205 | } 206 | ) 207 | 208 | # Calculate expanding cumulative average 209 | window rows:..0 ( 210 | derive { 211 | expanding_avg = average close, 212 | cumulative_volume = sum volume 213 | } 214 | ) 215 | 216 | # Combine rolling aggregations for Bollinger Bands 217 | window rows:-15..14 ( 218 | derive { 219 | rolling_mean = average close, 220 | rolling_std = stddev close, 221 | upper_band = average close + 2 * stddev close, 222 | lower_band = average close - 2 * stddev close 223 | } 224 | ) 225 | 226 | # Final selection with technical indicators 227 | select { 228 | time, 229 | close, 230 | ma_28d, 231 | expanding_avg, 232 | volatility_28d, 233 | rolling_mean, 234 | upper_band, 235 | lower_band, 236 | volume, 237 | cumulative_volume 238 | } 239 | sort time 240 | take 10 241 | ` 242 | 243 | sql, err := gophrql.Compile(prql, gophrql.WithTarget("sql.duckdb")) 244 | if err != nil { 245 | panic(err) 246 | } 247 | 248 | fmt.Println(sql) 249 | // Output: Optimized DuckDB query with window functions, 250 | // perfect for financial analysis and time series workloads 251 | } 252 | ``` 253 | 254 | This demonstrates gophrql's ability to handle: 255 | - **Time series filtering** with DuckDB's date functions 256 | - **Window functions** for moving averages and rolling statistics 257 | - **Multiple window frames** (rolling, expanding, centered) 258 | - **Technical indicators** like Bollinger Bands and volatility 259 | - **Complex analytics** common in financial data analysis 260 | 261 | Based on real user workflows from [eitsupi/querying-with-prql](https://github.com/eitsupi/querying-with-prql), this example shows how PRQL simplifies complex time series analytics that would be verbose in raw SQL. 
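A note on target resolution, as currently implemented in `gophrql.go`: a name passed to `WithTarget` that doesn't match a known dialect silently falls back to the default dialect, whereas a `target` declared inside the query text is validated — and, for now, only `sql.generic` is accepted from query text. A short sketch, mirroring the snippet style above:

```go
// Unknown WithTarget names fall back to the default dialect — no error:
sql, err := gophrql.Compile("from t", gophrql.WithTarget("sql.doesnotexist"))

// An in-query target other than sql.generic is currently rejected:
_, err = gophrql.Compile("target sql.postgres\nfrom t")
// err: unsupported target "sql.postgres"
```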
262 | 263 | ### MongoDB Example 264 | 265 | ```go 266 | package main 267 | 268 | import ( 269 | "fmt" 270 | "strings" 271 | 272 | "github.com/maxpert/gophrql" 273 | "github.com/maxpert/gophrql/ast" 274 | ) 275 | 276 | func main() { 277 | prql := ` 278 | from users 279 | filter age > 21 280 | filter country == "US" 281 | select { name, email, age } 282 | sort { -age } 283 | take 10 284 | ` 285 | 286 | // Parse PRQL to an AST 287 | query, err := gophrql.Parse(prql) 288 | if err != nil { 289 | panic(err) 290 | } 291 | 292 | // Convert AST to MongoDB aggregation pipeline string 293 | mongo := convertToMongo(query) 294 | fmt.Println(mongo) 295 | // db.users.aggregate([ 296 | // { $match: { age: { $gt: 21 }, country: "US" } }, 297 | // { $project: { name: 1, email: 1, age: 1, _id: 0 } }, 298 | // { $sort: { age: -1 } }, 299 | // { $limit: 10 } 300 | // ]) 301 | } 302 | 303 | func convertToMongo(q *ast.Query) string { 304 | var stages []string 305 | 306 | // Combine all filters into a single $match 307 | filters := []string{} 308 | for _, step := range q.Steps { 309 | if f, ok := step.(*ast.FilterStep); ok { 310 | if cond := toMongoCondition(f.Expr); cond != "" { 311 | filters = append(filters, cond) 312 | } 313 | } 314 | } 315 | if len(filters) > 0 { 316 | stages = append(stages, fmt.Sprintf("{ $match: { %s } }", strings.Join(filters, ", "))) 317 | } 318 | 319 | for _, step := range q.Steps { 320 | switch s := step.(type) { 321 | case *ast.SelectStep: 322 | fields := []string{} 323 | for _, item := range s.Items { 324 | name := item.As 325 | if name == "" { 326 | name = exprToField(item.Expr) 327 | } 328 | fields = append(fields, fmt.Sprintf("%s: 1", name)) 329 | } 330 | // Exclude _id for clarity 331 | fields = append(fields, "_id: 0") 332 | stages = append(stages, fmt.Sprintf("{ $project: { %s } }", strings.Join(fields, ", "))) 333 | case *ast.SortStep: 334 | sorts := []string{} 335 | for _, item := range s.Items { 336 | dir := 1 337 | if item.Desc { 338 | dir = -1 339 | } 340 | sorts = append(sorts, fmt.Sprintf("%s: %d", exprToField(item.Expr), dir)) 341 | } 342 | if len(sorts) > 0 { 343 | stages = append(stages, fmt.Sprintf("{ $sort: { %s } }", strings.Join(sorts, ", "))) 344 | } 345 | case *ast.TakeStep: 346 | if s.Limit > 0 { 347 | stages = append(stages, fmt.Sprintf("{ $limit: %d }", s.Limit)) 348 | } 349 | } 350 | } 351 | 352 | return fmt.Sprintf("db.%s.aggregate([%s])", q.From.Table, strings.Join(stages, ", ")) 353 | } 354 | 355 | func toMongoCondition(e ast.Expr) string { 356 | b, ok := e.(*ast.Binary) 357 | if !ok { 358 | return "" 359 | } 360 | 361 | field := exprToField(b.Left) 362 | value := exprToValue(b.Right) 363 | 364 | switch b.Op { 365 | case "==": 366 | return fmt.Sprintf("%s: %s", field, value) 367 | case ">": 368 | return fmt.Sprintf("%s: { $gt: %s }", field, value) 369 | case "<": 370 | return fmt.Sprintf("%s: { $lt: %s }", field, value) 371 | default: 372 | return "" 373 | } 374 | } 375 | 376 | func exprToField(e ast.Expr) string { 377 | if id, ok := e.(*ast.Ident); ok && len(id.Parts) > 0 { 378 | return strings.Join(id.Parts, ".") 379 | } 380 | return e.String() 381 | } 382 | 383 | func exprToValue(e ast.Expr) string { 384 | switch v := e.(type) { 385 | case *ast.Number: 386 | return v.Value 387 | case *ast.StringLit: 388 | return fmt.Sprintf("\"%s\"", v.Value) 389 | default: 390 | return "null" 391 | } 392 | } 393 | ``` 394 | 395 | See `examples/mongo/main.go` for the full example with more operators and safer parsing. 
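Extending `toMongoCondition` with more comparison operators is mechanical. A sketch of the extra cases — hypothetical additions that drop into the switch above, using MongoDB's standard `$gte`/`$lte`/`$ne` operators:

```go
	case ">=":
		return fmt.Sprintf("%s: { $gte: %s }", field, value)
	case "<=":
		return fmt.Sprintf("%s: { $lte: %s }", field, value)
	case "!=":
		return fmt.Sprintf("%s: { $ne: %s }", field, value)
```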
396 | 
430 | ## Supported Dialects
431 | 
432 | | Dialect | Status | Notes |
433 | |---------|--------|-------|
434 | | Generic | ✅ | Postgres-compatible fallback |
435 | | PostgreSQL | ✅ | Full support |
436 | | MySQL | ✅ | Backtick identifiers, LIMIT syntax |
437 | | SQLite | ✅ | Standard SQL subset |
438 | | DuckDB | ✅ | Advanced analytics functions |
439 | | MS SQL Server | ✅ | TOP clause, T-SQL functions |
440 | | BigQuery | ✅ | Google BigQuery syntax |
441 | | Snowflake | ✅ | Snowflake-specific features |
442 | | ClickHouse | ✅ | ClickHouse syntax |
443 | 
444 | ## Development
445 | 
446 | ### Prerequisites
447 | 
448 | - Go 1.25+ (see `go.mod`)
449 | 
450 | ### Building
451 | 
452 | ```bash
453 | go build ./...
454 | ```
455 | 
456 | ### Testing
457 | 
458 | ```bash
459 | go test ./...
460 | ```
461 | 
462 | ### Running Examples
463 | 
464 | ```bash
466 | go run examples/mongo/main.go
467 | ```
468 | 
469 | ## Project Structure
470 | 
471 | ```
472 | gophrql/
473 | ├── ast/            # Public AST types
474 | ├── internal/
475 | │   ├── parser/     # PRQL parser
476 | │   └── sqlgen/     # SQL generation + dialects
477 | ├── examples/       # Usage examples
478 | ├── docs/           # Documentation
479 | └── gophrql.go      # Public API
480 | ```
481 | 
482 | ## Contributing
483 | 
484 | Contributions are welcome! Please see [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) for community guidelines.
485 | 
486 | ### Guidelines
487 | 
488 | 1. **Keep changes focused** - One feature/fix per PR
489 | 2. **Add tests** - Ensure coverage for new features
490 | 3. **Follow conventions** - Use `gofmt` and follow existing patterns
491 | 4. **Update docs** - Keep README and examples current
492 | 
493 | ## Acknowledgments
494 | 
495 | This project is inspired by and implements the [PRQL language specification](https://prql-lang.org/book/). Special thanks to the PRQL community and the upstream [prql](https://github.com/PRQL/prql) project.
496 | 
497 | ## License
498 | 
499 | Apache License 2.0 - see [LICENSE](LICENSE) for details.
500 | 501 | ## Resources 502 | 503 | - [PRQL Website](https://prql-lang.org) 504 | - [PRQL Book](https://prql-lang.org/book/) 505 | - [PRQL Playground](https://prql-lang.org/playground/) 506 | - [PRQL Discord](https://discord.gg/eQcfaCmsNc) 507 | -------------------------------------------------------------------------------- /compile_test.go: -------------------------------------------------------------------------------- 1 | package gophrql 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func TestCompileSnapshots(t *testing.T) { 9 | t.Helper() 10 | 11 | cases := []struct { 12 | name string 13 | prql string 14 | wantSQL string 15 | }{ 16 | { 17 | name: "target_sql_generic_simple", 18 | prql: ` 19 | target sql.generic 20 | from invoices 21 | take 1 22 | `, 23 | wantSQL: ` 24 | SELECT 25 | * 26 | FROM 27 | invoices 28 | LIMIT 29 | 1 30 | `, 31 | }, 32 | { 33 | name: "aggregation", 34 | prql: ` 35 | from tracks 36 | filter genre_id == 100 37 | derive empty_name = name == '' 38 | aggregate {sum track_id, concat_array name, all empty_name, any empty_name} 39 | `, 40 | wantSQL: ` 41 | SELECT 42 | COALESCE(SUM(track_id), 0), 43 | COALESCE(STRING_AGG(name, ''), ''), 44 | COALESCE(BOOL_AND(name = ''), TRUE), 45 | COALESCE(BOOL_OR(name = ''), FALSE) 46 | FROM 47 | tracks 48 | WHERE 49 | genre_id = 100 50 | `, 51 | }, 52 | { 53 | name: "date_to_text_formats", 54 | prql: ` 55 | from invoices 56 | take 20 57 | select { 58 | d1 = (invoice_date | date.to_text "%Y/%m/%d"), 59 | d2 = (invoice_date | date.to_text "%F"), 60 | d3 = (invoice_date | date.to_text "%D"), 61 | d4 = (invoice_date | date.to_text "%H:%M:%S.%f"), 62 | d5 = (invoice_date | date.to_text "%r"), 63 | d6 = (invoice_date | date.to_text "%A %B %-d %Y"), 64 | d7 = (invoice_date | date.to_text "%a, %-d %b %Y at %I:%M:%S %p"), 65 | d8 = (invoice_date | date.to_text "%+"), 66 | d9 = (invoice_date | date.to_text "%-d/%-m/%y"), 67 | d10 = (invoice_date | date.to_text "%-Hh %Mmin"), 68 | d11 = (invoice_date | date.to_text "%M'%S\""), 69 | d12 = (invoice_date | date.to_text "100%% in %d days"), 70 | } 71 | `, 72 | wantSQL: ` 73 | SELECT 74 | strftime(invoice_date, '%Y/%m/%d') AS d1, 75 | strftime(invoice_date, '%F') AS d2, 76 | strftime(invoice_date, '%D') AS d3, 77 | strftime(invoice_date, '%H:%M:%S.%f') AS d4, 78 | strftime(invoice_date, '%r') AS d5, 79 | strftime(invoice_date, '%A %B %-d %Y') AS d6, 80 | strftime(invoice_date, '%a, %-d %b %Y at %I:%M:%S %p') AS d7, 81 | strftime(invoice_date, '%+') AS d8, 82 | strftime(invoice_date, '%-d/%-m/%y') AS d9, 83 | strftime(invoice_date, '%-Hh %Mmin') AS d10, 84 | strftime(invoice_date, '%M''%S"') AS d11, 85 | strftime(invoice_date, '100%% in %d days') AS d12 86 | FROM 87 | invoices 88 | LIMIT 89 | 20 90 | `, 91 | }, 92 | { 93 | name: "switch_case_display", 94 | prql: ` 95 | from tracks 96 | sort milliseconds 97 | select display = case [ 98 | composer != null => composer, 99 | genre_id < 17 => 'no composer', 100 | true => f'unknown composer' 101 | ] 102 | take 10 103 | `, 104 | wantSQL: ` 105 | WITH table_0 AS ( 106 | SELECT 107 | CASE 108 | WHEN composer IS NOT NULL THEN composer 109 | WHEN genre_id < 17 THEN 'no composer' 110 | ELSE 'unknown composer' 111 | END AS display, 112 | milliseconds 113 | FROM 114 | tracks 115 | ORDER BY 116 | milliseconds 117 | LIMIT 118 | 10 119 | ) 120 | SELECT 121 | display 122 | FROM 123 | table_0 124 | ORDER BY 125 | milliseconds 126 | `, 127 | }, 128 | { 129 | name: "loop_recursive_numbers", 130 | prql: ` 131 | from [{n = 1}] 132 | select n = n - 2 133 | 
loop (filter n < 4 | select n = n + 1) 134 | select n = n * 2 135 | sort n 136 | `, 137 | wantSQL: ` 138 | WITH RECURSIVE table_0 AS ( 139 | SELECT 140 | 1 AS n 141 | ), 142 | table_1 AS ( 143 | SELECT 144 | n - 2 AS _expr_0 145 | FROM 146 | table_0 147 | UNION ALL 148 | SELECT 149 | _expr_0 + 1 150 | FROM 151 | table_1 152 | WHERE 153 | _expr_0 < 4 154 | ) 155 | SELECT 156 | _expr_0 * 2 AS n 157 | FROM 158 | table_1 AS table_2 159 | ORDER BY 160 | n 161 | `, 162 | }, 163 | { 164 | name: "genre_counts", 165 | prql: ` 166 | let genre_count = ( 167 | from genres 168 | aggregate {a = count name} 169 | ) 170 | 171 | from genre_count 172 | filter a > 0 173 | select a = -a 174 | `, 175 | wantSQL: ` 176 | WITH genre_count AS ( 177 | SELECT 178 | COUNT(*) AS a 179 | FROM 180 | genres 181 | ) 182 | SELECT 183 | - a AS a 184 | FROM 185 | genre_count 186 | WHERE 187 | a > 0 188 | `, 189 | }, 190 | { 191 | name: "let_binding_simple_cte", 192 | prql: ` 193 | let top_customers = ( 194 | from invoices 195 | aggregate { total = count invoice_id } 196 | ) 197 | 198 | from top_customers 199 | select total 200 | `, 201 | wantSQL: ` 202 | WITH top_customers AS ( 203 | SELECT 204 | COUNT(*) AS total 205 | FROM 206 | invoices 207 | ) 208 | SELECT 209 | total 210 | FROM 211 | top_customers 212 | `, 213 | }, 214 | { 215 | name: "group_sort_basic", 216 | prql: ` 217 | from tracks 218 | derive d = album_id + 1 219 | group d ( 220 | aggregate { 221 | n1 = (track_id | sum), 222 | } 223 | ) 224 | sort d 225 | take 10 226 | select { d1 = d, n1 } 227 | `, 228 | wantSQL: ` 229 | WITH table_0 AS ( 230 | SELECT 231 | COALESCE(SUM(track_id), 0) AS n1, 232 | album_id + 1 AS _expr_0 233 | FROM 234 | tracks 235 | GROUP BY 236 | album_id + 1 237 | ), 238 | table_1 AS ( 239 | SELECT 240 | _expr_0 AS d1, 241 | n1, 242 | _expr_0 243 | FROM 244 | table_0 245 | ORDER BY 246 | _expr_0 247 | LIMIT 248 | 10 249 | ) 250 | SELECT 251 | d1, 252 | n1 253 | FROM 254 | table_1 255 | ORDER BY 256 | d1 257 | `, 258 | }, 259 | { 260 | name: "append_select_simple_filter", 261 | prql: ` 262 | from invoices 263 | select { invoice_id, billing_country } 264 | append ( 265 | from invoices 266 | select { invoice_id = ` + "`invoice_id`" + ` + 100, billing_country } 267 | ) 268 | filter (billing_country | text.starts_with("I")) 269 | `, 270 | wantSQL: ` 271 | WITH table_1 AS ( 272 | SELECT 273 | invoice_id, 274 | billing_country 275 | FROM 276 | invoices 277 | UNION 278 | ALL 279 | SELECT 280 | invoice_id + 100 AS invoice_id, 281 | billing_country 282 | FROM 283 | invoices 284 | ) 285 | SELECT 286 | invoice_id, 287 | billing_country 288 | FROM 289 | table_1 290 | WHERE 291 | billing_country LIKE CONCAT('I', '%') 292 | `, 293 | }, 294 | { 295 | name: "append_select_compute", 296 | prql: ` 297 | from invoices 298 | derive total = case [total < 10 => total * 2, true => total] 299 | select { customer_id, invoice_id, total } 300 | take 5 301 | append ( 302 | from invoice_items 303 | derive unit_price = case [unit_price < 1 => unit_price * 2, true => unit_price] 304 | select { invoice_line_id, invoice_id, unit_price } 305 | take 5 306 | ) 307 | select { a = customer_id * 2, b = math.round 1 (invoice_id * total) } 308 | `, 309 | wantSQL: ` 310 | WITH table_1 AS ( 311 | SELECT 312 | * 313 | FROM 314 | ( 315 | SELECT 316 | invoice_id, 317 | CASE 318 | WHEN total < 10 THEN total * 2 319 | ELSE total 320 | END AS _expr_0, 321 | customer_id 322 | FROM 323 | invoices 324 | LIMIT 325 | 5 326 | ) AS table_3 327 | UNION 328 | ALL 329 | SELECT 330 | * 331 | FROM 332 | ( 
333 | SELECT 334 | invoice_id, 335 | CASE 336 | WHEN unit_price < 1 THEN unit_price * 2 337 | ELSE unit_price 338 | END AS unit_price, 339 | invoice_line_id 340 | FROM 341 | invoice_items 342 | LIMIT 343 | 5 344 | ) AS table_4 345 | ) 346 | SELECT 347 | customer_id * 2 AS a, 348 | ROUND(invoice_id * _expr_0, 1) AS b 349 | FROM 350 | table_1 351 | `, 352 | }, 353 | { 354 | name: "take_range_with_sort", 355 | prql: ` 356 | from tracks 357 | sort {+track_id} 358 | take 3..5 359 | `, 360 | wantSQL: ` 361 | SELECT 362 | * 363 | FROM 364 | tracks 365 | ORDER BY 366 | track_id 367 | LIMIT 368 | 3 OFFSET 2 369 | `, 370 | }, 371 | { 372 | name: "sort_with_join_alias", 373 | prql: ` 374 | from e=employees 375 | filter first_name != "Mitchell" 376 | sort {first_name, last_name} 377 | 378 | join manager=employees side:left (e.reports_to == manager.employee_id) 379 | 380 | select {e.first_name, e.last_name, manager.first_name} 381 | `, 382 | wantSQL: ` 383 | WITH table_0 AS ( 384 | SELECT 385 | first_name, 386 | last_name, 387 | reports_to 388 | FROM 389 | employees AS e 390 | WHERE 391 | first_name <> 'Mitchell' 392 | ) 393 | SELECT 394 | table_0.first_name, 395 | table_0.last_name, 396 | manager.first_name 397 | FROM 398 | table_0 399 | LEFT OUTER JOIN employees AS manager ON table_0.reports_to = manager.employee_id 400 | ORDER BY 401 | table_0.first_name, 402 | table_0.last_name 403 | `, 404 | }, 405 | { 406 | name: "sort_alias_filter_join", 407 | prql: ` 408 | from albums 409 | select { AA=album_id, artist_id } 410 | sort AA 411 | filter AA >= 25 412 | join artists (==artist_id) 413 | `, 414 | wantSQL: ` 415 | WITH table_1 AS ( 416 | SELECT 417 | album_id AS "AA", 418 | artist_id 419 | FROM 420 | albums 421 | ), 422 | table_0 AS ( 423 | SELECT 424 | "AA", 425 | artist_id 426 | FROM 427 | table_1 428 | WHERE 429 | "AA" >= 25 430 | ) 431 | SELECT 432 | table_0."AA", 433 | table_0.artist_id, 434 | artists.* 435 | FROM 436 | table_0 437 | INNER JOIN artists ON table_0.artist_id = artists.artist_id 438 | ORDER BY 439 | table_0."AA" 440 | `, 441 | }, 442 | { 443 | name: "constants_only", 444 | prql: ` 445 | from genres 446 | take 10 447 | filter true 448 | take 20 449 | filter true 450 | select d = 10 451 | `, 452 | wantSQL: ` 453 | WITH table_1 AS ( 454 | SELECT 455 | NULL 456 | FROM 457 | genres 458 | LIMIT 459 | 10 460 | ), table_0 AS ( 461 | SELECT 462 | NULL 463 | FROM 464 | table_1 465 | WHERE 466 | true 467 | LIMIT 468 | 20 469 | ) 470 | SELECT 471 | 10 AS d 472 | FROM 473 | table_0 474 | WHERE 475 | true 476 | `, 477 | }, 478 | { 479 | name: "append_select_union", 480 | prql: ` 481 | from invoices 482 | select { customer_id, invoice_id, billing_country } 483 | take 10..15 484 | append ( 485 | from invoices 486 | select { customer_id, invoice_id, billing_country } 487 | take 40..45 488 | ) 489 | select { billing_country, invoice_id } 490 | `, 491 | wantSQL: ` 492 | SELECT 493 | * 494 | FROM 495 | ( 496 | SELECT 497 | billing_country, 498 | invoice_id 499 | FROM 500 | invoices 501 | LIMIT 502 | 6 OFFSET 9 503 | ) AS table_2 504 | UNION 505 | ALL 506 | SELECT 507 | * 508 | FROM 509 | ( 510 | SELECT 511 | billing_country, 512 | invoice_id 513 | FROM 514 | invoices 515 | LIMIT 516 | 6 OFFSET 39 517 | ) AS table_3 518 | `, 519 | }, 520 | { 521 | name: "append_select_simple", 522 | prql: ` 523 | from invoices 524 | select { invoice_id, billing_country } 525 | append ( 526 | from invoices 527 | select { invoice_id = invoice_id + 100, billing_country } 528 | ) 529 | filter (billing_country | 
text.starts_with "I") 530 | `, 531 | wantSQL: ` 532 | WITH table_1 AS ( 533 | SELECT 534 | invoice_id, 535 | billing_country 536 | FROM 537 | invoices 538 | UNION 539 | ALL 540 | SELECT 541 | invoice_id + 100 AS invoice_id, 542 | billing_country 543 | FROM 544 | invoices 545 | ) 546 | SELECT 547 | invoice_id, 548 | billing_country 549 | FROM 550 | table_1 551 | WHERE 552 | billing_country LIKE CONCAT('I', '%') 553 | `, 554 | }, 555 | { 556 | name: "append_select_multiple_with_null", 557 | prql: ` 558 | from invoices 559 | select { customer_id, invoice_id, billing_country } 560 | take 5 561 | append ( 562 | from employees 563 | select { employee_id, employee_id, country } 564 | take 5 565 | ) 566 | append ( 567 | from invoice_items 568 | select { invoice_line_id, invoice_id, null } 569 | take 5 570 | ) 571 | select { billing_country, invoice_id } 572 | `, 573 | wantSQL: ` 574 | SELECT 575 | * 576 | FROM 577 | ( 578 | SELECT 579 | billing_country, 580 | invoice_id 581 | FROM 582 | invoices 583 | LIMIT 584 | 5 585 | ) AS table_4 586 | UNION 587 | ALL 588 | SELECT 589 | * 590 | FROM 591 | ( 592 | SELECT 593 | country, 594 | employee_id 595 | FROM 596 | employees 597 | LIMIT 598 | 5 599 | ) AS table_5 600 | UNION 601 | ALL 602 | SELECT 603 | * 604 | FROM 605 | ( 606 | SELECT 607 | NULL, 608 | invoice_id 609 | FROM 610 | invoice_items 611 | LIMIT 612 | 5 613 | ) AS table_6 614 | `, 615 | }, 616 | { 617 | name: "append_select_nulls", 618 | prql: ` 619 | from invoices 620 | select {an_id = invoice_id, name = null} 621 | take 2 622 | append ( 623 | from employees 624 | select {an_id = null, name = first_name} 625 | take 2 626 | ) 627 | `, 628 | wantSQL: ` 629 | SELECT 630 | * 631 | FROM 632 | ( 633 | SELECT 634 | invoice_id AS an_id, 635 | NULL AS name 636 | FROM 637 | invoices 638 | LIMIT 639 | 2 640 | ) AS table_2 641 | UNION 642 | ALL 643 | SELECT 644 | * 645 | FROM 646 | ( 647 | SELECT 648 | NULL AS an_id, 649 | first_name AS name 650 | FROM 651 | employees 652 | LIMIT 653 | 2 654 | ) AS table_3 655 | `, 656 | }, 657 | { 658 | name: "window_functions", 659 | prql: ` 660 | from tracks 661 | group genre_id ( 662 | sort milliseconds 663 | derive { 664 | num = row_number this, 665 | total = count this, 666 | last_val = last track_id, 667 | } 668 | take 10 669 | ) 670 | sort {genre_id, milliseconds} 671 | select {track_id, genre_id, num, total, last_val} 672 | filter genre_id >= 22 673 | `, 674 | wantSQL: ` 675 | WITH table_0 AS ( 676 | SELECT 677 | track_id, 678 | genre_id, 679 | ROW_NUMBER() OVER ( 680 | PARTITION BY genre_id 681 | ORDER BY 682 | milliseconds 683 | ) AS num, 684 | COUNT(*) OVER ( 685 | PARTITION BY genre_id 686 | ORDER BY 687 | milliseconds ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING 688 | ) AS total, 689 | LAST_VALUE(track_id) OVER ( 690 | PARTITION BY genre_id 691 | ORDER BY 692 | milliseconds 693 | ) AS last_val, 694 | milliseconds, 695 | ROW_NUMBER() OVER ( 696 | PARTITION BY genre_id 697 | ORDER BY 698 | milliseconds 699 | ) AS _expr_0 700 | FROM 701 | tracks 702 | ), 703 | table_1 AS ( 704 | SELECT 705 | track_id, 706 | genre_id, 707 | num, 708 | total, 709 | last_val, 710 | milliseconds 711 | FROM 712 | table_0 713 | WHERE 714 | _expr_0 <= 10 715 | AND genre_id >= 22 716 | ) 717 | SELECT 718 | track_id, 719 | genre_id, 720 | num, 721 | total, 722 | last_val 723 | FROM 724 | table_1 725 | ORDER BY 726 | genre_id, 727 | milliseconds 728 | `, 729 | }, 730 | { 731 | name: "stdlib_math_module", 732 | prql: ` 733 | from employees 734 | select { 735 | salary_abs = 
math.abs salary, 736 | salary_floor = math.floor salary, 737 | salary_ceil = math.ceil salary, 738 | salary_pi = math.pi, 739 | salary_exp = math.exp salary, 740 | salary_ln = math.ln salary, 741 | salary_log10 = math.log10 salary, 742 | salary_log = math.log 2 salary, 743 | salary_sqrt = math.sqrt salary, 744 | salary_degrees = math.degrees salary, 745 | salary_radians = math.radians salary, 746 | salary_cos = math.cos salary, 747 | salary_acos = math.acos salary, 748 | salary_sin = math.sin salary, 749 | salary_asin = math.asin salary, 750 | salary_tan = math.tan salary, 751 | salary_atan = math.atan salary, 752 | salary_pow = (salary | math.pow 2), 753 | salary_pow_op = salary ** 2, 754 | } 755 | `, 756 | wantSQL: ` 757 | SELECT 758 | ABS(salary) AS salary_abs, 759 | FLOOR(salary) AS salary_floor, 760 | CEIL(salary) AS salary_ceil, 761 | PI() AS salary_pi, 762 | EXP(salary) AS salary_exp, 763 | LN(salary) AS salary_ln, 764 | LOG10(salary) AS salary_log10, 765 | LOG10(salary) / LOG10(2) AS salary_log, 766 | SQRT(salary) AS salary_sqrt, 767 | DEGREES(salary) AS salary_degrees, 768 | RADIANS(salary) AS salary_radians, 769 | COS(salary) AS salary_cos, 770 | ACOS(salary) AS salary_acos, 771 | SIN(salary) AS salary_sin, 772 | ASIN(salary) AS salary_asin, 773 | TAN(salary) AS salary_tan, 774 | ATAN(salary) AS salary_atan, 775 | POW(salary, 2) AS salary_pow, 776 | POW(salary, 2) AS salary_pow_op 777 | FROM 778 | employees 779 | `, 780 | }, 781 | { 782 | name: "text_module_filters", 783 | prql: ` 784 | from albums 785 | select { 786 | title, 787 | title_and_spaces = f" {title} ", 788 | low = (title | text.lower), 789 | up = (title | text.upper), 790 | ltrimmed = (title | text.ltrim), 791 | rtrimmed = (title | text.rtrim), 792 | trimmed = (title | text.trim), 793 | len = (title | text.length), 794 | subs = (title | text.extract 2 5), 795 | replace = (title | text.replace "al" "PIKA"), 796 | } 797 | sort {title} 798 | filter (title | text.starts_with "Black") || (title | text.contains "Sabbath") || (title | text.ends_with "os") 799 | `, 800 | wantSQL: ` 801 | WITH table_0 AS ( 802 | SELECT 803 | title, 804 | CONCAT(' ', title, ' ') AS title_and_spaces, 805 | LOWER(title) AS low, 806 | UPPER(title) AS up, 807 | LTRIM(title) AS ltrimmed, 808 | RTRIM(title) AS rtrimmed, 809 | TRIM(title) AS trimmed, 810 | CHAR_LENGTH(title) AS len, 811 | SUBSTRING(title, 2, 5) AS subs, 812 | REPLACE(title, 'al', 'PIKA') AS "replace" 813 | FROM 814 | albums 815 | ) 816 | SELECT 817 | title, 818 | title_and_spaces, 819 | low, 820 | up, 821 | ltrimmed, 822 | rtrimmed, 823 | trimmed, 824 | len, 825 | subs, 826 | "replace" 827 | FROM 828 | table_0 829 | WHERE 830 | title LIKE CONCAT('Black', '%') 831 | OR title LIKE CONCAT('%', 'Sabbath', '%') 832 | OR title LIKE CONCAT('%', 'os') 833 | ORDER BY 834 | title 835 | `, 836 | }, 837 | { 838 | name: "pipelines_filters_sort_take", 839 | prql: ` 840 | from tracks 841 | 842 | filter (name ~= "Love") 843 | filter ((milliseconds / 1000 / 60) | in 3..4) 844 | sort track_id 845 | take 1..15 846 | select {name, composer} 847 | `, 848 | wantSQL: ` 849 | WITH table_0 AS ( 850 | SELECT 851 | name, 852 | composer, 853 | track_id 854 | FROM 855 | tracks 856 | WHERE 857 | REGEXP(name, 'Love') 858 | AND milliseconds / 1000 / 60 BETWEEN 3 AND 4 859 | ORDER BY 860 | track_id 861 | LIMIT 862 | 15 863 | ) 864 | SELECT 865 | name, 866 | composer 867 | FROM 868 | table_0 869 | ORDER BY 870 | track_id 871 | `, 872 | }, 873 | { 874 | name: "distinct_group_take_one", 875 | prql: ` 876 | from tracks 
877 | select {album_id, genre_id} 878 | group tracks.* (take 1) 879 | sort tracks.* 880 | `, 881 | wantSQL: ` 882 | WITH table_0 AS ( 883 | SELECT 884 | DISTINCT album_id, 885 | genre_id 886 | FROM 887 | tracks 888 | ) 889 | SELECT 890 | album_id, 891 | genre_id 892 | FROM 893 | table_0 894 | ORDER BY 895 | album_id, 896 | genre_id 897 | `, 898 | }, 899 | { 900 | name: "arithmetic_div_mod", 901 | prql: ` 902 | from [ 903 | { id = 1, x_int = 13, x_float = 13.0, k_int = 5, k_float = 5.0 }, 904 | { id = 2, x_int = -13, x_float = -13.0, k_int = 5, k_float = 5.0 }, 905 | { id = 3, x_int = 13, x_float = 13.0, k_int = -5, k_float = -5.0 }, 906 | { id = 4, x_int = -13, x_float = -13.0, k_int = -5, k_float = -5.0 }, 907 | ] 908 | select { 909 | id, 910 | 911 | x_int / k_int, 912 | x_int / k_float, 913 | x_float / k_int, 914 | x_float / k_float, 915 | 916 | q_ii = x_int // k_int, 917 | q_if = x_int // k_float, 918 | q_fi = x_float // k_int, 919 | q_ff = x_float // k_float, 920 | 921 | r_ii = x_int % k_int, 922 | r_if = x_int % k_float, 923 | r_fi = x_float % k_int, 924 | r_ff = x_float % k_float, 925 | 926 | (q_ii * k_int + r_ii | math.round 0), 927 | (q_if * k_float + r_if | math.round 0), 928 | (q_fi * k_int + r_fi | math.round 0), 929 | (q_ff * k_float + r_ff | math.round 0), 930 | } 931 | sort id 932 | `, 933 | wantSQL: ` 934 | WITH table_0 AS ( 935 | SELECT 936 | 1 AS id, 937 | 13 AS x_int, 938 | 13.0 AS x_float, 939 | 5 AS k_int, 940 | 5.0 AS k_float 941 | UNION 942 | ALL 943 | SELECT 944 | 2 AS id, 945 | -13 AS x_int, 946 | -13.0 AS x_float, 947 | 5 AS k_int, 948 | 5.0 AS k_float 949 | UNION 950 | ALL 951 | SELECT 952 | 3 AS id, 953 | 13 AS x_int, 954 | 13.0 AS x_float, 955 | -5 AS k_int, 956 | -5.0 AS k_float 957 | UNION 958 | ALL 959 | SELECT 960 | 4 AS id, 961 | -13 AS x_int, 962 | -13.0 AS x_float, 963 | -5 AS k_int, 964 | -5.0 AS k_float 965 | ) 966 | SELECT 967 | id, 968 | x_int / k_int, 969 | x_int / k_float, 970 | x_float / k_int, 971 | x_float / k_float, 972 | FLOOR(ABS(x_int / k_int)) * SIGN(x_int) * SIGN(k_int) AS q_ii, 973 | FLOOR(ABS(x_int / k_float)) * SIGN(x_int) * SIGN(k_float) AS q_if, 974 | FLOOR(ABS(x_float / k_int)) * SIGN(x_float) * SIGN(k_int) AS q_fi, 975 | FLOOR(ABS(x_float / k_float)) * SIGN(x_float) * SIGN(k_float) AS q_ff, 976 | x_int % k_int AS r_ii, 977 | x_int % k_float AS r_if, 978 | x_float % k_int AS r_fi, 979 | x_float % k_float AS r_ff, 980 | ROUND( 981 | FLOOR(ABS(x_int / k_int)) * SIGN(x_int) * SIGN(k_int) * k_int + x_int % k_int, 982 | 0 983 | ), 984 | ROUND( 985 | FLOOR(ABS(x_int / k_float)) * SIGN(x_int) * SIGN(k_float) * k_float + x_int % k_float, 986 | 0 987 | ), 988 | ROUND( 989 | FLOOR(ABS(x_float / k_int)) * SIGN(x_float) * SIGN(k_int) * k_int + x_float % k_int, 990 | 0 991 | ), 992 | ROUND( 993 | FLOOR(ABS(x_float / k_float)) * SIGN(x_float) * SIGN(k_float) * k_float + x_float % k_float, 994 | 0 995 | ) 996 | FROM 997 | table_0 998 | ORDER BY 999 | id 1000 | `, 1001 | }, 1002 | { 1003 | name: "set_ops_remove", 1004 | prql: ` 1005 | let distinct = rel -> (from t = _param.rel | group {t.*} (take 1)) 1006 | 1007 | from_text format:json '{ "columns": ["a"], "data": [[1], [2], [2], [3]] }' 1008 | distinct 1009 | remove (from_text format:json '{ "columns": ["a"], "data": [[1], [2]] }') 1010 | sort a 1011 | `, 1012 | wantSQL: ` 1013 | WITH table_0 AS ( 1014 | SELECT 1015 | 1 AS a 1016 | UNION 1017 | ALL 1018 | SELECT 1019 | 2 AS a 1020 | UNION 1021 | ALL 1022 | SELECT 1023 | 2 AS a 1024 | UNION 1025 | ALL 1026 | SELECT 1027 | 3 AS a 1028 | ), 1029 | 
table_1 AS ( 1030 | SELECT 1031 | 1 AS a 1032 | UNION 1033 | ALL 1034 | SELECT 1035 | 2 AS a 1036 | ), 1037 | table_2 AS ( 1038 | SELECT 1039 | a 1040 | FROM 1041 | table_0 1042 | EXCEPT 1043 | DISTINCT 1044 | SELECT 1045 | * 1046 | FROM 1047 | table_1 1048 | ) 1049 | SELECT 1050 | a 1051 | FROM 1052 | table_2 1053 | ORDER BY 1054 | a 1055 | `, 1056 | }, 1057 | { 1058 | name: "group_sort_derive_select_join", 1059 | prql: ` 1060 | s"SELECT album_id,title,artist_id FROM albums" 1061 | group {artist_id} (aggregate { album_title_count = count this.` + "`title`" + `}) 1062 | sort {this.artist_id, this.album_title_count} 1063 | derive {new_album_count = this.album_title_count} 1064 | select {this.artist_id, this.new_album_count} 1065 | join side:left ( s"SELECT artist_id,name as artist_name FROM artists" ) (this.artist_id == that.artist_id) 1066 | `, 1067 | wantSQL: ` 1068 | WITH table_0 AS ( 1069 | SELECT 1070 | album_id, 1071 | title, 1072 | artist_id 1073 | FROM 1074 | albums 1075 | ), 1076 | table_4 AS ( 1077 | SELECT 1078 | artist_id, 1079 | COUNT(*) AS _expr_0 1080 | FROM 1081 | table_0 1082 | GROUP BY 1083 | artist_id 1084 | ), 1085 | table_2 AS ( 1086 | SELECT 1087 | artist_id, 1088 | _expr_0 AS new_album_count, 1089 | _expr_0 1090 | FROM 1091 | table_4 1092 | ), 1093 | table_1 AS ( 1094 | SELECT 1095 | artist_id, 1096 | name as artist_name 1097 | FROM 1098 | artists 1099 | ), 1100 | table_3 AS ( 1101 | SELECT 1102 | table_2.artist_id, 1103 | table_2.new_album_count, 1104 | table_1.artist_id AS _expr_1, 1105 | table_1.artist_name, 1106 | table_2._expr_0 1107 | FROM 1108 | table_2 1109 | LEFT OUTER JOIN table_1 ON table_2.artist_id = table_1.artist_id 1110 | ) 1111 | SELECT 1112 | artist_id, 1113 | new_album_count, 1114 | _expr_1, 1115 | artist_name 1116 | FROM 1117 | table_3 1118 | ORDER BY 1119 | artist_id, 1120 | new_album_count 1121 | `, 1122 | }, 1123 | { 1124 | name: "cast_projection", 1125 | prql: ` 1126 | from tracks 1127 | sort {-bytes} 1128 | select { 1129 | name, 1130 | bin = ((album_id | as REAL) * 99) 1131 | } 1132 | take 20 1133 | `, 1134 | wantSQL: ` 1135 | WITH table_0 AS ( 1136 | SELECT 1137 | name, 1138 | CAST(album_id AS REAL) * 99 AS bin, 1139 | bytes 1140 | FROM 1141 | tracks 1142 | ORDER BY 1143 | bytes DESC 1144 | LIMIT 1145 | 20 1146 | ) 1147 | SELECT 1148 | name, 1149 | bin 1150 | FROM 1151 | table_0 1152 | ORDER BY 1153 | bytes DESC 1154 | `, 1155 | }, 1156 | { 1157 | name: "distinct_on_group_sort_take", 1158 | prql: ` 1159 | from tracks 1160 | select {genre_id, media_type_id, album_id} 1161 | group {genre_id, media_type_id} (sort {-album_id} | take 1) 1162 | sort {-genre_id, media_type_id} 1163 | `, 1164 | wantSQL: ` 1165 | WITH table_0 AS ( 1166 | SELECT 1167 | genre_id, 1168 | media_type_id, 1169 | album_id, 1170 | ROW_NUMBER() OVER ( 1171 | PARTITION BY genre_id, 1172 | media_type_id 1173 | ORDER BY 1174 | album_id DESC 1175 | ) AS _expr_0 1176 | FROM 1177 | tracks 1178 | ) 1179 | SELECT 1180 | genre_id, 1181 | media_type_id, 1182 | album_id 1183 | FROM 1184 | table_0 1185 | WHERE 1186 | _expr_0 <= 1 1187 | ORDER BY 1188 | genre_id DESC, 1189 | media_type_id 1190 | `, 1191 | }, 1192 | { 1193 | name: "group_sort_limit_take_join", 1194 | prql: ` 1195 | from tracks 1196 | select {genre_id,milliseconds} 1197 | group {genre_id} ( 1198 | sort {-milliseconds} 1199 | take 3 1200 | ) 1201 | join genres (==genre_id) 1202 | select {name, milliseconds} 1203 | sort {+name,-milliseconds} 1204 | `, 1205 | wantSQL: ` 1206 | WITH table_1 AS ( 1207 | SELECT 1208 | 
milliseconds, 1209 | genre_id, 1210 | ROW_NUMBER() OVER ( 1211 | PARTITION BY genre_id 1212 | ORDER BY 1213 | milliseconds DESC 1214 | ) AS _expr_0 1215 | FROM 1216 | tracks 1217 | ), 1218 | table_0 AS ( 1219 | SELECT 1220 | milliseconds, 1221 | genre_id 1222 | FROM 1223 | table_1 1224 | WHERE 1225 | _expr_0 <= 3 1226 | ) 1227 | SELECT 1228 | genres.name, 1229 | table_0.milliseconds 1230 | FROM 1231 | table_0 1232 | INNER JOIN genres ON table_0.genre_id = genres.genre_id 1233 | ORDER BY 1234 | genres.name, 1235 | table_0.milliseconds DESC 1236 | `, 1237 | }, 1238 | { 1239 | name: "group_sort_filter_derive_select_join", 1240 | prql: ` 1241 | s"SELECT album_id,title,artist_id FROM albums" 1242 | group {artist_id} (aggregate { album_title_count = count this.` + "`title`" + `}) 1243 | sort {this.artist_id, this.album_title_count} 1244 | filter (this.album_title_count) > 10 1245 | derive {new_album_count = this.album_title_count} 1246 | select {this.artist_id, this.new_album_count} 1247 | join side:left ( s"SELECT artist_id,name as artist_name FROM artists" ) (this.artist_id == that.artist_id) 1248 | `, 1249 | wantSQL: ` 1250 | WITH table_0 AS ( 1251 | SELECT 1252 | album_id, 1253 | title, 1254 | artist_id 1255 | FROM 1256 | albums 1257 | ), 1258 | table_3 AS ( 1259 | SELECT 1260 | artist_id, 1261 | COUNT(*) AS _expr_0 1262 | FROM 1263 | table_0 1264 | GROUP BY 1265 | artist_id 1266 | ), 1267 | table_4 AS ( 1268 | SELECT 1269 | artist_id, 1270 | _expr_0 AS new_album_count, 1271 | _expr_0 1272 | FROM 1273 | table_3 1274 | WHERE 1275 | _expr_0 > 10 1276 | ), 1277 | table_2 AS ( 1278 | SELECT 1279 | artist_id, 1280 | new_album_count, 1281 | _expr_0 1282 | FROM 1283 | table_4 1284 | ), 1285 | table_1 AS ( 1286 | SELECT 1287 | artist_id, 1288 | name as artist_name 1289 | FROM 1290 | artists 1291 | ) 1292 | SELECT 1293 | table_2.artist_id, 1294 | table_2.new_album_count, 1295 | table_1.artist_id, 1296 | table_1.artist_name 1297 | FROM 1298 | table_2 1299 | LEFT OUTER JOIN table_1 ON table_2.artist_id = table_1.artist_id 1300 | ORDER BY 1301 | table_2.artist_id, 1302 | table_2.new_album_count 1303 | `, 1304 | }, 1305 | { 1306 | name: "invoice_totals_window_join", 1307 | prql: ` 1308 | from i=invoices 1309 | join ii=invoice_items (==invoice_id) 1310 | derive { 1311 | city = i.billing_city, 1312 | street = i.billing_address, 1313 | } 1314 | group {city, street} ( 1315 | derive total = ii.unit_price * ii.quantity 1316 | aggregate { 1317 | num_orders = count_distinct i.invoice_id, 1318 | num_tracks = sum ii.quantity, 1319 | total_price = sum total, 1320 | } 1321 | ) 1322 | group {city} ( 1323 | sort street 1324 | window expanding:true ( 1325 | derive {running_total_num_tracks = sum num_tracks} 1326 | ) 1327 | ) 1328 | sort {city, street} 1329 | derive {num_tracks_last_week = lag 7 num_tracks} 1330 | select { 1331 | city, 1332 | street, 1333 | num_orders, 1334 | num_tracks, 1335 | running_total_num_tracks, 1336 | num_tracks_last_week 1337 | } 1338 | take 20 1339 | `, 1340 | wantSQL: ` 1341 | WITH table_0 AS ( 1342 | SELECT 1343 | i.billing_city AS city, 1344 | i.billing_address AS street, 1345 | COUNT(DISTINCT i.invoice_id) AS num_orders, 1346 | COALESCE(SUM(ii.quantity), 0) AS num_tracks 1347 | FROM 1348 | invoices AS i 1349 | INNER JOIN invoice_items AS ii ON i.invoice_id = ii.invoice_id 1350 | GROUP BY 1351 | i.billing_city, 1352 | i.billing_address 1353 | ) 1354 | SELECT 1355 | city, 1356 | street, 1357 | num_orders, 1358 | num_tracks, 1359 | SUM(num_tracks) OVER ( 1360 | PARTITION BY city 1361 | ORDER 
BY 1362 | street ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW 1363 | ) AS running_total_num_tracks, 1364 | LAG(num_tracks, 7) OVER ( 1365 | ORDER BY 1366 | city, 1367 | street 1368 | ) AS num_tracks_last_week 1369 | FROM 1370 | table_0 1371 | ORDER BY 1372 | city, 1373 | street 1374 | LIMIT 1375 | 20 1376 | `, 1377 | }, 1378 | { 1379 | name: "group_all_join_aggregate", 1380 | prql: ` 1381 | from a=albums 1382 | take 10 1383 | join tracks (==album_id) 1384 | group {a.album_id, a.title} ( 1385 | aggregate price = (sum tracks.unit_price | math.round 2) 1386 | ) 1387 | sort album_id 1388 | `, 1389 | wantSQL: ` 1390 | WITH table_0 AS ( 1391 | SELECT 1392 | album_id, 1393 | title 1394 | FROM 1395 | albums AS a 1396 | LIMIT 1397 | 10 1398 | ) 1399 | SELECT 1400 | table_0.album_id, 1401 | table_0.title, 1402 | ROUND(COALESCE(SUM(tracks.unit_price), 0), 2) AS price 1403 | FROM 1404 | table_0 1405 | INNER JOIN tracks ON table_0.album_id = tracks.album_id 1406 | GROUP BY 1407 | table_0.album_id, 1408 | table_0.title 1409 | ORDER BY 1410 | table_0.album_id 1411 | `, 1412 | }, 1413 | { 1414 | name: "read_csv_sort", 1415 | prql: ` 1416 | from (read_csv "data_file_root/media_types.csv") 1417 | sort media_type_id 1418 | `, 1419 | wantSQL: ` 1420 | WITH table_0 AS ( 1421 | SELECT 1422 | * 1423 | FROM 1424 | read_csv('data_file_root/media_types.csv') 1425 | ) 1426 | SELECT 1427 | * 1428 | FROM 1429 | table_0 1430 | ORDER BY 1431 | media_type_id 1432 | `, 1433 | }, 1434 | { 1435 | name: "sort_preserved_through_join", 1436 | prql: ` 1437 | from e=employees 1438 | filter first_name != "Mitchell" 1439 | sort {first_name, last_name} 1440 | join manager=employees side:left (e.reports_to == manager.employee_id) 1441 | select {e.first_name, e.last_name, manager.first_name} 1442 | `, 1443 | wantSQL: ` 1444 | WITH table_0 AS ( 1445 | SELECT 1446 | first_name, 1447 | last_name, 1448 | reports_to 1449 | FROM 1450 | employees AS e 1451 | WHERE 1452 | first_name <> 'Mitchell' 1453 | ) 1454 | SELECT 1455 | table_0.first_name, 1456 | table_0.last_name, 1457 | manager.first_name 1458 | FROM 1459 | table_0 1460 | LEFT OUTER JOIN employees AS manager ON table_0.reports_to = manager.employee_id 1461 | ORDER BY 1462 | table_0.first_name, 1463 | table_0.last_name 1464 | `, 1465 | }, 1466 | { 1467 | name: "sort_alias_join", 1468 | prql: ` 1469 | from albums 1470 | select { AA=album_id, artist_id } 1471 | sort AA 1472 | filter AA >= 25 1473 | join artists (==artist_id) 1474 | `, 1475 | wantSQL: ` 1476 | WITH table_1 AS ( 1477 | SELECT 1478 | album_id AS "AA", 1479 | artist_id 1480 | FROM 1481 | albums 1482 | ), 1483 | table_0 AS ( 1484 | SELECT 1485 | "AA", 1486 | artist_id 1487 | FROM 1488 | table_1 1489 | WHERE 1490 | "AA" >= 25 1491 | ) 1492 | SELECT 1493 | table_0."AA", 1494 | table_0.artist_id, 1495 | artists.* 1496 | FROM 1497 | table_0 1498 | INNER JOIN artists ON table_0.artist_id = artists.artist_id 1499 | ORDER BY 1500 | table_0."AA" 1501 | `, 1502 | }, 1503 | { 1504 | name: "sort_alias_inline_sources", 1505 | prql: ` 1506 | from [{track_id=0, album_id=1, genre_id=2}] 1507 | select { AA=track_id, album_id, genre_id } 1508 | sort AA 1509 | join side:left [{album_id=1, album_title="Songs"}] (==album_id) 1510 | select { AA, AT = album_title ?? "unknown", genre_id } 1511 | filter AA < 25 1512 | join side:left [{genre_id=1, genre_title="Rock"}] (==genre_id) 1513 | select { AA, AT, GT = genre_title ?? 
"unknown" } 1514 | `, 1515 | wantSQL: ` 1516 | WITH table_0 AS ( 1517 | SELECT 1518 | 0 AS track_id, 1519 | 1 AS album_id, 1520 | 2 AS genre_id 1521 | ), 1522 | table_5 AS ( 1523 | SELECT 1524 | track_id AS "AA", 1525 | genre_id, 1526 | album_id 1527 | FROM 1528 | table_0 1529 | ), 1530 | table_1 AS ( 1531 | SELECT 1532 | 1 AS album_id, 1533 | 'Songs' AS album_title 1534 | ), 1535 | table_4 AS ( 1536 | SELECT 1537 | table_5."AA", 1538 | COALESCE(table_1.album_title, 'unknown') AS "AT", 1539 | table_5.genre_id 1540 | FROM 1541 | table_5 1542 | LEFT OUTER JOIN table_1 ON table_5.album_id = table_1.album_id 1543 | ), 1544 | table_3 AS ( 1545 | SELECT 1546 | "AA", 1547 | "AT", 1548 | genre_id 1549 | FROM 1550 | table_4 1551 | WHERE 1552 | "AA" < 25 1553 | ), 1554 | table_2 AS ( 1555 | SELECT 1556 | 1 AS genre_id, 1557 | 'Rock' AS genre_title 1558 | ) 1559 | SELECT 1560 | table_3."AA", 1561 | table_3."AT", 1562 | COALESCE(table_2.genre_title, 'unknown') AS "GT" 1563 | FROM 1564 | table_3 1565 | LEFT OUTER JOIN table_2 ON table_3.genre_id = table_2.genre_id 1566 | ORDER BY 1567 | table_3."AA" 1568 | `, 1569 | }, 1570 | } 1571 | 1572 | for _, tc := range cases { 1573 | tc := tc 1574 | t.Run(tc.name, func(t *testing.T) { 1575 | sql, err := Compile(tc.prql) 1576 | if err != nil { 1577 | t.Fatalf("Compile returned error: %v", err) 1578 | } 1579 | if got, want := normalize(sql), normalize(tc.wantSQL); got != want { 1580 | t.Fatalf("SQL mismatch for %s:\nwant:\n%s\n\ngot:\n%s", tc.name, want, got) 1581 | } 1582 | }) 1583 | } 1584 | } 1585 | 1586 | func normalize(s string) string { 1587 | return strings.Join(strings.Fields(strings.TrimSpace(s)), "") 1588 | } 1589 | -------------------------------------------------------------------------------- /internal/parser/parser.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/maxpert/gophrql/ast" 8 | ) 9 | 10 | // Parse converts PRQL source into an AST.Query. 
11 | func Parse(src string) (*ast.Query, error) { 12 | tokens, err := Lex(src) 13 | if err != nil { 14 | return nil, err 15 | } 16 | p := &Parser{tokens: tokens} 17 | return p.parseQuery() 18 | } 19 | 20 | type Parser struct { 21 | tokens []Token 22 | pos int 23 | stopAtPipe bool 24 | } 25 | 26 | func (p *Parser) parseQuery() (*ast.Query, error) { 27 | p.skipNewlines() 28 | var target string 29 | var bindings []ast.Binding 30 | for { 31 | p.skipNewlines() 32 | if !p.peekIs(IDENT) { 33 | break 34 | } 35 | switch p.peek().Lit { 36 | case "target": 37 | p.next() 38 | if target != "" { 39 | return nil, fmt.Errorf("target already specified") 40 | } 41 | val, err := p.parseTargetValue() 42 | if err != nil { 43 | return nil, err 44 | } 45 | target = val 46 | p.skipToLineEnd() 47 | case "let": 48 | p.next() 49 | binding, ok, err := p.parseLetBinding() 50 | if err != nil { 51 | return nil, err 52 | } 53 | if ok { 54 | bindings = append(bindings, binding) 55 | } 56 | p.skipNewlines() 57 | default: 58 | goto beginQuery 59 | } 60 | } 61 | 62 | beginQuery: 63 | p.skipNewlines() 64 | if p.peekIs(IDENT) && p.peek().Lit == "from" { 65 | p.next() 66 | } else if p.peekIs(IDENT) && (p.peek().Lit == "from_text" || p.peek().Lit == "s") { 67 | // handled in parseSource 68 | } else { 69 | return nil, fmt.Errorf("query must start with 'from'") 70 | } 71 | source, err := p.parseSource() 72 | if err != nil { 73 | return nil, err 74 | } 75 | 76 | var steps []ast.Step 77 | for !p.peekIs(EOF) { 78 | p.skipNewlines() 79 | if p.peekIs(EOF) { 80 | break 81 | } 82 | 83 | switch tok := p.peek(); tok.Typ { 84 | case IDENT: 85 | switch tok.Lit { 86 | case "filter": 87 | p.next() 88 | step, err := p.parseFilter() 89 | if err != nil { 90 | return nil, err 91 | } 92 | steps = append(steps, step) 93 | case "derive": 94 | p.next() 95 | step, err := p.parseDerive() 96 | if err != nil { 97 | return nil, err 98 | } 99 | steps = append(steps, step) 100 | case "select": 101 | p.next() 102 | step, err := p.parseSelect() 103 | if err != nil { 104 | return nil, err 105 | } 106 | steps = append(steps, step) 107 | case "aggregate": 108 | p.next() 109 | step, err := p.parseAggregate() 110 | if err != nil { 111 | return nil, err 112 | } 113 | steps = append(steps, step) 114 | case "window": 115 | // Skip window block for now (handled downstream). 
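// For example, a top-level block like
//
//	window (
//	    derive {prev = lag 1 total}
//	)
//
// is consumed token-by-token and dropped here (a hypothetical pipeline; the
// group-level variant further down also skips an `expanding:true`-style
// option before the parentheses).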
116 | p.next() 117 | p.skipNewlines() 118 | if p.peekIs(LPAREN) { 119 | p.next() 120 | p.collectUntilMatching(RPAREN) 121 | } 122 | case "take": 123 | p.next() 124 | step, err := p.parseTake() 125 | if err != nil { 126 | return nil, err 127 | } 128 | steps = append(steps, step) 129 | case "append": 130 | p.next() 131 | step, err := p.parseAppend() 132 | if err != nil { 133 | return nil, err 134 | } 135 | steps = append(steps, step) 136 | case "remove": 137 | p.next() 138 | step, err := p.parseRemove() 139 | if err != nil { 140 | return nil, err 141 | } 142 | steps = append(steps, step) 143 | case "group": 144 | p.next() 145 | step, err := p.parseGroup() 146 | if err != nil { 147 | return nil, err 148 | } 149 | steps = append(steps, step) 150 | case "loop": 151 | p.next() 152 | step, err := p.parseLoop() 153 | if err != nil { 154 | return nil, err 155 | } 156 | steps = append(steps, step) 157 | case "join": 158 | p.next() 159 | step, err := p.parseJoin() 160 | if err != nil { 161 | return nil, err 162 | } 163 | steps = append(steps, step) 164 | case "distinct": 165 | p.next() 166 | steps = append(steps, &ast.DistinctStep{}) 167 | case "sort": 168 | p.next() 169 | step, err := p.parseSort() 170 | if err != nil { 171 | return nil, err 172 | } 173 | steps = append(steps, step) 174 | default: 175 | return nil, fmt.Errorf("unexpected token %q", tok.Lit) 176 | } 177 | case NEWLINE: 178 | p.next() 179 | default: 180 | return nil, fmt.Errorf("unexpected token %v at pos %d", tok, p.pos) 181 | } 182 | } 183 | 184 | return &ast.Query{ 185 | From: source, 186 | Steps: steps, 187 | Target: target, 188 | Bindings: bindings, 189 | }, nil 190 | } 191 | 192 | func (p *Parser) parseTargetValue() (string, error) { 193 | p.skipNewlines() 194 | if !p.peekIs(IDENT) { 195 | return "", fmt.Errorf("expected identifier after target") 196 | } 197 | var parts []string 198 | parts = append(parts, p.next().Lit) 199 | for p.peekIs(DOT) { 200 | p.next() 201 | if !p.peekIs(IDENT) { 202 | return "", fmt.Errorf("expected identifier after '.' 
in target") 203 | } 204 | parts = append(parts, p.next().Lit) 205 | } 206 | return strings.ToLower(strings.Join(parts, ".")), nil 207 | } 208 | 209 | func (p *Parser) parseLetBinding() (ast.Binding, bool, error) { 210 | p.skipNewlines() 211 | if !p.peekIs(IDENT) { 212 | return ast.Binding{}, false, fmt.Errorf("expected identifier after let") 213 | } 214 | name := p.next().Lit 215 | if !p.peekIs(EQUAL) { 216 | return ast.Binding{}, false, fmt.Errorf("expected '=' in let binding") 217 | } 218 | p.next() 219 | p.skipNewlines() 220 | if !p.peekIs(LPAREN) { 221 | p.skipLetRemainder() 222 | return ast.Binding{}, false, nil 223 | } 224 | p.next() 225 | p.skipNewlines() 226 | if !p.peekIs(IDENT) { 227 | p.collectUntilMatching(RPAREN) 228 | return ast.Binding{}, false, nil 229 | } 230 | head := p.peek().Lit 231 | if head != "from" && head != "from_text" && !strings.HasPrefix(head, "s\"") { 232 | p.collectUntilMatching(RPAREN) 233 | return ast.Binding{}, false, nil 234 | } 235 | subTokens := p.collectUntilMatching(RPAREN) 236 | subParser := &Parser{tokens: subTokens} 237 | subQuery, err := subParser.parseQuery() 238 | if err != nil { 239 | return ast.Binding{}, false, err 240 | } 241 | return ast.Binding{Name: name, Query: subQuery}, true, nil 242 | } 243 | 244 | func (p *Parser) skipLetRemainder() { 245 | depth := 0 246 | for !p.peekIs(EOF) { 247 | tok := p.next() 248 | switch tok.Typ { 249 | case LPAREN, LBRACE, LBRACKET: 250 | depth++ 251 | case RPAREN, RBRACE, RBRACKET: 252 | if depth > 0 { 253 | depth-- 254 | } 255 | case NEWLINE: 256 | if depth == 0 { 257 | return 258 | } 259 | } 260 | } 261 | } 262 | 263 | func (p *Parser) parseSource() (ast.Source, error) { 264 | p.skipNewlines() 265 | if inline, ok, err := p.parseInlineRowsSource(); ok || err != nil { 266 | return inline, err 267 | } 268 | if p.peekIs(LPAREN) { 269 | p.next() 270 | expr, err := p.parseExpr(0) 271 | if err != nil { 272 | return ast.Source{}, err 273 | } 274 | if !p.peekIs(RPAREN) { 275 | return ast.Source{}, fmt.Errorf("expected ) after inline source") 276 | } 277 | p.next() 278 | if call, ok := expr.(*ast.Call); ok { 279 | if name := exprToIdent(call.Func); name == "read_csv" && len(call.Args) == 1 { 280 | if lit, ok := call.Args[0].(*ast.StringLit); ok { 281 | path := strings.ReplaceAll(lit.Value, "'", "''") 282 | table := fmt.Sprintf("SELECT\n *\n FROM\n read_csv('%s')", path) 283 | return ast.Source{Table: table}, nil 284 | } 285 | } 286 | } 287 | return ast.Source{}, fmt.Errorf("unsupported inline source") 288 | } 289 | 290 | tok := p.next() 291 | if tok.Typ == IDENT && strings.HasPrefix(tok.Lit, "from_text") { 292 | // from_text format:json '...' 
293 | if !p.peekIs(IDENT) { 294 | return ast.Source{}, fmt.Errorf("from_text expects format") 295 | } 296 | format := p.next().Lit 297 | if !strings.Contains(strings.ToLower(format), "json") { 298 | return ast.Source{}, fmt.Errorf("from_text only supports json in this stub") 299 | } 300 | if !p.peekIs(STRING) { 301 | return ast.Source{}, fmt.Errorf("from_text expects string literal") 302 | } 303 | raw := p.next().Lit 304 | rows, err := parseJSONTable(raw) 305 | if err != nil { 306 | return ast.Source{}, err 307 | } 308 | return ast.Source{Rows: rows}, nil 309 | } 310 | if tok.Typ == IDENT && tok.Lit == "s" && p.peekIs(STRING) { 311 | sql := p.next().Lit 312 | return ast.Source{Table: sql}, nil 313 | } 314 | if tok.Typ == IDENT && strings.HasPrefix(tok.Lit, "s\"") { 315 | inner := strings.TrimSuffix(strings.TrimPrefix(tok.Lit, "s\""), "\"") // strings.Trim's cutset would also eat a trailing 's' from the SQL 316 | return ast.Source{Table: "SELECT " + strings.TrimPrefix(inner, "SELECT ")}, nil 317 | } 318 | if tok.Typ != IDENT { 319 | return ast.Source{}, fmt.Errorf("expected source after from, got %v", tok) 320 | } 321 | if p.peekIs(EQUAL) { 322 | alias := tok.Lit 323 | p.next() 324 | srcTok := p.next() 325 | table := srcTok.Lit 326 | if srcTok.Typ == IDENT && strings.HasPrefix(srcTok.Lit, "s\"") { 327 | inner := strings.TrimSuffix(strings.TrimPrefix(srcTok.Lit, "s\""), "\"") 328 | table = "SELECT " + strings.TrimPrefix(inner, "SELECT ") 329 | } 330 | return ast.Source{Table: fmt.Sprintf("%s AS %s", table, alias)}, nil 331 | } 332 | return ast.Source{Table: tok.Lit}, nil 333 | } 334 | 335 | func (p *Parser) parseInlineRowsSource() (ast.Source, bool, error) { 336 | p.skipNewlines() 337 | if !p.peekIs(LBRACKET) { 338 | return ast.Source{}, false, nil 339 | } 340 | p.next() 341 | var rows []ast.InlineRow 342 | for { 343 | p.skipNewlines() 344 | if p.peekIs(RBRACE) { 345 | p.next() 346 | continue 347 | } 348 | if p.peekIs(RBRACKET) { 349 | p.next() 350 | break 351 | } 352 | if !p.peekIs(LBRACE) { 353 | return ast.Source{}, false, fmt.Errorf("expected { in inline rows") 354 | } 355 | p.next() 356 | rec, err := p.parseRecord() 357 | if err != nil { 358 | return ast.Source{}, false, err 359 | } 360 | rows = append(rows, rec) 361 | p.skipNewlines() 362 | if p.peekIs(COMMA) { 363 | p.next() 364 | } 365 | p.skipNewlines() 366 | if p.peekIs(RBRACKET) { 367 | p.next() 368 | break 369 | } 370 | } 371 | return ast.Source{Rows: rows}, true, nil 372 | } 373 | 374 | func (p *Parser) parseLoop() (ast.Step, error) { 375 | p.skipNewlines() 376 | hasParens := false 377 | if p.peekIs(LPAREN) { 378 | hasParens = true 379 | p.next() 380 | } 381 | prev := p.stopAtPipe 382 | p.stopAtPipe = true 383 | defer func() { p.stopAtPipe = prev }() 384 | var steps []ast.Step 385 | for { 386 | p.skipNewlines() 387 | if hasParens { 388 | if p.peekIs(RPAREN) { 389 | p.next() 390 | break 391 | } 392 | if p.peekIs(EOF) { 393 | return nil, fmt.Errorf("unterminated loop body") 394 | } 395 | } else if p.peekIs(EOF) { 396 | break 397 | } 398 | if !p.peekIs(IDENT) { 399 | return nil, fmt.Errorf("unexpected token %v in loop", p.peek()) 400 | } 401 | switch p.peek().Lit { 402 | case "filter": 403 | p.next() 404 | step, err := p.parseFilter() 405 | if err != nil { 406 | return nil, err 407 | } 408 | steps = append(steps, step) 409 | case "derive": 410 | p.next() 411 | step, err := p.parseDerive() 412 | if err != nil { 413 | return nil, err 414 | } 415 | steps = append(steps, step) 416 | case "select": 417 | p.next() 418 | step, err := p.parseSelect() 419 | if err != nil { 420 | return nil, err 421 | } 422 | steps = append(steps, step) 423 | case "sort": 424 | p.next() 
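// As at top level, a loop-body sort accepts both forms, e.g.
//
//	sort track_id
//	sort {-milliseconds, +name}
//
// (parseSort/parseSortItem below handle the braced list and +/- prefixes).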
425 | step, err := p.parseSort() 426 | if err != nil { 427 | return nil, err 428 | } 429 | steps = append(steps, step) 430 | default: 431 | return nil, fmt.Errorf("unsupported statement %q in loop", p.peek().Lit) 432 | } 433 | p.skipNewlines() 434 | if hasParens && p.peekIs(PIPE) { 435 | p.next() 436 | } 437 | } 438 | return &ast.LoopStep{Body: steps}, nil 439 | } 440 | 441 | func (p *Parser) parseFilter() (ast.Step, error) { 442 | p.skipNewlines() 443 | expr, err := p.parseExpr(0) 444 | if err != nil { 445 | return nil, err 446 | } 447 | p.skipToLineEnd() 448 | return &ast.FilterStep{Expr: expr}, nil 449 | } 450 | 451 | func (p *Parser) parseDerive() (ast.Step, error) { 452 | p.skipNewlines() 453 | var assigns []ast.Assignment 454 | if p.peekIs(LBRACE) { 455 | p.next() // consume { 456 | for { 457 | p.skipNewlines() 458 | if p.peekIs(RBRACE) { 459 | p.next() 460 | break 461 | } 462 | assign, err := p.parseAssignment() 463 | if err != nil { 464 | return nil, err 465 | } 466 | assigns = append(assigns, assign) 467 | if p.peekIs(COMMA) { 468 | p.next() 469 | } 470 | p.skipNewlines() 471 | } 472 | } else { 473 | assign, err := p.parseAssignment() 474 | if err != nil { 475 | return nil, err 476 | } 477 | assigns = append(assigns, assign) 478 | } 479 | p.skipToLineEnd() 480 | return &ast.DeriveStep{Assignments: assigns}, nil 481 | } 482 | 483 | func (p *Parser) parseAssignment() (ast.Assignment, error) { 484 | if !p.peekIs(IDENT) { 485 | return ast.Assignment{}, fmt.Errorf("expected identifier in assignment") 486 | } 487 | name := p.next().Lit 488 | if !p.peekIs(EQUAL) { 489 | return ast.Assignment{}, fmt.Errorf("expected = in assignment") 490 | } 491 | p.next() 492 | expr, err := p.parseExpr(0) 493 | if err != nil { 494 | return ast.Assignment{}, err 495 | } 496 | return ast.Assignment{Name: name, Expr: expr}, nil 497 | } 498 | 499 | func (p *Parser) parseRecord() (ast.InlineRow, error) { 500 | var fields []ast.Field 501 | for { 502 | p.skipNewlines() 503 | if p.peekIs(RBRACE) { 504 | p.next() 505 | break 506 | } 507 | if !p.peekIs(IDENT) { 508 | return ast.InlineRow{}, fmt.Errorf("expected field name in record") 509 | } 510 | key := p.next().Lit 511 | if !p.peekIs(EQUAL) { 512 | return ast.InlineRow{}, fmt.Errorf("expected = after field name") 513 | } 514 | p.next() 515 | val, err := p.parseExpr(0) 516 | if err != nil { 517 | return ast.InlineRow{}, err 518 | } 519 | fields = append(fields, ast.Field{Name: key, Expr: val}) 520 | p.skipNewlines() 521 | if p.peekIs(COMMA) { 522 | p.next() 523 | } 524 | } 525 | return ast.InlineRow{Fields: fields}, nil 526 | } 527 | 528 | func (p *Parser) parseSelect() (ast.Step, error) { 529 | p.skipNewlines() 530 | var items []ast.SelectItem 531 | if p.peekIs(LBRACE) { 532 | p.next() 533 | for { 534 | p.skipNewlines() 535 | if p.peekIs(RBRACE) { 536 | p.next() 537 | break 538 | } 539 | item, err := p.parseSelectItem() 540 | if err != nil { 541 | return nil, err 542 | } 543 | items = append(items, item) 544 | if p.peekIs(COMMA) { 545 | p.next() 546 | } 547 | p.skipNewlines() 548 | } 549 | } else { 550 | item, err := p.parseSelectItem() 551 | if err != nil { 552 | return nil, err 553 | } 554 | items = append(items, item) 555 | } 556 | p.skipToLineEnd() 557 | return &ast.SelectStep{Items: items}, nil 558 | } 559 | 560 | func (p *Parser) parseSelectItem() (ast.SelectItem, error) { 561 | expr, err := p.parseExpr(0) 562 | if err != nil { 563 | return ast.SelectItem{}, err 564 | } 565 | alias := "" 566 | if p.peekIs(EQUAL) { 567 | p.next() 568 | rhs, err := 
p.parseExpr(0) 569 | if err != nil { 570 | return ast.SelectItem{}, err 571 | } 572 | id, ok := expr.(*ast.Ident) 573 | if !ok { 574 | return ast.SelectItem{}, fmt.Errorf("left side of assignment must be identifier") 575 | } 576 | alias = strings.Join(id.Parts, ".") 577 | expr = rhs 578 | } 579 | if p.peekIs(IDENT) && p.peek().Lit == "as" { 580 | p.next() 581 | if !p.peekIs(IDENT) { 582 | return ast.SelectItem{}, fmt.Errorf("expected alias after as") 583 | } 584 | alias = p.next().Lit 585 | } 586 | return ast.SelectItem{Expr: expr, As: alias}, nil 587 | } 588 | 589 | func (p *Parser) parseAggregate() (ast.Step, error) { 590 | p.skipNewlines() 591 | if !p.peekIs(LBRACE) { 592 | var name string 593 | if p.peekIs(IDENT) && p.peekN(1).Typ == EQUAL { 594 | name = p.next().Lit 595 | p.next() 596 | } 597 | item, err := p.parseAggregateItem() 598 | if err != nil { 599 | return nil, err 600 | } 601 | if name != "" { 602 | item.As = name 603 | } 604 | p.skipToLineEnd() 605 | return &ast.AggregateStep{Items: []ast.AggregateItem{item}}, nil 606 | } 607 | p.next() 608 | var items []ast.AggregateItem 609 | for { 610 | p.skipNewlines() 611 | if p.peekIs(RBRACE) { 612 | p.next() 613 | break 614 | } 615 | if p.peekIs(IDENT) && p.peekN(1).Typ == EQUAL { 616 | name := p.next().Lit 617 | p.next() 618 | item, err := p.parseAggregateItem() 619 | if err != nil { 620 | return nil, err 621 | } 622 | item.As = name 623 | items = append(items, item) 624 | } else { 625 | item, err := p.parseAggregateItem() 626 | if err != nil { 627 | return nil, err 628 | } 629 | items = append(items, item) 630 | } 631 | if p.peekIs(COMMA) { 632 | p.next() 633 | } 634 | p.skipNewlines() 635 | } 636 | p.skipToLineEnd() 637 | return &ast.AggregateStep{Items: items}, nil 638 | } 639 | 640 | func (p *Parser) parseAggregateItem() (ast.AggregateItem, error) { 641 | funcExpr, err := p.parseExpr(0) 642 | if err != nil { 643 | return ast.AggregateItem{}, err 644 | } 645 | call, ok := funcExpr.(*ast.Call) 646 | if !ok { 647 | if pipe, ok := funcExpr.(*ast.Pipe); ok { 648 | call = &ast.Call{Func: pipe.Func, Args: append([]ast.Expr{pipe.Input}, pipe.Args...)} 649 | } else { 650 | return ast.AggregateItem{}, fmt.Errorf("aggregate item must be a function call") 651 | } 652 | } 653 | fnName := exprToIdent(call.Func) 654 | alias := "" 655 | if p.peekIs(IDENT) && p.peek().Lit == "as" { 656 | p.next() 657 | if !p.peekIs(IDENT) { 658 | return ast.AggregateItem{}, fmt.Errorf("expected alias after as") 659 | } 660 | alias = p.next().Lit 661 | } 662 | arg := ast.Expr(nil) 663 | if len(call.Args) > 0 { 664 | arg = call.Args[0] 665 | } 666 | args := append([]ast.Expr{}, call.Args...) 667 | return ast.AggregateItem{Func: fnName, Arg: arg, Args: args, As: alias}, nil 668 | } 669 | 670 | func (p *Parser) parseTake() (ast.Step, error) { 671 | p.skipNewlines() 672 | // range form: number .. number 673 | if p.peekIs(NUMBER) && p.peekN(1).Typ == RANGE { 674 | start := p.next().Lit 675 | if strings.Contains(start, ".") { 676 | return nil, fmt.Errorf("`take` expected int or range, but found %s", start) 677 | } 678 | p.next() // .. 
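// Worked example: `take 1..15` parses start=1 and end=15 below, giving
// limit = 15-1+1 = 15 and offset = 1-1 = 0; the pipelines_filters_sort_take
// snapshot accordingly shows a bare LIMIT 15 (no OFFSET is emitted for a
// zero offset).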
679 | if !p.peekIs(NUMBER) { 680 | return nil, fmt.Errorf("expected end of range") 681 | } 682 | end := p.next().Lit 683 | if strings.Contains(end, ".") { 684 | return nil, fmt.Errorf("`take` expected int or range, but found %s", end) 685 | } 686 | startInt := atoi(start) 687 | endInt := atoi(end) 688 | limit := endInt - startInt + 1 689 | offset := startInt - 1 690 | p.skipToLineEnd() 691 | return &ast.TakeStep{Limit: limit, Offset: offset}, nil 692 | } 693 | 694 | if !p.peekIs(NUMBER) { 695 | return nil, fmt.Errorf("take expects number or range") 696 | } 697 | lit := p.next().Lit 698 | if strings.Contains(lit, ".") { 699 | return nil, fmt.Errorf("`take` expected int or range, but found %s", lit) 700 | } 701 | limit := atoi(lit) 702 | p.skipToLineEnd() 703 | return &ast.TakeStep{Limit: limit}, nil 704 | } 705 | 706 | func (p *Parser) parseAppend() (ast.Step, error) { 707 | p.skipNewlines() 708 | if !p.peekIs(LPAREN) { 709 | return nil, fmt.Errorf("append expects '('") 710 | } 711 | p.next() 712 | subTokens := p.collectUntilMatching(RPAREN) 713 | subParser := &Parser{tokens: subTokens} 714 | subQuery, err := subParser.parseQuery() 715 | if err != nil { 716 | return nil, err 717 | } 718 | return &ast.AppendStep{Query: subQuery}, nil 719 | } 720 | 721 | func (p *Parser) parseRemove() (ast.Step, error) { 722 | p.skipNewlines() 723 | if !p.peekIs(LPAREN) { 724 | return nil, fmt.Errorf("remove expects '('") 725 | } 726 | p.next() 727 | subTokens := p.collectUntilMatching(RPAREN) 728 | subParser := &Parser{tokens: subTokens} 729 | subQuery, err := subParser.parseQuery() 730 | if err != nil { 731 | return nil, err 732 | } 733 | return &ast.RemoveStep{Query: subQuery}, nil 734 | } 735 | 736 | func (p *Parser) parseGroup() (ast.Step, error) { 737 | p.skipNewlines() 738 | var keyExpr ast.Expr 739 | if p.peekIs(LBRACE) { 740 | p.next() 741 | var exprs []ast.Expr 742 | for { 743 | p.skipNewlines() 744 | if p.peekIs(RBRACE) { 745 | p.next() 746 | break 747 | } 748 | e, err := p.parseExpr(0) 749 | if err != nil { 750 | return nil, err 751 | } 752 | exprs = append(exprs, e) 753 | if p.peekIs(COMMA) { 754 | p.next() 755 | } 756 | } 757 | if len(exprs) == 1 { 758 | keyExpr = exprs[0] 759 | } else { 760 | keyExpr = &ast.Tuple{Exprs: exprs} 761 | } 762 | } else { 763 | if !p.peekIs(IDENT) { 764 | return nil, fmt.Errorf("group expects identifier key") 765 | } 766 | keyTok := p.next() 767 | if strings.HasSuffix(keyTok.Lit, ".") && p.peekIs(STAR) { 768 | keyTok.Lit = strings.TrimSuffix(keyTok.Lit, ".") 769 | p.next() 770 | } else if p.peekIs(DOT) && p.peekN(1).Typ == STAR { 771 | p.next() 772 | p.next() 773 | } 774 | keyExpr = &ast.Ident{Parts: strings.Split(keyTok.Lit, ".")} 775 | } 776 | p.skipNewlines() 777 | if !p.peekIs(LPAREN) { 778 | return nil, fmt.Errorf("group expects '(' block") 779 | } 780 | p.next() 781 | subTokens := p.collectUntilMatching(RPAREN) 782 | subParser := &Parser{tokens: subTokens} 783 | subQuery, err := subParser.parseGroupSteps() 784 | if err != nil { 785 | return nil, err 786 | } 787 | return &ast.GroupStep{ 788 | Key: keyExpr, 789 | Steps: subQuery, 790 | }, nil 791 | } 792 | 793 | func (p *Parser) parseGroupSteps() ([]ast.Step, error) { 794 | var steps []ast.Step 795 | for !p.peekIs(EOF) { 796 | p.skipNewlines() 797 | if p.peekIs(EOF) { 798 | break 799 | } 800 | if p.peekIs(PIPE) { 801 | p.next() 802 | continue 803 | } 804 | switch tok := p.peek(); tok.Typ { 805 | case IDENT: 806 | switch tok.Lit { 807 | case "filter": 808 | p.next() 809 | step, err := p.parseFilter() 810 | if err 
!= nil { 811 | return nil, err 812 | } 813 | steps = append(steps, step) 814 | case "derive": 815 | p.next() 816 | step, err := p.parseDerive() 817 | if err != nil { 818 | return nil, err 819 | } 820 | steps = append(steps, step) 821 | case "select": 822 | p.next() 823 | step, err := p.parseSelect() 824 | if err != nil { 825 | return nil, err 826 | } 827 | steps = append(steps, step) 828 | case "aggregate": 829 | p.next() 830 | step, err := p.parseAggregate() 831 | if err != nil { 832 | return nil, err 833 | } 834 | steps = append(steps, step) 835 | case "window": 836 | // Skip window blocks within groups for now. 837 | p.next() 838 | p.skipNewlines() 839 | if p.peekIs(IDENT) && strings.Contains(p.peek().Lit, ":") { 840 | p.next() 841 | p.skipNewlines() 842 | } 843 | if p.peekIs(LPAREN) { 844 | p.next() 845 | p.collectUntilMatching(RPAREN) 846 | } 847 | case "take": 848 | p.next() 849 | step, err := p.parseTake() 850 | if err != nil { 851 | return nil, err 852 | } 853 | steps = append(steps, step) 854 | case "sort": 855 | p.next() 856 | step, err := p.parseSort() 857 | if err != nil { 858 | return nil, err 859 | } 860 | steps = append(steps, step) 861 | default: 862 | return nil, fmt.Errorf("unexpected token %q in group", tok.Lit) 863 | } 864 | case NEWLINE: 865 | p.next() 866 | default: 867 | return nil, fmt.Errorf("unexpected token %v in group at pos %d", tok, p.pos) 868 | } 869 | } 870 | return steps, nil 871 | } 872 | 873 | func (p *Parser) parseSort() (ast.Step, error) { 874 | p.skipNewlines() 875 | var items []ast.SortItem 876 | if p.peekIs(LBRACE) { 877 | p.next() 878 | for { 879 | p.skipNewlines() 880 | if p.peekIs(PIPE) { 881 | break 882 | } 883 | if p.peekIs(RBRACE) { 884 | p.next() 885 | break 886 | } 887 | item, err := p.parseSortItem() 888 | if err != nil { 889 | return nil, err 890 | } 891 | items = append(items, item) 892 | if p.peekIs(COMMA) { 893 | p.next() 894 | } 895 | p.skipNewlines() 896 | } 897 | } else { 898 | if p.peekIs(PIPE) { 899 | return &ast.SortStep{Items: items}, nil 900 | } 901 | item, err := p.parseSortItem() 902 | if err != nil { 903 | return nil, err 904 | } 905 | items = append(items, item) 906 | } 907 | return &ast.SortStep{Items: items}, nil 908 | } 909 | 910 | func (p *Parser) parseSortItem() (ast.SortItem, error) { 911 | desc := false 912 | if p.peekIs(MINUS) { 913 | p.next() 914 | desc = true 915 | } else if p.peekIs(PLUS) { 916 | p.next() 917 | } 918 | expr, err := p.parseExpr(0) 919 | if err != nil { 920 | return ast.SortItem{}, err 921 | } 922 | if p.peekIs(IDENT) && p.peek().Lit == "desc" { 923 | p.next() 924 | desc = true 925 | } 926 | return ast.SortItem{Expr: expr, Desc: desc}, nil 927 | } 928 | 929 | // Expression parsing (Pratt-style with limited operators). 930 | var precedences = map[TokenType]int{ 931 | OROR: 1, 932 | EQ: 2, 933 | REGEXEQ: 2, 934 | NEQ: 2, 935 | NULLCOAL: 2, 936 | RANGE: 2, 937 | LT: 3, 938 | GT: 3, 939 | LTE: 3, 940 | GTE: 3, 941 | PLUS: 4, 942 | MINUS: 4, 943 | STAR: 5, 944 | SLASH: 5, 945 | FLOORDIV: 5, 946 | PERCENT: 5, 947 | POW: 6, 948 | } 949 | 950 | func (p *Parser) parseExpr(precedence int) (ast.Expr, error) { 951 | p.skipNewlines() 952 | left, err := p.parsePrefix() 953 | if err != nil { 954 | return nil, err 955 | } 956 | 957 | for { 958 | if p.peekIs(EOF) || p.peekIs(NEWLINE) || p.peekIs(COMMA) || p.peekIs(RBRACE) || p.peekIs(RPAREN) || p.peekIs(RBRACKET) { 959 | break 960 | } 961 | 962 | // Pipe operator has low precedence; handle directly. 
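// For example `title | text.extract 2 5` becomes
//
//	&ast.Pipe{Input: title, Func: text.extract, Args: [2, 5]}
//
// which the generator renders as SUBSTRING(title, 2, 5); and in
// `milliseconds / 1000 / 60 | in 3..4` the RANGE special case below folds
// `3..4` into a single `..` Binary argument (compiled to BETWEEN 3 AND 4).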
963 | if p.peekIs(PIPE) && !p.stopAtPipe { 964 | if precedence > 1 { 965 | break 966 | } 967 | p.next() 968 | fn, err := p.parsePrefix() 969 | if err != nil { 970 | return nil, err 971 | } 972 | var args []ast.Expr 973 | for p.canStartExpr(p.peek()) { 974 | arg, err := p.parsePrefix() 975 | if err != nil { 976 | return nil, err 977 | } 978 | args = append(args, arg) 979 | } 980 | if p.peekIs(RANGE) && len(args) > 0 { 981 | start := args[len(args)-1] 982 | p.next() 983 | right, err := p.parseExpr(precedences[RANGE] + 1) 984 | if err != nil { 985 | return nil, err 986 | } 987 | args[len(args)-1] = &ast.Binary{Op: "..", Left: start, Right: right} 988 | } 989 | left = &ast.Pipe{Input: left, Func: fn, Args: args} 990 | continue 991 | } 992 | 993 | // Function application by adjacency. 994 | if p.canStartExpr(p.peek()) && p.peek().Typ != MINUS { 995 | arg, err := p.parsePrefix() 996 | if err != nil { 997 | return nil, err 998 | } 999 | left = appendCallArg(left, arg) 1000 | continue 1001 | } 1002 | 1003 | op := p.peek() 1004 | opPrec, ok := precedences[op.Typ] 1005 | if !ok || opPrec < precedence { 1006 | break 1007 | } 1008 | p.next() 1009 | right, err := p.parseExpr(opPrec + 1) 1010 | if err != nil { 1011 | return nil, err 1012 | } 1013 | left = &ast.Binary{Op: op.Lit, Left: left, Right: right} 1014 | } 1015 | 1016 | return left, nil 1017 | } 1018 | 1019 | func (p *Parser) parsePrefix() (ast.Expr, error) { 1020 | tok := p.next() 1021 | switch tok.Typ { 1022 | case IDENT: 1023 | if tok.Lit == "case" && p.peekIs(LBRACKET) { 1024 | return p.parseCase() 1025 | } 1026 | if p.peekIs(DOT) && p.peekN(1).Typ == STAR { 1027 | p.next() 1028 | p.next() 1029 | return &ast.Ident{Parts: []string{tok.Lit, "*"}}, nil 1030 | } 1031 | return &ast.Ident{Parts: strings.Split(tok.Lit, ".")}, nil 1032 | case NUMBER: 1033 | return &ast.Number{Value: tok.Lit}, nil 1034 | case STRING: 1035 | return &ast.StringLit{Value: tok.Lit}, nil 1036 | case FSTRING: 1037 | return p.parseFString(tok.Lit) 1038 | case LPAREN: 1039 | expr, err := p.parseExpr(0) 1040 | if err != nil { 1041 | return nil, err 1042 | } 1043 | if !p.peekIs(RPAREN) { 1044 | return nil, fmt.Errorf("expected ) at pos %d", p.pos) 1045 | } 1046 | p.next() 1047 | return expr, nil 1048 | case MINUS: 1049 | if p.peekIs(NUMBER) { 1050 | num := p.next().Lit 1051 | return &ast.Number{Value: "-" + num}, nil 1052 | } 1053 | expr, err := p.parseExpr(precedences[MINUS] + 1) // bind the operand tighter than binary +/- so -a + b reads as (-a) + b 1054 | if err != nil { 1055 | return nil, err 1056 | } 1057 | return &ast.Binary{Op: "*", Left: &ast.Number{Value: "-1"}, Right: expr}, nil 1058 | default: 1059 | return nil, fmt.Errorf("unexpected token %v at pos %d", tok, p.pos-1) 1060 | } 1061 | } 1062 | 1063 | func (p *Parser) parseCase() (ast.Expr, error) { 1064 | p.next() // consume '[' 1065 | var branches []ast.CaseBranch 1066 | for { 1067 | p.skipNewlines() 1068 | if p.peekIs(RBRACKET) { 1069 | p.next() 1070 | break 1071 | } 1072 | cond, err := p.parseExpr(0) 1073 | if err != nil { 1074 | return nil, err 1075 | } 1076 | if !p.peekIs(ARROW) { 1077 | return nil, fmt.Errorf("expected => in case expression") 1078 | } 1079 | p.next() 1080 | val, err := p.parseExpr(0) 1081 | if err != nil { 1082 | return nil, err 1083 | } 1084 | branches = append(branches, ast.CaseBranch{Cond: cond, Value: val}) 1085 | p.skipNewlines() 1086 | if p.peekIs(COMMA) { 1087 | p.next() 1088 | } 1089 | } 1090 | return &ast.CaseExpr{Branches: branches}, nil 1091 | } 1092 | 1093 | // Helpers 1094 | func (p *Parser) peek() Token { 1095 | return p.tokens[p.pos] 1096 | } 1097 | 1098 
| func (p *Parser) peekN(n int) Token { 1099 | if p.pos+n >= len(p.tokens) { 1100 | return Token{Typ: EOF} 1101 | } 1102 | return p.tokens[p.pos+n] 1103 | } 1104 | 1105 | func (p *Parser) peekIs(tt TokenType) bool { 1106 | return p.peek().Typ == tt 1107 | } 1108 | 1109 | func (p *Parser) next() Token { 1110 | t := p.tokens[p.pos] 1111 | p.pos++ 1112 | return t 1113 | } 1114 | 1115 | func (p *Parser) skipNewlines() { 1116 | for p.peekIs(NEWLINE) { 1117 | p.next() 1118 | } 1119 | } 1120 | 1121 | func (p *Parser) skipToLineEnd() { 1122 | for !p.peekIs(EOF) && !p.peekIs(NEWLINE) { 1123 | if p.stopAtPipe && (p.peekIs(PIPE) || p.peekIs(RPAREN)) { 1124 | break 1125 | } 1126 | p.next() 1127 | } 1128 | p.skipNewlines() 1129 | } 1130 | 1131 | func (p *Parser) matchIdent(lit string) bool { 1132 | if p.peekIs(IDENT) && p.peek().Lit == lit { 1133 | p.next() 1134 | return true 1135 | } 1136 | return false 1137 | } 1138 | 1139 | func (p *Parser) canStartExpr(tok Token) bool { 1140 | switch tok.Typ { 1141 | case IDENT, NUMBER, STRING, FSTRING, LPAREN, MINUS: 1142 | return true 1143 | default: 1144 | return false 1145 | } 1146 | } 1147 | 1148 | func (p *Parser) parseFString(lit string) (ast.Expr, error) { 1149 | var parts []ast.Expr 1150 | var sb strings.Builder 1151 | for i := 0; i < len(lit); i++ { 1152 | ch := lit[i] 1153 | if ch == '{' { 1154 | if i+1 < len(lit) && lit[i+1] == '{' { 1155 | sb.WriteByte('{') 1156 | i++ 1157 | continue 1158 | } 1159 | if sb.Len() > 0 { 1160 | parts = append(parts, &ast.StringLit{Value: sb.String()}) 1161 | sb.Reset() 1162 | } 1163 | i++ 1164 | start := i 1165 | depth := 1 1166 | for i < len(lit) && depth > 0 { 1167 | if lit[i] == '{' { 1168 | depth++ 1169 | } else if lit[i] == '}' { 1170 | depth-- 1171 | if depth == 0 { 1172 | break 1173 | } 1174 | } 1175 | i++ 1176 | } 1177 | if depth != 0 { 1178 | return nil, fmt.Errorf("unterminated expression in f-string") 1179 | } 1180 | exprStr := strings.TrimSpace(lit[start:i]) 1181 | if exprStr == "" { 1182 | return nil, fmt.Errorf("empty expression in f-string") 1183 | } 1184 | expr, err := parseExprFragment(exprStr) 1185 | if err != nil { 1186 | return nil, err 1187 | } 1188 | parts = append(parts, expr) 1189 | } else if ch == '}' { 1190 | if i+1 < len(lit) && lit[i+1] == '}' { 1191 | sb.WriteByte('}') 1192 | i++ 1193 | continue 1194 | } 1195 | return nil, fmt.Errorf("single } in f-string") 1196 | } else { 1197 | sb.WriteByte(ch) 1198 | } 1199 | } 1200 | if sb.Len() > 0 { 1201 | parts = append(parts, &ast.StringLit{Value: sb.String()}) 1202 | } 1203 | if len(parts) == 0 { 1204 | return &ast.StringLit{Value: ""}, nil 1205 | } 1206 | if len(parts) == 1 { 1207 | return parts[0], nil 1208 | } 1209 | return &ast.Call{ 1210 | Func: &ast.Ident{Parts: []string{"__concat__"}}, 1211 | Args: parts, 1212 | }, nil 1213 | } 1214 | 1215 | func parseExprFragment(src string) (ast.Expr, error) { 1216 | toks, err := Lex(src) 1217 | if err != nil { 1218 | return nil, err 1219 | } 1220 | parser := &Parser{tokens: toks} 1221 | expr, err := parser.parseExpr(0) 1222 | if err != nil { 1223 | return nil, err 1224 | } 1225 | parser.skipNewlines() 1226 | if !parser.peekIs(EOF) { 1227 | return nil, fmt.Errorf("unexpected token %v in f-string", parser.peek()) 1228 | } 1229 | return expr, nil 1230 | } 1231 | 1232 | func (p *Parser) collectUntilMatching(end TokenType) []Token { 1233 | var collected []Token 1234 | depth := 1 1235 | for { 1236 | tok := p.next() 1237 | if tok.Typ == EOF { 1238 | break 1239 | } 1240 | if tok.Typ == end { 1241 | depth-- 1242 | if 
depth == 0 { 1243 | break 1244 | } 1245 | } 1246 | if tok.Typ == LPAREN && end == RPAREN { 1247 | depth++ 1248 | } 1249 | if tok.Typ == LBRACE && end == RBRACE { 1250 | depth++ 1251 | } 1252 | collected = append(collected, tok) 1253 | } 1254 | collected = append(collected, Token{Typ: EOF}) 1255 | return collected 1256 | } 1257 | 1258 | func atoi(s string) int { 1259 | var n int 1260 | for _, r := range s { 1261 | n = n*10 + int(r-'0') 1262 | } 1263 | return n 1264 | } 1265 | 1266 | func exprToIdent(e ast.Expr) string { 1267 | if id, ok := e.(*ast.Ident); ok { 1268 | return strings.Join(id.Parts, ".") 1269 | } 1270 | return "" 1271 | } 1272 | 1273 | func appendCallArg(fn ast.Expr, arg ast.Expr) ast.Expr { 1274 | if call, ok := fn.(*ast.Call); ok { 1275 | return &ast.Call{Func: call.Func, Args: append(call.Args, arg)} 1276 | } 1277 | return &ast.Call{Func: fn, Args: []ast.Expr{arg}} 1278 | } 1279 | 1280 | func (p *Parser) parseJoin() (ast.Step, error) { 1281 | p.skipNewlines() 1282 | side := "inner" 1283 | if p.peekIs(IDENT) && strings.Contains(p.peek().Lit, "side:") { 1284 | side = strings.SplitN(p.next().Lit, ":", 2)[1] 1285 | } 1286 | p.skipNewlines() 1287 | var subQuery *ast.Query 1288 | if inline, ok, err := p.parseInlineRowsSource(); ok || err != nil { 1289 | if err != nil { 1290 | return nil, err 1291 | } 1292 | subQuery = &ast.Query{From: inline} 1293 | } else if p.peekIs(LPAREN) { 1294 | p.next() 1295 | subTokens := p.collectUntilMatching(RPAREN) 1296 | subParser := &Parser{tokens: subTokens} 1297 | q, err := subParser.parseQuery() 1298 | if err != nil { 1299 | return nil, err 1300 | } 1301 | subQuery = q 1302 | } else if p.peekIs(IDENT) { 1303 | table := p.next().Lit 1304 | if p.peekIs(EQUAL) { 1305 | alias := table 1306 | p.next() 1307 | if !p.peekIs(IDENT) { 1308 | return nil, fmt.Errorf("join expects table after alias") 1309 | } 1310 | tableTok := p.next() 1311 | table = fmt.Sprintf("%s AS %s", tableTok.Lit, alias) 1312 | } 1313 | subQuery = &ast.Query{From: ast.Source{Table: table}} 1314 | } else { 1315 | return nil, fmt.Errorf("join expects '(' source") 1316 | } 1317 | p.skipNewlines() 1318 | if p.peekIs(IDENT) && strings.Contains(p.peek().Lit, "side:") { 1319 | side = strings.SplitN(p.next().Lit, ":", 2)[1] 1320 | } 1321 | p.skipNewlines() 1322 | if !p.peekIs(LPAREN) { 1323 | return nil, fmt.Errorf("join expects '(' condition") 1324 | } 1325 | p.next() 1326 | condTokens := p.collectUntilMatching(RPAREN) 1327 | var cond ast.Expr 1328 | if len(condTokens) > 0 && condTokens[0].Typ == EQ && len(condTokens) > 1 && condTokens[1].Typ == IDENT { 1329 | name := condTokens[1].Lit 1330 | cond = &ast.Binary{ 1331 | Op: "==", 1332 | Left: &ast.Ident{Parts: []string{"this", name}}, 1333 | Right: &ast.Ident{ 1334 | Parts: []string{"that", name}, 1335 | }, 1336 | } 1337 | } else { 1338 | condParser := &Parser{tokens: condTokens} 1339 | var err error 1340 | cond, err = condParser.parseExpr(0) 1341 | if err != nil { 1342 | return nil, err 1343 | } 1344 | } 1345 | return &ast.JoinStep{Side: side, Query: subQuery, On: cond}, nil 1346 | } 1347 | --------------------------------------------------------------------------------