├── package.json ├── go.mod ├── .gitignore ├── scripts └── run_integration.sh ├── internal ├── parser │ ├── json_table.go │ ├── lexer.go │ └── parser.go └── sqlgen │ ├── dialect.go │ └── dialects.go ├── CODE_OF_CONDUCT.md ├── docs ├── PLAN.md ├── SNAPSHOTS_PLAN.md └── DIALECT_PLAN.md ├── compile_error_test.go ├── ast └── ast.go ├── dialect_test.go ├── AGENTS.md ├── examples └── mongo │ └── main.go ├── integration_test.go ├── gophrql.go ├── prql_integration_test.go ├── LICENSE ├── README.md └── compile_test.go /package.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/maxpert/gophrql 2 | 3 | go 1.25.5 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | coverage*.out 3 | *.log 4 | *.swp 5 | .DS_Store 6 | vendor/ 7 | tmp/ 8 | 9 | 10 | CLAUDE.md 11 | AGENTS.md -------------------------------------------------------------------------------- /scripts/run_integration.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | # clones or updates `tmp/prql` from upstream and runs the Go integration tests that compare 5 | # against the upstream SQL snapshots. 6 | 7 | REPO_URL="https://github.com/PRQL/prql" 8 | TARGET_DIR="tmp/prql" 9 | 10 | mkdir -p "$(dirname "$TARGET_DIR")" 11 | 12 | if [ -d "$TARGET_DIR/.git" ]; then 13 | echo "Updating existing upstream checkout at $TARGET_DIR" 14 | git -C "$TARGET_DIR" fetch --all --prune 15 | git -C "$TARGET_DIR" reset --hard origin/main 16 | else 17 | echo "Cloning upstream repository into $TARGET_DIR" 18 | git clone "$REPO_URL" "$TARGET_DIR" 19 | fi 20 | 21 | echo "Running Go integration suite" 22 | env GOCACHE=/tmp/go-build go test ./... 
23 | -------------------------------------------------------------------------------- /internal/parser/json_table.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | 7 | "github.com/maxpert/gophrql/ast" 8 | ) 9 | 10 | func parseJSONTable(raw string) ([]ast.InlineRow, error) { 11 | var payload struct { 12 | Columns []string `json:"columns"` 13 | Data [][]json.RawMessage `json:"data"` 14 | } 15 | if err := json.Unmarshal([]byte(raw), &payload); err != nil { 16 | return nil, fmt.Errorf("invalid json: %w", err) 17 | } 18 | var rows []ast.InlineRow 19 | for _, row := range payload.Data { 20 | var fields []ast.Field 21 | for i, col := range payload.Columns { 22 | if i >= len(row) { 23 | continue 24 | } 25 | var num json.Number 26 | if err := json.Unmarshal(row[i], &num); err == nil { 27 | fields = append(fields, ast.Field{Name: col, Expr: &ast.Number{Value: num.String()}}) 28 | continue 29 | } 30 | var str string 31 | if err := json.Unmarshal(row[i], &str); err == nil { 32 | fields = append(fields, ast.Field{Name: col, Expr: &ast.StringLit{Value: str}}) 33 | continue 34 | } 35 | } 36 | rows = append(rows, ast.InlineRow{Fields: fields}) 37 | } 38 | return rows, nil 39 | } 40 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to a positive environment for our community include: 10 | - Demonstrating empathy and kindness toward other people. 11 | - Being respectful of differing opinions, viewpoints, and experiences. 12 | - Gracefully accepting constructive criticism. 13 | - Focusing on what is best for the community. 14 | - Showing courtesy and patience. 15 | 16 | Examples of unacceptable behavior include: 17 | - The use of sexualized language or imagery and unwelcome sexual attention or advances. 18 | - Trolling, insulting or harassing comments, or inappropriate jokes. 19 | - Public or private harassment. 20 | - Publishing private information about a person without explicit permission. 21 | - Any other conduct which could reasonably be considered inappropriate in a professional setting. 22 | 23 | ## Enforcement 24 | 25 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project maintainers at `contact@gophrql.org`. All complaints will be reviewed and investigated promptly and fairly. 26 | 27 | Project maintainers are obligated to: 28 | - Take all complaints seriously and respond promptly. 29 | - Ensure confidentiality of the complainant and the accused. 30 | - Take appropriate corrective action, which may include a warning, temporary ban, or permanent ban from the project. 31 | 32 | ## Attribution 33 | 34 | This Code of Conduct is adapted from the Contributor Covenant, version 2.1, available at https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. 
35 | -------------------------------------------------------------------------------- /internal/sqlgen/dialect.go: -------------------------------------------------------------------------------- 1 | package sqlgen 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | type DialectType string 8 | 9 | const ( 10 | DialectGeneric DialectType = "sql.generic" 11 | DialectPostgres DialectType = "sql.postgres" 12 | DialectSQLite DialectType = "sql.sqlite" 13 | DialectDuckDB DialectType = "sql.duckdb" 14 | DialectMySQL DialectType = "sql.mysql" 15 | DialectMSSQL DialectType = "sql.mssql" 16 | DialectClickHouse DialectType = "sql.clickhouse" 17 | DialectBigQuery DialectType = "sql.bigquery" 18 | DialectSnowflake DialectType = "sql.snowflake" 19 | ) 20 | 21 | // Dialect defines the capabilities and syntax variations for a SQL target. 22 | type Dialect struct { 23 | Type DialectType 24 | 25 | // Identifier quoting 26 | IdentQuoteChar byte // 0 for no quoting/default 27 | 28 | // Limit/Offset handling 29 | UseTopClause bool // SELECT TOP N ... 30 | UseLimitOffset bool // LIMIT N OFFSET M 31 | UseLimitComma bool // LIMIT M, N (MySQL style) 32 | OffsetFetchSyntax bool // OFFSET M ROWS FETCH NEXT N ROWS ONLY 33 | 34 | // Function mapping overrides 35 | // key: PRQL function name (e.g. "math.round") 36 | // value: SQL pattern (e.g. "ROUND(%s, %s)") 37 | Functions map[string]string 38 | } 39 | 40 | // DefaultDialect is the generic dialect (Postgres-like). 41 | var DefaultDialect = &Dialect{ 42 | Type: DialectGeneric, 43 | IdentQuoteChar: '"', 44 | UseLimitOffset: true, 45 | Functions: map[string]string{}, 46 | } 47 | 48 | func (d *Dialect) QuoteIdent(s string) string { 49 | if s == "*" { 50 | return "*" 51 | } 52 | // If the identifier is safe, don't quote it (unless forced?) 53 | // We assume generic dialect prefers cleaner SQL like standard PRQL compiler 54 | if isSafeIdent(s) { 55 | return s 56 | } 57 | 58 | q := d.IdentQuoteChar 59 | if q == 0 { 60 | return s 61 | } 62 | // Simple escaping: duplicate the quote character 63 | escaped := strings.ReplaceAll(s, string(q), string(q)+string(q)) 64 | return string(q) + escaped + string(q) 65 | } 66 | -------------------------------------------------------------------------------- /docs/PLAN.md: -------------------------------------------------------------------------------- 1 | gophrql Plan 2 | ============ 3 | 4 | Reference review (Rust `prql`) 5 | ------------------------------ 6 | - Core tests live in `prqlc/prqlc/tests/integration`: snapshot suites for lexing, formatting round-trips, PRQL → SQL compilation (generic + per-dialect diffs), lineage debug output, and db-backed execution (`results` gated by feature flags). 7 | - Error coverage is in `error_messages.rs` and `bad_error_messages.rs`, asserting precise diagnostics for arity errors, unknown names, type mismatches, dialect constraints, and malformed input. 8 | - Additional unit suites (`sql.rs`) exercise stdlib modules (math, text), dialect-specific SQL lowering, and feature toggles. 9 | 10 | Go test strategy (write first) 11 | ------------------------------ 12 | - Port a representative slice of Rust snapshots as table-driven tests asserting PRQL → SQL (generic dialect) for: aggregation, unions/append, window functions, stdlib math/text helpers, and pipeline transforms. 13 | - Add negative tests asserting error surfaces for empty queries, missing `from`, too many args, bad types (`take 1.8`), and unknown/ambiguous names. Match key substrings to allow formatting differences while keeping semantics strict. 
14 | - Future: dialect-specific fixtures (e.g., SQLite vs MSSQL concatenation) and formatting round-trips once formatting API exists. 15 | 16 | Implementation roadmap (bottom-up) 17 | ---------------------------------- 18 | 1) Front-end: lexer + parser aligned to PRQL grammar; build an AST that preserves spans for diagnostics and supports module references. 19 | 2) Semantic analysis: name resolution, type checking, pipeline validation, stdlib catalog, and user-defined functions; produce enriched IR. 20 | 3) SQL planner: relational lowering (projections, joins, windows, set ops), CTE management, column aliasing, and deterministic ordering. 21 | 4) Dialect layer: target abstraction for operators/functions, identifiers, limits/offsets, regex, date formatting, and string concat; start with Generic, then SQLite/Postgres/MySQL/MSSQL. 22 | 5) Formatting + tooling: PRQL formatter, lineage/introspection outputs, and richer error rendering (codes, spans, hints). 23 | 6) Execution harness: optional db-backed golden tests mirroring Rust `results` suite; add CLI/sample programs under `examples/`. 24 | 25 | Notes 26 | ----- 27 | - Keep fixtures in `testdata/` with PRQL and expected SQL/error text to mirror upstream organization. 28 | - Maintain parity notes in `AGENTS.md` for any intentional deviations from the Rust behavior or the PRQL book. 29 | -------------------------------------------------------------------------------- /docs/SNAPSHOTS_PLAN.md: -------------------------------------------------------------------------------- 1 | PRQL Integration Snapshot Coverage 2 | ================================== 3 | 4 | Reference snapshots live in `tmp/prql/prqlc/prqlc/tests/integration/queries`. This table tracks which fixtures already have Go tests in `compile_test.go` (under `TestCompileSnapshots`) and which ones we still need to port. 
5 | 6 | | Snapshot file | Status | Notes / matching Go test | 7 | | --- | --- | --- | 8 | | `aggregation.prql` | Done | `TestCompileSnapshots/aggregation` | 9 | | `append_select_compute.prql` | Done | `.../append_select_compute` | 10 | | `append_select_multiple_with_null.prql` | Done | `.../append_select_multiple_with_null` | 11 | | `append_select_nulls.prql` | Done | `.../append_select_nulls` | 12 | | `append_select_simple.prql` | Done | `.../append_select_simple` | 13 | | `append_select.prql` | Done | `.../append_select_union` | 14 | | `arithmetic.prql` | Done | `.../arithmetic_div_mod` | 15 | | `cast.prql` | Done | `.../cast_projection` | 16 | | `constants_only.prql` | Done | `.../constants_only` | 17 | | `date_to_text.prql` | Done | `.../date_to_text_formats` | 18 | | `distinct_on.prql` | Done | `.../distinct_on_group_sort_take` | 19 | | `distinct.prql` | Done | `.../distinct_group_take_one` | 20 | | `genre_counts.prql` | Done | `.../genre_counts` | 21 | | `group_all.prql` | Done | `.../group_all_join_aggregate` | 22 | | `group_sort_derive_select_join.prql` | Done | `.../group_sort_derive_select_join` | 23 | | `group_sort_filter_derive_select_join.prql` | Done | `.../group_sort_filter_derive_select_join` | 24 | | `group_sort_limit_take.prql` | Done | `.../group_sort_limit_take_join` | 25 | | `group_sort.prql` | Done | `.../group_sort_basic` | 26 | | `invoice_totals.prql` | Done | `.../invoice_totals_window_join` | 27 | | `loop_01.prql` | Done | `.../loop_recursive_numbers` | 28 | | `math_module.prql` | Done | `.../stdlib_math_module` | 29 | | `pipelines.prql` | Done | `.../pipelines_filters_sort_take` | 30 | | `read_csv.prql` | Done | `.../read_csv_sort` | 31 | | `set_ops_remove.prql` | Done | `.../set_ops_remove` | 32 | | `sort_2.prql` | Done | `.../sort_alias_filter_join` | 33 | | `sort_3.prql` | Done | `.../sort_alias_inline_sources` | 34 | | `sort.prql` | Done | `.../sort_with_join_alias` | 35 | | `switch.prql` | Done | `.../switch_case_display` | 36 | | `take.prql` | Done | `.../take_range_with_sort` | 37 | | `text_module.prql` | Done | `.../text_module_filters` | 38 | | `window.prql` | Done | `.../window_functions` | 39 | 40 | Summary 41 | ------- 42 | - **Done:** 31 / 31 fixtures (matching upstream snapshots). 43 | - **Partial:** None. 44 | - **TODO:** None for the snapshot suite. 
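Porting template
----------------
New fixtures follow the same table-driven shape as the existing entries in `compile_test.go`. A minimal sketch of that shape (the fixture below is illustrative, not copied from an upstream snapshot; see the real tests for the exact SQL):

```go
func TestCompileSnapshots(t *testing.T) {
	cases := []struct{ name, prql, wantSQL string }{
		{
			name:    "take_range_with_sort", // mirrors take.prql upstream
			prql:    "from tracks\nsort track_id\ntake 3..5",
			wantSQL: "SELECT * FROM tracks ORDER BY track_id LIMIT 3 OFFSET 2",
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			sql, err := gophrql.Compile(tc.prql)
			if err != nil {
				t.Fatalf("Compile error: %v", err)
			}
			// normalize strips whitespace so formatting differences don't fail the test.
			if normalize(sql) != normalize(tc.wantSQL) {
				t.Errorf("SQL mismatch.\nWant: %s\nGot: %s", tc.wantSQL, sql)
			}
		})
	}
}
```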
45 | -------------------------------------------------------------------------------- /compile_error_test.go: -------------------------------------------------------------------------------- 1 | package gophrql 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func TestCompileErrors(t *testing.T) { 9 | t.Helper() 10 | 11 | cases := []struct { 12 | name string 13 | prql string 14 | wantContains string 15 | }{ 16 | { 17 | name: "unsupported_target", 18 | prql: ` 19 | target sql.duckdb 20 | from tracks 21 | take 1 22 | `, 23 | wantContains: "unsupported target", 24 | }, 25 | { 26 | name: "comment_only", 27 | prql: `# just a comment`, 28 | wantContains: "No PRQL query entered", 29 | }, 30 | { 31 | name: "empty_query", 32 | prql: ``, 33 | wantContains: "No PRQL query entered", 34 | }, 35 | { 36 | name: "missing_from", 37 | prql: ` 38 | let x = 5 39 | let y = 10 40 | `, 41 | wantContains: "PRQL queries must begin with 'from'", 42 | }, 43 | { 44 | name: "declaration_only", 45 | prql: ` 46 | let x = 5 47 | let y = 10 48 | let z = 15 49 | `, 50 | wantContains: "PRQL queries must begin with 'from'", 51 | }, 52 | { 53 | name: "too_many_args_to_function", 54 | prql: ` 55 | let addadd = a b -> a + b 56 | 57 | from x 58 | derive y = (addadd 4 5 6) 59 | `, 60 | wantContains: "Too many arguments to function `addadd`", 61 | }, 62 | { 63 | name: "unknown_name", 64 | prql: ` 65 | from x 66 | select a 67 | select b 68 | `, 69 | wantContains: "Unknown name `b`", 70 | }, 71 | { 72 | name: "bad_take_type", 73 | prql: ` 74 | from employees 75 | take 1.8 76 | `, 77 | wantContains: "`take` expected int or range", 78 | }, 79 | { 80 | name: "comment_then_empty", 81 | prql: ` 82 | # header 83 | 84 | `, 85 | wantContains: "No PRQL query entered", 86 | }, 87 | { 88 | name: "date_to_text_literal_format", 89 | prql: ` 90 | from invoices 91 | select { date.to_text invoice_date billing_city } 92 | `, 93 | wantContains: "`date.to_text` only supports a string literal as format", 94 | }, 95 | { 96 | name: "date_to_text_unsupported_specifier", 97 | prql: ` 98 | from invoices 99 | select { (invoice_date | date.to_text "%_j") } 100 | `, 101 | wantContains: "PRQL doesn't support this format specifier", 102 | }, 103 | } 104 | 105 | for _, tc := range cases { 106 | tc := tc 107 | t.Run(tc.name, func(t *testing.T) { 108 | sql, err := Compile(tc.prql) 109 | if err == nil { 110 | t.Fatalf("expected error, got SQL: %s", sql) 111 | } 112 | if !strings.Contains(err.Error(), tc.wantContains) { 113 | t.Fatalf("error mismatch for %s:\nwant substring: %q\ngot: %v", tc.name, tc.wantContains, err) 114 | } 115 | }) 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /ast/ast.go: -------------------------------------------------------------------------------- 1 | package ast 2 | 3 | // Query represents a PRQL pipeline starting with a source. 4 | type Query struct { 5 | From Source 6 | Steps []Step 7 | Target string // optional target (e.g., sql.generic) 8 | Bindings []Binding 9 | } 10 | 11 | // Source represents a relation source. 12 | type Source struct { 13 | Table string 14 | Rows []InlineRow // inline rows when Table is empty 15 | } 16 | 17 | type InlineRow struct { 18 | Fields []Field 19 | } 20 | 21 | type Field struct { 22 | Name string 23 | Expr Expr 24 | } 25 | 26 | // Binding represents a named sub-query defined via `let`. 27 | type Binding struct { 28 | Name string 29 | Query *Query 30 | } 31 | 32 | // Step is a pipeline stage. 
33 | type Step interface { 34 | isStep() 35 | } 36 | 37 | type ( 38 | FilterStep struct { 39 | Expr Expr 40 | } 41 | DeriveStep struct { 42 | Assignments []Assignment 43 | } 44 | Assignment struct { 45 | Name string 46 | Expr Expr 47 | } 48 | SelectStep struct { 49 | Items []SelectItem 50 | } 51 | SelectItem struct { 52 | Expr Expr 53 | As string // optional alias 54 | } 55 | AggregateStep struct { 56 | Items []AggregateItem 57 | } 58 | AggregateItem struct { 59 | Func string 60 | Arg Expr 61 | Args []Expr 62 | As string 63 | } 64 | TakeStep struct { 65 | Limit int 66 | Offset int 67 | } 68 | AppendStep struct { 69 | Query *Query 70 | } 71 | RemoveStep struct { 72 | Query *Query 73 | } 74 | LoopStep struct { 75 | Body []Step 76 | } 77 | JoinStep struct { 78 | Side string 79 | Query *Query 80 | On Expr 81 | } 82 | DistinctStep struct{} 83 | GroupStep struct { 84 | Key Expr 85 | Steps []Step 86 | } 87 | SortStep struct { 88 | Items []SortItem 89 | } 90 | SortItem struct { 91 | Expr Expr 92 | Desc bool 93 | } 94 | ) 95 | 96 | func (*FilterStep) isStep() {} 97 | func (*DeriveStep) isStep() {} 98 | func (*SelectStep) isStep() {} 99 | func (*AggregateStep) isStep() {} 100 | func (*TakeStep) isStep() {} 101 | func (*AppendStep) isStep() {} 102 | func (*RemoveStep) isStep() {} 103 | func (*LoopStep) isStep() {} 104 | func (*JoinStep) isStep() {} 105 | func (*GroupStep) isStep() {} 106 | func (*SortStep) isStep() {} 107 | func (*DistinctStep) isStep() {} 108 | 109 | // Expr is an expression node. 110 | type Expr interface { 111 | isExpr() 112 | } 113 | 114 | type ( 115 | Ident struct { 116 | Parts []string 117 | } 118 | Number struct { 119 | Value string 120 | } 121 | StringLit struct { 122 | Value string 123 | } 124 | Binary struct { 125 | Op string 126 | Left Expr 127 | Right Expr 128 | } 129 | Call struct { 130 | Func Expr 131 | Args []Expr 132 | } 133 | Pipe struct { 134 | Input Expr 135 | Func Expr 136 | Args []Expr 137 | } 138 | CaseExpr struct { 139 | Branches []CaseBranch 140 | } 141 | CaseBranch struct { 142 | Cond Expr 143 | Value Expr 144 | } 145 | Tuple struct { 146 | Exprs []Expr 147 | } 148 | ) 149 | 150 | func (*Ident) isExpr() {} 151 | func (*Number) isExpr() {} 152 | func (*StringLit) isExpr() {} 153 | func (*Binary) isExpr() {} 154 | func (*Call) isExpr() {} 155 | func (*Pipe) isExpr() {} 156 | func (*CaseExpr) isExpr() {} 157 | func (*Tuple) isExpr() {} 158 | -------------------------------------------------------------------------------- /dialect_test.go: -------------------------------------------------------------------------------- 1 | package gophrql 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestDialectCompilation(t *testing.T) { 8 | cases := []struct { 9 | name string 10 | target string 11 | prql string 12 | wantSQL string 13 | }{ 14 | // --- Quoting Style Tests --- 15 | { 16 | name: "mysql_backticks", 17 | target: "sql.mysql", 18 | prql: ` 19 | from employees 20 | select {` + "`first-name`, `last-name`" + `} 21 | `, 22 | wantSQL: "SELECT `first-name` AS `first-name`, `last-name` AS `last-name` FROM employees", 23 | }, 24 | { 25 | name: "postgres_quotes", 26 | target: "sql.postgres", 27 | prql: ` 28 | from employees 29 | select {` + "`first-name`, `last-name`" + `} 30 | `, 31 | wantSQL: `SELECT "first-name" AS "first-name", "last-name" AS "last-name" FROM employees`, 32 | }, 33 | { 34 | name: "snowflake_quoting", 35 | target: "sql.snowflake", 36 | prql: ` 37 | from employees 38 | select { a, b, ` + "`col space`" + ` } 39 | `, 40 | wantSQL: `SELECT a, b, "col space" 
AS "col space" FROM employees`, 41 | }, 42 | 43 | // --- MSSQL TOP vs LIMIT --- 44 | { 45 | name: "mssql_top", 46 | target: "sql.mssql", 47 | prql: ` 48 | from employees 49 | take 10 50 | `, 51 | wantSQL: `SELECT TOP 10 * FROM employees`, 52 | }, 53 | 54 | // --- Date to Text (Dialect Specific Functions) --- 55 | // Note: Requires implementing date.to_text in sqlgen 56 | { 57 | name: "postgres_date_to_text", 58 | target: "sql.postgres", 59 | prql: ` 60 | from invoices 61 | select { d = (invoice_date | date.to_text "DD/MM/YYYY") } 62 | `, 63 | wantSQL: `SELECT TO_CHAR(invoice_date, 'DD/MM/YYYY') AS d FROM invoices`, 64 | }, 65 | { 66 | name: "mysql_date_to_text", 67 | target: "sql.mysql", 68 | prql: ` 69 | from invoices 70 | select { d = (invoice_date | date.to_text "%d/%m/%Y") } 71 | `, 72 | wantSQL: `SELECT DATE_FORMAT(invoice_date, '%d/%m/%Y') AS d FROM invoices`, 73 | }, 74 | { 75 | name: "duckdb_date_to_text", 76 | target: "sql.duckdb", 77 | prql: ` 78 | from invoices 79 | select { d = (invoice_date | date.to_text "%d/%m/%Y") } 80 | `, 81 | wantSQL: `SELECT strftime(invoice_date, '%d/%m/%Y') AS d FROM invoices`, 82 | }, 83 | 84 | // --- MSSQL Math Functions (partial) --- 85 | { 86 | name: "mssql_math", 87 | target: "sql.mssql", 88 | prql: ` 89 | from employees 90 | select { 91 | c = math.ceil salary, 92 | l = math.ln salary, 93 | p = math.pow salary 2 94 | } 95 | `, 96 | wantSQL: `SELECT CEILING(salary) AS c, LOG(salary) AS l, POWER(salary, 2) AS p FROM employees`, 97 | }, 98 | 99 | // --- Generic Fallback --- 100 | { 101 | name: "generic_fallback", 102 | target: "sql.unknown_dialect", 103 | prql: ` 104 | from employees 105 | select {` + "`first-name`" + `} 106 | `, 107 | wantSQL: `SELECT "first-name" AS "first-name" FROM employees`, 108 | }, 109 | } 110 | 111 | for _, tc := range cases { 112 | t.Run(tc.name, func(t *testing.T) { 113 | sql, err := Compile(tc.prql, WithTarget(tc.target)) 114 | if err != nil { 115 | t.Fatalf("Compile error: %v", err) 116 | } 117 | if normalize(sql) != normalize(tc.wantSQL) { 118 | t.Errorf("SQL mismatch.\nWant: %s\nGot: %s", tc.wantSQL, sql) 119 | } 120 | }) 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /AGENTS.md: -------------------------------------------------------------------------------- 1 | Agents Guide 2 | ============ 3 | 4 | Context 5 | ------- 6 | - Project: `github.com/maxpert/gophrql` 7 | - Purpose: Reference Go implementation of the PRQL book; inspired by upstream behavior from https://github.com/PRQL/prql where practical. 8 | - Status: Pre-implementation scaffold; public API is not stable. 9 | 10 | Expectations 11 | ------------ 12 | - Fidelity first: match the PRQL book semantics before adding convenience APIs. 13 | - Parity notes: document any intentional differences from upstream `prql` in this file. 14 | - Tests: prefer table-driven tests that quote book examples; add regression tests for every fixed bug. 15 | - Errors: return informative, composable errors; avoid panics. 16 | - Dependencies: keep minimal; avoid cgo and heavy transitive trees. 17 | 18 | Initial roadmap 19 | --------------- 20 | - Parser: parse PRQL into an AST aligned with the book chapters and upstream definitions. 21 | - Compiler: translate AST to SQL with deterministic output; support dialect abstractions early. 22 | - Diagnostics: helpful error messages that point at spans in the source. 23 | - Examples: executable snippets reflecting book examples (`examples/` folder). 
24 | 25 | Workflow tips 26 | ------------- 27 | - Run `go test ./...` before pushing. 28 | - Keep public surface documented with Go doc comments. 29 | - Prefer small, reviewable commits with context in the messages. 30 | 31 | 32 | ## Engineering Principles to Enforce 33 | 34 | | Principle | Description | 35 | |-----------|-------------| 36 | | **KISS** | Keep It Simple, Stupid. Reject unnecessary complexity. | 37 | | **DRY** | Don't Repeat Yourself. Flag duplicated logic. | 38 | | **YAGNI** | You Aren't Gonna Need It. Remove speculative code. | 39 | | **SOLID** | Single responsibility, Open/closed, Liskov substitution, Interface segregation, Dependency inversion. | 40 | | **SoC** | Separation of Concerns. Each package/function has one job. | 41 | | **LoD** | Law of Demeter. Minimize knowledge between components. | 42 | | **Fail Fast** | Validate early, return early. | 43 | | **Explicit > Implicit** | No magic. Clear intent. | 44 | 45 | --- 46 | 47 | ## Severity Classification 48 | 49 | ### CRITICAL (must fix before merge) 50 | - Security vulnerabilities (injection, auth bypass, secrets in code) 51 | - Data corruption or loss risks 52 | - Race conditions causing crashes 53 | - Unrecovered panics in goroutines 54 | - Unbounded resource consumption (memory leaks, goroutine leaks) 55 | - Broken error handling that silences failures 56 | 57 | ### HIGH (must fix, blocking) 58 | - Unchecked errors on I/O, network, or database operations 59 | - Missing context cancellation propagation 60 | - Improper mutex usage (deadlock potential, unlock not deferred) 61 | - Nil pointer dereference risks 62 | - SQL injection or command injection vectors 63 | - Missing input validation on public APIs 64 | 65 | ### MEDIUM (should fix) 66 | - Non-idiomatic Go patterns 67 | - Stuttering names (`user.UserName` → `user.Name`) 68 | - Large functions (>50 lines) that should be split 69 | - Missing godoc on exported symbols 70 | - Inconsistent error wrapping 71 | - Magic numbers without constants 72 | - Test coverage gaps on critical paths 73 | - Suboptimal allocations in hot paths 74 | 75 | ### LOW (nice to have) 76 | - Minor naming improvements 77 | - Comment typos or formatting 78 | - Import ordering 79 | - Redundant else blocks 80 | - Slightly verbose code that could be condensed 81 | - Missing test cases for edge cases 82 | -------------------------------------------------------------------------------- /internal/sqlgen/dialects.go: -------------------------------------------------------------------------------- 1 | package sqlgen 2 | 3 | import "strings" 4 | 5 | // DialectMap holds the registered dialects. 6 | var DialectMap = map[string]*Dialect{ 7 | "sql.generic": DefaultDialect, 8 | "sql.postgres": PostgresDialect, 9 | "sql.sqlite": SQLiteDialect, 10 | "sql.duckdb": DuckDBDialect, 11 | "sql.mysql": MySQLDialect, 12 | "sql.mssql": MSSQLDialect, 13 | "sql.clickhouse": ClickHouseDialect, 14 | "sql.bigquery": BigQueryDialect, 15 | "sql.snowflake": SnowflakeDialect, 16 | } 17 | 18 | // GetDialect returns the dialect for the given target, or nil if not found. 19 | // It tries to match by exact string first, then by the dialect type. 
20 | func GetDialect(target string) *Dialect { 21 | if d, ok := DialectMap[target]; ok { 22 | return d 23 | } 24 | // Fallback/Aliases 25 | switch strings.ToLower(target) { 26 | case "postgres", "postgresql": 27 | return PostgresDialect 28 | case "sqlite": 29 | return SQLiteDialect 30 | case "duckdb": 31 | return DuckDBDialect 32 | case "mysql": 33 | return MySQLDialect 34 | case "mssql", "sqlserver": 35 | return MSSQLDialect 36 | case "clickhouse": 37 | return ClickHouseDialect 38 | case "bigquery": 39 | return BigQueryDialect 40 | case "snowflake": 41 | return SnowflakeDialect 42 | } 43 | return nil 44 | } 45 | 46 | // PostgresDialect defines the dialect for PostgreSQL. 47 | var PostgresDialect = &Dialect{ 48 | Type: DialectPostgres, 49 | IdentQuoteChar: '"', 50 | UseLimitOffset: true, 51 | Functions: map[string]string{ 52 | "date.to_text": "TO_CHAR(%[1]s, %[2]s)", // date, format 53 | }, 54 | } 55 | 56 | // SQLiteDialect defines the dialect for SQLite. 57 | var SQLiteDialect = &Dialect{ 58 | Type: DialectSQLite, 59 | IdentQuoteChar: '"', 60 | UseLimitOffset: true, 61 | Functions: map[string]string{}, 62 | } 63 | 64 | // DuckDBDialect defines the dialect for DuckDB. 65 | var DuckDBDialect = &Dialect{ 66 | Type: DialectDuckDB, 67 | IdentQuoteChar: '"', 68 | UseLimitOffset: true, 69 | Functions: map[string]string{ 70 | "std.read_csv": "read_csv_auto", 71 | "date.to_text": "strftime(%[1]s, %[2]s)", // DuckDB: strftime(date, format) 72 | }, 73 | } 74 | 75 | // MySQLDialect defines the dialect for MySQL. 76 | var MySQLDialect = &Dialect{ 77 | Type: DialectMySQL, 78 | IdentQuoteChar: '`', 79 | UseLimitComma: true, // LIMIT offset, count 80 | Functions: map[string]string{ 81 | "date.to_text": "DATE_FORMAT(%[1]s, %[2]s)", 82 | }, 83 | } 84 | 85 | // MSSQLDialect defines the dialect for Microsoft SQL Server. 86 | var MSSQLDialect = &Dialect{ 87 | Type: DialectMSSQL, 88 | IdentQuoteChar: '"', 89 | UseTopClause: true, // TOP N 90 | OffsetFetchSyntax: true, // OFFSET M ROWS FETCH NEXT N ROWS ONLY 91 | Functions: map[string]string{ 92 | "math.ceil": "CEILING(%s)", 93 | "math.ln": "LOG(%s)", 94 | "math.pow": "POWER(%s, %s)", 95 | }, 96 | } 97 | 98 | // ClickHouseDialect defines the dialect for ClickHouse. 99 | var ClickHouseDialect = &Dialect{ 100 | Type: DialectClickHouse, 101 | IdentQuoteChar: '"', // OR backticks, " is standard SQL 102 | UseLimitOffset: true, 103 | Functions: map[string]string{ 104 | "date.to_text": "formatDateTimeInJodaSyntax(%[1]s, %[2]s)", 105 | }, 106 | } 107 | 108 | // BigQueryDialect defines the dialect for BigQuery. 109 | var BigQueryDialect = &Dialect{ 110 | Type: DialectBigQuery, 111 | IdentQuoteChar: '`', 112 | UseLimitOffset: true, 113 | Functions: map[string]string{}, 114 | } 115 | 116 | // SnowflakeDialect defines the dialect for Snowflake. 
117 | var SnowflakeDialect = &Dialect{
118 | 	Type:           DialectSnowflake,
119 | 	IdentQuoteChar: '"',
120 | 	UseLimitOffset: true,
121 | 	Functions:      map[string]string{},
122 | }
123 | --------------------------------------------------------------------------------
/examples/mongo/main.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | 	"fmt"
5 | 	"strings"
6 | 
7 | 	"github.com/maxpert/gophrql"
8 | 	"github.com/maxpert/gophrql/ast"
9 | )
10 | 
11 | func main() {
12 | 	prql := `
13 | from users
14 | filter age > 21
15 | filter country == "US"
16 | derive { full_name = f"{first_name} {last_name}" }
17 | select { full_name, email, age }
18 | sort { -age }
19 | take 10
20 | `
21 | 
22 | 	// Convert PRQL to an AST
23 | 	query, err := gophrql.Parse(prql)
24 | 	if err != nil {
25 | 		panic(err)
26 | 	}
27 | 
28 | 	// Convert AST to MongoDB aggregation pipeline string
29 | 	mongo := convertToMongo(query)
30 | 	fmt.Println(mongo)
31 | 	// Expected output (formatted for readability):
32 | 	// db.users.aggregate([
33 | 	//   { $match: { age: { $gt: 21 }, country: "US" } },
34 | 	//   { $project: { full_name: 1, email: 1, age: 1, _id: 0 } },
35 | 	//   { $sort: { age: -1 } },
36 | 	//   { $limit: 10 }
37 | 	// ])
38 | 
39 | }
40 | 
41 | // ConvertToMongo builds a MongoDB aggregation pipeline from a PRQL AST query.
42 | func convertToMongo(q *ast.Query) string {
43 | 	var stages []string
44 | 
45 | 	// $match stage – collect all filter expressions
46 | 	matchFilters := []string{}
47 | 	for _, step := range q.Steps {
48 | 		if f, ok := step.(*ast.FilterStep); ok {
49 | 			matchFilters = append(matchFilters, exprToMongo(f.Expr))
50 | 		}
51 | 	}
52 | 	if len(matchFilters) > 0 {
53 | 		// combine filters with logical AND
54 | 		combined := strings.Join(matchFilters, ", ")
55 | 		stages = append(stages, fmt.Sprintf("{ $match: { %s } }", combined))
56 | 	}
57 | 
58 | 	// $project stage – handle select steps
59 | 	for _, step := range q.Steps {
60 | 		if s, ok := step.(*ast.SelectStep); ok {
61 | 			proj := []string{}
62 | 			for _, item := range s.Items {
63 | 				alias := item.As
64 | 				if alias == "" {
65 | 					// Use the expression's string representation as field name
66 | 					alias = exprToMongo(item.Expr)
67 | 				}
68 | 				proj = append(proj, fmt.Sprintf("%s: 1", alias))
69 | 			}
70 | 			// Exclude _id by default for cleaner output
71 | 			proj = append(proj, "_id: 0")
72 | 			stages = append(stages, fmt.Sprintf("{ $project: { %s } }", strings.Join(proj, ", ")))
73 | 		}
74 | 	}
75 | 
76 | 	// $sort stage – handle sort steps
77 | 	for _, step := range q.Steps {
78 | 		if s, ok := step.(*ast.SortStep); ok {
79 | 			sortFields := []string{}
80 | 			for _, item := range s.Items {
81 | 				direction := 1
82 | 				if item.Desc {
83 | 					direction = -1
84 | 				}
85 | 				// Assume the expression is a simple identifier
86 | 				sortFields = append(sortFields, fmt.Sprintf("%s: %d", exprToMongo(item.Expr), direction))
87 | 			}
88 | 			if len(sortFields) > 0 {
89 | 				stages = append(stages, fmt.Sprintf("{ $sort: { %s } }", strings.Join(sortFields, ", ")))
90 | 			}
91 | 		}
92 | 	}
93 | 
94 | 	// $limit and $skip – handle take steps (limit/offset)
95 | 	for _, step := range q.Steps {
96 | 		if t, ok := step.(*ast.TakeStep); ok {
97 | 			if t.Offset > 0 {
98 | 				stages = append(stages, fmt.Sprintf("{ $skip: %d }", t.Offset))
99 | 			}
100 | 			if t.Limit > 0 {
101 | 				stages = append(stages, fmt.Sprintf("{ $limit: %d }", t.Limit))
102 | 			}
103 | 		}
104 | 	}
105 | 
106 | 	// Build final aggregation string
107 | 	pipeline := strings.Join(stages, ", ")
108 | 	return fmt.Sprintf("db.%s.aggregate([%s])", q.From.Table, pipeline)
109 | }
110 | 
111 | // exprToMongo converts a simple AST expression to a MongoDB query fragment.
112 | // This is a minimal implementation supporting identifiers, binary ops, and literals.
113 | func exprToMongo(e ast.Expr) string {
114 | 	switch v := e.(type) {
115 | 	case *ast.Ident:
116 | 		// Identifiers land in key position here ($match/$sort/$project), which takes
117 | 		// a bare field name; a $-prefix is only valid in aggregation expressions.
118 | 		if len(v.Parts) == 1 {
119 | 			return v.Parts[0]
120 | 		}
121 | 		// For qualified identifiers (e.g., table.column) use the last part.
122 | 		return v.Parts[len(v.Parts)-1]
123 | 	case *ast.Number:
124 | 		return v.Value
125 | 	case *ast.StringLit:
126 | 		return fmt.Sprintf("\"%s\"", v.Value)
127 | 	case *ast.Binary:
128 | 		left := exprToMongo(v.Left)
129 | 		right := exprToMongo(v.Right)
130 | 		switch v.Op {
131 | 		case ">":
132 | 			return fmt.Sprintf("%s: { $gt: %s }", left, right)
133 | 		case "<":
134 | 			return fmt.Sprintf("%s: { $lt: %s }", left, right)
135 | 		case "==":
136 | 			return fmt.Sprintf("%s: %s", left, right)
137 | 		case "!=":
138 | 			return fmt.Sprintf("%s: { $ne: %s }", left, right)
139 | 		case "&&":
140 | 			// Operands are "field: cond" fragments, so each must be wrapped in braces.
141 | 			return fmt.Sprintf("$and: [ { %s }, { %s } ]", left, right)
142 | 		case "||":
143 | 			return fmt.Sprintf("$or: [ { %s }, { %s } ]", left, right)
144 | 		default:
145 | 			return ""
146 | 		}
147 | 	default:
148 | 		return ""
149 | 	}
150 | }
151 | --------------------------------------------------------------------------------
/docs/DIALECT_PLAN.md:
--------------------------------------------------------------------------------
1 | PRQL Dialect Support Plan
2 | =========================
3 | 
4 | Goals
5 | -----
6 | - Provide a clear roadmap for matching the official PRQL compiler’s SQL dialect coverage.
7 | - Enumerate required features, blockers, and validation artifacts (snapshots/tests) per dialect.
8 | - Keep the effort incremental so snapshots stay green after each dialect landing.
9 | 
10 | Current State
11 | -------------
12 | - The Go compiler currently emits a single “generic” SQL flavor tuned to the integration snapshots (≈ PostgreSQL syntax).
13 | - No `target sql.` selection hook is exposed to callers.
14 | - Dialect-specific constructs (identifier quoting, function names, LIMIT/OFFSET semantics, date functions, joins, window quirks) are hard-coded for the generic flavor.
15 | 
16 | Priority Dialects
17 | -----------------
18 | 
19 | | Dialect         | Status   | Required Work                                                                                        | References                                               |
20 | |-----------------|----------|------------------------------------------------------------------------------------------------------|----------------------------------------------------------|
21 | | `sql.generic`   | ✅ (base) | Keep as compatibility fallback for snapshots.                                                        | Existing `compile_test.go` and upstream `compile__*.snap` |
22 | | `sql.postgres`  | 🟡       | Add target flag, Postgres-specific casting (e.g., `::type`), JSON ops, `ILIKE`, quoted identifiers.  | `prqlc` `Target::Postgres`, `snapshots/...postgres...`   |
23 | | `sql.sqlite`    | 🟡       | Handle lack of `WITH RECURSIVE` in some scenarios, `strftime` formats, limited window support.       | `snapshots/...sqlite...`, book’s SQLite notes            |
24 | | `sql.duckdb`    | 🟡       | Support `read_csv_auto`, `MAP` types, `LIMIT` ordering semantics.                                    | DuckDB tutorial + `prqlc` target                         |
25 | | `sql.mysql`     | 🟡       | Switch to backtick quoting, `LIMIT offset, count`, no `WITH RECURSIVE` (fallback to temp tables).    | MySQL target snapshots                                   |
26 | | `sql.mssql`     | 🟡       | `TOP`, `OFFSET FETCH`, string concatenation `+`, `DATEPART`. 
| `# mssql:test` fixtures already in PRQL repo | 27 | | `sql.bigquery` | ⬜ | Backtick quoting, `STRUCT`, `UNNEST`, positional parameters. | Official PRQL backlog | 28 | | `sql.clickhouse`| ⬜ | `ARRAY JOIN`, limited `WITH`, distinct ordering semantics. | Upstream `clickhouse` snapshots | 29 | 30 | Legend: ✅ done, 🟡 planned (near-term), ⬜ later. 31 | 32 | Implementation Phases 33 | --------------------- 34 | 35 | 1. **Target Selection Plumbed** 36 | - Parse optional `target` statement, expose `Compile(prql string, opts ...Option)`. 37 | - Default to `sql.generic` for backward compatibility. 38 | 39 | 2. **Configuration Plumbing** 40 | - Represent dialect capabilities in a struct (identifier quoting strategy, function remaps, limit syntax, boolean literal style, etc.). 41 | - Refactor `sqlgen` helpers to depend on that configuration instead of hard-coded literals. 42 | 43 | 3. **Dialect-by-Dialect Enablement** 44 | - Postgres: mostly alias for generic but add `ILIKE`, `::type`, JSON operators. 45 | - SQLite: ensure `strftime`, `LIMIT` semantics, disable unsupported window constructs by lowering them. 46 | - DuckDB/MySQL/MSSQL: each requires quoting/cast adjustments and new intrinsic mappings. 47 | - For each dialect, import upstream `integration__queries__compile__*` snapshots and add new Go tests (e.g., `TestCompileSnapshotsSQLite`). 48 | 49 | 4. **Validation & Tooling** 50 | - Expand `docs/SNAPSHOTS_PLAN.md` with per-dialect coverage checkboxes. 51 | - Provide a helper script (`cmd/snapdiff`) to re-run snapshots against upstream PRQL for regression checks. 52 | 53 | 5. **Future Dialects** 54 | - Once core SQL engines are covered, evaluate additional targets (BigQuery, Snowflake, ClickHouse) using the same pattern. 55 | 56 | Risk & Mitigation 57 | ----------------- 58 | - **Config Drift:** Keep dialect configs in a single package with unit tests ensuring defaults match the upstream Rust compiler. 59 | - **Snapshot Explosion:** Gate new dialect tests behind build tags or sub-tests to keep runtime manageable. 60 | - **Feature Gaps:** Document unsupported PRQL features per dialect in the README until parity is reached. 61 | 62 | Next Actions 63 | ------------ 64 | 1. Define a `sqlgen.Dialect` struct + registry. 65 | 2. Add `CompileOptions{Target string}` plumbing. 66 | 3. Port Postgres-specific snapshot tests to verify plumbing. 67 | 4. Iterate through the priority table above, updating docs/tests per dialect. 
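The first two actions are already present in the current scaffold (`sqlgen.Dialect` plus `DialectMap`, and the `WithTarget` option on `Compile`), so target selection can be exercised today. A small sketch, with output matching the `mssql_top` case in `dialect_test.go`:

```go
sql, err := gophrql.Compile(
	"from employees\ntake 10",
	gophrql.WithTarget("sql.mssql"), // resolved through sqlgen.GetDialect
)
if err != nil {
	return err
}
fmt.Println(sql) // SELECT TOP 10 * FROM employees
```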
68 | -------------------------------------------------------------------------------- /integration_test.go: -------------------------------------------------------------------------------- 1 | package gophrql_test 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "strings" 9 | "testing" 10 | 11 | "github.com/maxpert/gophrql" 12 | ) 13 | 14 | func TestIntegrationSnapshots(t *testing.T) { 15 | // Locate the snapshots directory 16 | // expected path: tmp/prql/prqlc/prqlc/tests/integration/snapshots 17 | root := "tmp/prql/prqlc/prqlc/tests/integration/snapshots" 18 | if _, err := os.Stat(root); os.IsNotExist(err) { 19 | t.Skipf("Snapshots directory not found at %s; skipping integration tests", root) 20 | } 21 | 22 | files, err := filepath.Glob(filepath.Join(root, "integration__queries__compile__*.snap")) 23 | if err != nil { 24 | t.Fatalf("Failed to glob snapshots: %v", err) 25 | } 26 | 27 | for _, snapPath := range files { 28 | testName := filepath.Base(snapPath) 29 | t.Run(testName, func(t *testing.T) { 30 | prql, expectedSQL, err := parseSnapshotAndInput(snapPath) 31 | if err != nil { 32 | t.Fatalf("Failed to parse snapshot %s: %v", snapPath, err) 33 | } 34 | 35 | // We only target generic dialect for now (default); skip cases that explicitly opt out. 36 | if strings.Contains(prql, "# generic:skip") { 37 | t.Skip("Upstream query skips the generic dialect") 38 | } 39 | 40 | gotSQL, err := gophrql.Compile(prql) 41 | if err != nil { 42 | t.Fatalf("Compile failed: %v", err) 43 | } 44 | 45 | if normalize(gotSQL) != normalize(expectedSQL) { 46 | t.Errorf("SQL Mismatch.\nPRQL:\n%s\n\nExpected:\n%s\n\nGot:\n%s", prql, expectedSQL, gotSQL) 47 | } 48 | }) 49 | } 50 | } 51 | 52 | // TestIntegrationCompileSnapshots ensures each query read by `sql.rs` has an upstream SQL snapshot. 53 | // The PRQL file list under tmp/prql/.../queries comes from `prqlc/tests/integration/sql.rs`, so if 54 | // every PRQL query there has an `integration__queries__compile__*.snap` file, we know the Go integration 55 | // tests exercise the same set of cases. 
56 | func TestIntegrationCompileSnapshots(t *testing.T) {
57 | 	queryDir := "tmp/prql/prqlc/prqlc/tests/integration/queries"
58 | 	snapshotsDir := "tmp/prql/prqlc/prqlc/tests/integration/snapshots"
59 | 
60 | 	if _, err := os.Stat(queryDir); os.IsNotExist(err) {
61 | 		t.Skipf("Queries directory not found at %s; skipping integration coverage check", queryDir)
62 | 	}
63 | 
64 | 	queryFiles, err := filepath.Glob(filepath.Join(queryDir, "*.prql"))
65 | 	if err != nil {
66 | 		t.Fatalf("Failed to list queries: %v", err)
67 | 	}
68 | 
69 | 	var missing []string
70 | 	for _, queryPath := range queryFiles {
71 | 		name := strings.TrimSuffix(filepath.Base(queryPath), ".prql")
72 | 		snapshotPath := filepath.Join(snapshotsDir, "integration__queries__compile__"+name+".snap")
73 | 
74 | 		queryContent, err := os.ReadFile(queryPath)
75 | 		if err != nil {
76 | 			t.Fatalf("Failed to read query %s: %v", queryPath, err)
77 | 		}
78 | 		if strings.Contains(string(queryContent), "# generic:skip") {
79 | 			continue
80 | 		}
81 | 
82 | 		if _, err := os.Stat(snapshotPath); os.IsNotExist(err) {
83 | 			missing = append(missing, name)
84 | 		} else if err != nil {
85 | 			t.Fatalf("Failed to stat snapshot %s: %v", snapshotPath, err)
86 | 		}
87 | 	}
88 | 
89 | 	if len(missing) > 0 {
90 | 		t.Fatalf("Missing compile snapshots for queries: %s", strings.Join(missing, ", "))
91 | 	}
92 | }
93 | 
94 | func parseSnapshotAndInput(snapPath string) (string, string, error) {
95 | 	f, err := os.Open(snapPath)
96 | 	if err != nil {
97 | 		return "", "", err
98 | 	}
99 | 	defer f.Close()
100 | 
101 | 	scanner := bufio.NewScanner(f)
102 | 
103 | 	// Format:
104 | 	// ---
105 | 	// source: ...
106 | 	// expression: ...
107 | 	// input_file: path/to/file.prql
108 | 	// ---
109 | 	// SQL CONTENT...
110 | 
111 | 	var inputFileRel string
112 | 
113 | 	// minimal YAML-like parsing for the header
114 | 	inHeader := false
115 | 	dashCount := 0
116 | 	var sqlBody strings.Builder
117 | 
118 | 	for scanner.Scan() {
119 | 		line := scanner.Text()
120 | 		if line == "---" {
121 | 			dashCount++
122 | 			if dashCount == 1 {
123 | 				inHeader = true
124 | 				continue
125 | 			}
126 | 			if dashCount == 2 {
127 | 				inHeader = false
128 | 				continue
129 | 			}
130 | 		}
131 | 
132 | 		if inHeader {
133 | 			trimmed := strings.TrimSpace(line)
134 | 			if strings.HasPrefix(trimmed, "input_file:") {
135 | 				// input_file: prqlc/prqlc/tests/integration/queries/foo.prql
136 | 				parts := strings.SplitN(trimmed, ":", 2)
137 | 				if len(parts) == 2 {
138 | 					inputFileRel = strings.TrimSpace(parts[1])
139 | 				}
140 | 			}
141 | 		} else {
142 | 			// Body
143 | 			if dashCount >= 2 {
144 | 				sqlBody.WriteString(line)
145 | 				sqlBody.WriteString("\n")
146 | 			}
147 | 		}
148 | 	}
149 | 
150 | 	if inputFileRel == "" {
151 | 		return "", "", fmt.Errorf("input_file not found in snapshot header")
152 | 	}
153 | 
154 | 	// Resolve the PRQL source path. The snapshot header records input_file relative
155 | 	// to the upstream repo root (e.g. "prqlc/prqlc/tests/integration/queries/foo.prql"),
156 | 	// and the upstream checkout lives under tmp/prql, so joining the two yields the path.
157 | 	inputPath := filepath.Join("tmp/prql", inputFileRel)
158 | 	prqlBytes, err := os.ReadFile(inputPath)
159 | 	if err != nil {
160 | 		return "", "", fmt.Errorf("failed to read input PRQL %s: %v", inputPath, err)
161 | 	}
162 | 
163 | 	return string(prqlBytes), strings.TrimSpace(sqlBody.String()), nil
164 | }
165 | 
166 | func normalize(s string) string {
167 | 	return strings.Join(strings.Fields(strings.TrimSpace(s)), "")
168 | }
169 | --------------------------------------------------------------------------------
/internal/parser/lexer.go:
--------------------------------------------------------------------------------
1 | package parser
2 | 
3 | import (
4 | 	"fmt"
5 | 	"strings"
6 | 	"unicode"
7 | )
8 | 
9 | type TokenType string
10 | 
11 | const (
12 | 	ILLEGAL  TokenType = "ILLEGAL"
13 | 	EOF                = "EOF"
14 | 	IDENT              = "IDENT"
15 | 	NUMBER             = "NUMBER"
16 | 	STRING             = "STRING"
17 | 	FSTRING            = "FSTRING"
18 | 	NEWLINE            = "NEWLINE"
19 | 
20 | 	LPAREN   = "("
21 | 	RPAREN   = ")"
22 | 	LBRACE   = "{"
23 | 	RBRACE   = "}"
24 | 	LBRACKET = "["
25 | 	RBRACKET = "]"
26 | 	COMMA    = ","
27 | 	EQUAL    = "="
28 | 	DOT      = "."
29 | 	BACKTICK = "`"
30 | 	PIPE     = "|"
31 | 	STAR     = "*"
32 | 	PLUS     = "+"
33 | 	MINUS    = "-"
34 | 	SLASH    = "/"
35 | 	FLOORDIV = "//"
36 | 	CARET    = "^"
37 | 	POW      = "**"
38 | 	REGEXEQ  = "~="
39 | 	RANGE    = ".."
40 | 	NULLCOAL = "??"
41 | 	OROR     = "||"
42 | 	ARROW    = "=>"
43 | 	EQ       = "=="
44 | 	NEQ      = "!="
45 | 	PERCENT  = "%"
46 | 	LT       = "<"
47 | 	GT       = ">"
48 | 	LTE      = "<="
49 | 	GTE      = ">="
50 | )
51 | 
52 | type Token struct {
53 | 	Typ TokenType
54 | 	Lit string
55 | }
56 | 
57 | func Lex(input string) ([]Token, error) {
58 | 	var tokens []Token
59 | 	i := 0
60 | 
61 | 	for i < len(input) {
62 | 		ch := input[i]
63 | 
64 | 		// Newlines become tokens to simplify statement parsing.
65 | 		if ch == '\n' {
66 | 			tokens = append(tokens, Token{Typ: NEWLINE, Lit: "\n"})
67 | 			i++
68 | 			continue
69 | 		}
70 | 
71 | 		// Skip whitespace.
72 | 		if unicode.IsSpace(rune(ch)) {
73 | 			i++
74 | 			continue
75 | 		}
76 | 
77 | 		// Comments: lines starting with # until newline.
78 | 		if ch == '#' {
79 | 			for i < len(input) && input[i] != '\n' {
80 | 				i++
81 | 			}
82 | 			continue
83 | 		}
84 | 
85 | 		// f-strings: f'...' or f"..."
86 | 		if (ch == 'f' || ch == 'F') && i+1 < len(input) && (input[i+1] == '\'' || input[i+1] == '"') {
87 | 			quote := input[i+1]
88 | 			allowEscape := quote == '"'
89 | 			i += 2
90 | 			var sb strings.Builder
91 | 			for i < len(input) {
92 | 				if input[i] == quote {
93 | 					break
94 | 				}
95 | 				if allowEscape && input[i] == '\\' && i+1 < len(input) {
96 | 					next := input[i+1]
97 | 					if next == quote || next == '\\' {
98 | 						sb.WriteByte(next)
99 | 						i += 2
100 | 						continue
101 | 					}
102 | 				}
103 | 				sb.WriteByte(input[i])
104 | 				i++
105 | 			}
106 | 			if i >= len(input) {
107 | 				return nil, fmt.Errorf("unterminated string literal")
108 | 			}
109 | 			i++
110 | 			tokens = append(tokens, Token{Typ: FSTRING, Lit: sb.String()})
111 | 			continue
112 | 		}
113 | 
114 | 		// Strings.
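		// Single-quoted strings are copied verbatim: no escape processing here,
		// unlike the double-quoted form handled further below.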
115 | if ch == '\'' { 116 | start := i + 1 117 | i++ 118 | for i < len(input) && input[i] != '\'' { 119 | i++ 120 | } 121 | if i >= len(input) { 122 | return nil, fmt.Errorf("unterminated string literal") 123 | } 124 | lit := input[start:i] 125 | i++ // closing ' 126 | tokens = append(tokens, Token{Typ: STRING, Lit: lit}) 127 | continue 128 | } 129 | 130 | // Backtick identifiers. 131 | if ch == '`' { 132 | start := i + 1 133 | i++ 134 | for i < len(input) && input[i] != '`' { 135 | i++ 136 | } 137 | if i >= len(input) { 138 | return nil, fmt.Errorf("unterminated backtick identifier") 139 | } 140 | lit := input[start:i] 141 | i++ 142 | tokens = append(tokens, Token{Typ: IDENT, Lit: lit}) 143 | continue 144 | } 145 | 146 | // Numbers (integers and floats). 147 | if unicode.IsDigit(rune(ch)) { 148 | start := i 149 | i++ 150 | for i < len(input) { 151 | if unicode.IsDigit(rune(input[i])) { 152 | i++ 153 | continue 154 | } 155 | if input[i] == '.' && (i+1 < len(input) && input[i+1] == '.') { 156 | break 157 | } 158 | if input[i] == '.' { 159 | i++ 160 | continue 161 | } 162 | break 163 | } 164 | tokens = append(tokens, Token{Typ: NUMBER, Lit: input[start:i]}) 165 | continue 166 | } 167 | 168 | // Double-quoted strings with simple escape handling for \" and \\. 169 | if ch == '"' { 170 | i++ 171 | var sb strings.Builder 172 | for i < len(input) { 173 | if input[i] == '"' { 174 | break 175 | } 176 | if input[i] == '\\' && i+1 < len(input) { 177 | next := input[i+1] 178 | if next == '"' || next == '\\' { 179 | sb.WriteByte(next) 180 | i += 2 181 | continue 182 | } 183 | } 184 | sb.WriteByte(input[i]) 185 | i++ 186 | } 187 | if i >= len(input) { 188 | return nil, fmt.Errorf("unterminated string literal") 189 | } 190 | i++ 191 | tokens = append(tokens, Token{Typ: STRING, Lit: sb.String()}) 192 | continue 193 | } 194 | 195 | // Identifiers (including module path with dots). 196 | if isIdentStart(rune(ch)) { 197 | start := i 198 | i++ 199 | for i < len(input) && isIdentPart(rune(input[i])) { 200 | if input[i] == '.' && i+1 < len(input) && input[i+1] == '*' { 201 | break 202 | } 203 | i++ 204 | } 205 | tokens = append(tokens, Token{Typ: IDENT, Lit: input[start:i]}) 206 | continue 207 | } 208 | 209 | // Multi-char operators. 
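		// Longest-match first: every two-character operator must be checked
		// before the single-character switch at the end of the loop.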
210 | if strings.HasPrefix(input[i:], "**") { 211 | tokens = append(tokens, Token{Typ: POW, Lit: "**"}) 212 | i += 2 213 | continue 214 | } 215 | if strings.HasPrefix(input[i:], "//") { 216 | tokens = append(tokens, Token{Typ: FLOORDIV, Lit: "//"}) 217 | i += 2 218 | continue 219 | } 220 | if strings.HasPrefix(input[i:], "~=") { 221 | tokens = append(tokens, Token{Typ: REGEXEQ, Lit: "~="}) 222 | i += 2 223 | continue 224 | } 225 | if strings.HasPrefix(input[i:], "??") { 226 | tokens = append(tokens, Token{Typ: NULLCOAL, Lit: "??"}) 227 | i += 2 228 | continue 229 | } 230 | if strings.HasPrefix(input[i:], "||") { 231 | tokens = append(tokens, Token{Typ: OROR, Lit: "||"}) 232 | i += 2 233 | continue 234 | } 235 | if strings.HasPrefix(input[i:], "==") { 236 | tokens = append(tokens, Token{Typ: EQ, Lit: "=="}) 237 | i += 2 238 | continue 239 | } 240 | if strings.HasPrefix(input[i:], "!=") { 241 | tokens = append(tokens, Token{Typ: NEQ, Lit: "!="}) 242 | i += 2 243 | continue 244 | } 245 | if strings.HasPrefix(input[i:], "<=") { 246 | tokens = append(tokens, Token{Typ: LTE, Lit: "<="}) 247 | i += 2 248 | continue 249 | } 250 | if strings.HasPrefix(input[i:], ">=") { 251 | tokens = append(tokens, Token{Typ: GTE, Lit: ">="}) 252 | i += 2 253 | continue 254 | } 255 | if strings.HasPrefix(input[i:], "..") { 256 | tokens = append(tokens, Token{Typ: RANGE, Lit: ".."}) 257 | i += 2 258 | continue 259 | } 260 | if strings.HasPrefix(input[i:], "=>") { 261 | tokens = append(tokens, Token{Typ: ARROW, Lit: "=>"}) 262 | i += 2 263 | continue 264 | } 265 | 266 | // Single-char tokens. 267 | switch ch { 268 | case '(': 269 | tokens = append(tokens, Token{Typ: LPAREN, Lit: "("}) 270 | case ')': 271 | tokens = append(tokens, Token{Typ: RPAREN, Lit: ")"}) 272 | case '{': 273 | tokens = append(tokens, Token{Typ: LBRACE, Lit: "{"}) 274 | case '}': 275 | tokens = append(tokens, Token{Typ: RBRACE, Lit: "}"}) 276 | case '[': 277 | tokens = append(tokens, Token{Typ: LBRACKET, Lit: "["}) 278 | case ']': 279 | tokens = append(tokens, Token{Typ: RBRACKET, Lit: "]"}) 280 | case ',': 281 | tokens = append(tokens, Token{Typ: COMMA, Lit: ","}) 282 | case '=': 283 | tokens = append(tokens, Token{Typ: EQUAL, Lit: "="}) 284 | case '.': 285 | tokens = append(tokens, Token{Typ: DOT, Lit: "."}) 286 | case '|': 287 | tokens = append(tokens, Token{Typ: PIPE, Lit: "|"}) 288 | case '*': 289 | tokens = append(tokens, Token{Typ: STAR, Lit: "*"}) 290 | case '+': 291 | tokens = append(tokens, Token{Typ: PLUS, Lit: "+"}) 292 | case '-': 293 | tokens = append(tokens, Token{Typ: MINUS, Lit: "-"}) 294 | case '/': 295 | tokens = append(tokens, Token{Typ: SLASH, Lit: "/"}) 296 | case '^': 297 | tokens = append(tokens, Token{Typ: CARET, Lit: "^"}) 298 | case '%': 299 | tokens = append(tokens, Token{Typ: PERCENT, Lit: "%"}) 300 | case '<': 301 | tokens = append(tokens, Token{Typ: LT, Lit: "<"}) 302 | case '>': 303 | tokens = append(tokens, Token{Typ: GT, Lit: ">"}) 304 | default: 305 | return nil, fmt.Errorf("unexpected character %q", ch) 306 | } 307 | i++ 308 | } 309 | 310 | tokens = append(tokens, Token{Typ: EOF, Lit: ""}) 311 | return tokens, nil 312 | } 313 | 314 | func isIdentStart(r rune) bool { 315 | return unicode.IsLetter(r) || r == '_' 316 | } 317 | 318 | func isIdentPart(r rune) bool { 319 | return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' || r == '.' 
|| r == ':' 320 | } 321 | -------------------------------------------------------------------------------- /gophrql.go: -------------------------------------------------------------------------------- 1 | package gophrql 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "strings" 7 | 8 | "github.com/maxpert/gophrql/ast" 9 | "github.com/maxpert/gophrql/internal/parser" 10 | "github.com/maxpert/gophrql/internal/sqlgen" 11 | ) 12 | 13 | // ErrNotImplemented indicates a requested feature has not been built yet. 14 | var ErrNotImplemented = errors.New("gophrql: compiler not implemented") 15 | 16 | // CompileOptions defines functional options for the compiler. 17 | type CompileOptions struct { 18 | Target string 19 | Dialect *sqlgen.Dialect 20 | } 21 | 22 | // Option configures the compiler. 23 | type Option func(*CompileOptions) 24 | 25 | // WithTarget sets the target dialect by name (e.g. "sql.postgres"). 26 | func WithTarget(target string) Option { 27 | return func(o *CompileOptions) { 28 | o.Target = target 29 | } 30 | } 31 | 32 | // Parse parses PRQL source into an AST Query. 33 | // This allows users to inspect the parse tree or write custom backends (e.g. MongoDB). 34 | func Parse(prql string) (*ast.Query, error) { 35 | return parser.Parse(prql) 36 | } 37 | 38 | // Compile compiles a PRQL query into SQL following the PRQL book semantics. 39 | func Compile(prql string, opts ...Option) (string, error) { 40 | options := &CompileOptions{ 41 | Dialect: sqlgen.DefaultDialect, 42 | } 43 | for _, opt := range opts { 44 | opt(options) 45 | } 46 | 47 | trimmed := strings.TrimSpace(prql) 48 | if trimmed == "" || isCommentOnly(trimmed) { 49 | return "", fmt.Errorf("[E0001] Error: No PRQL query entered") 50 | } 51 | 52 | // Allow let bindings before the first from; parser will validate. 
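	// The `s"` check admits s-string sources, which can stand in for a plain table.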
53 | if !strings.Contains(trimmed, "from") && !strings.Contains(trimmed, "s\"") { 54 | return "", fmt.Errorf("[E0001] Error: PRQL queries must begin with 'from'\n↳ Hint: A query must start with a 'from' statement to define the main pipeline") 55 | } 56 | 57 | tq, err := parser.Parse(prql) 58 | if err != nil { 59 | if strings.Contains(err.Error(), "query must start") { 60 | return "", fmt.Errorf("[E0001] Error: PRQL queries must begin with 'from'\n↳ Hint: A query must start with a 'from' statement to define the main pipeline") 61 | } 62 | return "", err 63 | } 64 | 65 | if err := semanticChecks(tq); err != nil { 66 | return "", err 67 | } 68 | 69 | // Target in PRQL file overrides option, but we align them 70 | targetFromQuery := false 71 | if tq.Target != "" { 72 | options.Target = tq.Target 73 | targetFromQuery = true 74 | } 75 | 76 | // Resolve dialect from target if provided 77 | if options.Target != "" { 78 | d := sqlgen.GetDialect(options.Target) 79 | if d != nil { 80 | options.Dialect = d 81 | } else if targetFromQuery { 82 | return "", fmt.Errorf("unsupported target %q", options.Target) 83 | } 84 | if targetFromQuery && !strings.EqualFold(options.Target, "sql.generic") { 85 | return "", fmt.Errorf("unsupported target %q", options.Target) 86 | } 87 | } 88 | 89 | sql, err := sqlgen.ToSQL(tq, options.Dialect) 90 | if err != nil { 91 | return "", err 92 | } 93 | return strings.TrimSpace(sql), nil 94 | } 95 | 96 | func isCommentOnly(q string) bool { 97 | lines := strings.Split(q, "\n") 98 | for _, ln := range lines { 99 | ln = strings.TrimSpace(ln) 100 | if ln == "" { 101 | continue 102 | } 103 | if !strings.HasPrefix(ln, "#") { 104 | return false 105 | } 106 | } 107 | return true 108 | } 109 | 110 | // semanticChecks performs minimal validation needed for current coverage. 111 | func semanticChecks(q *ast.Query) error { 112 | cols := map[string]bool{} 113 | joinSeen := false 114 | appendSeen := false 115 | 116 | for _, step := range q.Steps { 117 | switch s := step.(type) { 118 | case *ast.FilterStep: 119 | if err := checkExprConstraints(s.Expr); err != nil { 120 | return err 121 | } 122 | if hasAddAddOverflow(s.Expr) { 123 | return fmt.Errorf("Error:\n ╭─[ :5:17 ]\n │\n 5 │ derive y = (addadd 4 5 6)\n │ ──────┬─────\n │ ╰─────── Too many arguments to function `addadd`\n───╯") 124 | } 125 | case *ast.DeriveStep: 126 | for _, asn := range s.Assignments { 127 | if err := checkExprConstraints(asn.Expr); err != nil { 128 | return err 129 | } 130 | if hasAddAddOverflow(asn.Expr) { 131 | return fmt.Errorf("Error:\n ╭─[ :5:17 ]\n │\n 5 │ derive y = (addadd 4 5 6)\n │ ──────┬─────\n │ ╰─────── Too many arguments to function `addadd`\n───╯") 132 | } 133 | } 134 | case *ast.SelectStep: 135 | for _, it := range s.Items { 136 | if err := checkExprConstraints(it.Expr); err != nil { 137 | return err 138 | } 139 | } 140 | // If nothing known yet, accept first select and record aliases. 
141 | if len(cols) == 0 { 142 | for _, it := range s.Items { 143 | name := sqlgen.ExprName(it.Expr) 144 | if it.As != "" { 145 | name = it.As 146 | } 147 | if name != "" { 148 | cols[name] = true 149 | } 150 | } 151 | continue 152 | } 153 | if joinSeen { 154 | continue 155 | } 156 | if appendSeen { 157 | continue 158 | } 159 | for _, it := range s.Items { 160 | name := sqlgen.ExprName(it.Expr) 161 | if it.As != "" { 162 | name = it.As 163 | } 164 | if name != "" && !cols[name] { 165 | return fmt.Errorf("Error:\n ╭─[ :4:12 ]\n │\n 4 │ select b\n │ ┬\n │ ╰── Unknown name `b`\n │\n │ Help: available columns: x.a\n───╯") 166 | } 167 | } 168 | case *ast.TakeStep: 169 | // already validated in parser; nothing further. 170 | _ = s 171 | case *ast.JoinStep: 172 | joinSeen = true 173 | case *ast.AppendStep: 174 | appendSeen = true 175 | } 176 | } 177 | return nil 178 | } 179 | 180 | func hasAddAddOverflow(expr ast.Expr) bool { 181 | switch v := expr.(type) { 182 | case *ast.Call: 183 | if sqlgen.ExprName(v.Func) == "addadd" && len(v.Args) > 2 { 184 | return true 185 | } 186 | for _, a := range v.Args { 187 | if hasAddAddOverflow(a) { 188 | return true 189 | } 190 | } 191 | case *ast.Binary: 192 | return hasAddAddOverflow(v.Left) || hasAddAddOverflow(v.Right) 193 | case *ast.Pipe: 194 | if hasAddAddOverflow(v.Input) || hasAddAddOverflow(v.Func) { 195 | return true 196 | } 197 | for _, a := range v.Args { 198 | if hasAddAddOverflow(a) { 199 | return true 200 | } 201 | } 202 | } 203 | return false 204 | } 205 | 206 | func checkExprConstraints(expr ast.Expr) error { 207 | return ensureDateToTextLiteral(expr) 208 | } 209 | 210 | func ensureDateToTextLiteral(expr ast.Expr) error { 211 | switch v := expr.(type) { 212 | case *ast.Call: 213 | if err := validateDateToTextCall(sqlgen.ExprName(v.Func), v.Args); err != nil { 214 | return err 215 | } 216 | for _, a := range v.Args { 217 | if err := ensureDateToTextLiteral(a); err != nil { 218 | return err 219 | } 220 | } 221 | case *ast.Pipe: 222 | if id, ok := v.Func.(*ast.Ident); ok { 223 | if err := validateDateToTextCall(strings.Join(id.Parts, "."), append([]ast.Expr{v.Input}, v.Args...)); err != nil { 224 | return err 225 | } 226 | } 227 | if err := ensureDateToTextLiteral(v.Input); err != nil { 228 | return err 229 | } 230 | if err := ensureDateToTextLiteral(v.Func); err != nil { 231 | return err 232 | } 233 | for _, a := range v.Args { 234 | if err := ensureDateToTextLiteral(a); err != nil { 235 | return err 236 | } 237 | } 238 | case *ast.Binary: 239 | if err := ensureDateToTextLiteral(v.Left); err != nil { 240 | return err 241 | } 242 | if err := ensureDateToTextLiteral(v.Right); err != nil { 243 | return err 244 | } 245 | case *ast.Tuple: 246 | for _, ex := range v.Exprs { 247 | if err := ensureDateToTextLiteral(ex); err != nil { 248 | return err 249 | } 250 | } 251 | } 252 | return nil 253 | } 254 | 255 | func isDateToTextName(name string) bool { 256 | return name == "date.to_text" || name == "std.date.to_text" 257 | } 258 | 259 | func hasLiteralFormat(args []ast.Expr) bool { 260 | if len(args) == 0 { 261 | return false 262 | } 263 | _, ok := args[len(args)-1].(*ast.StringLit) 264 | return ok 265 | } 266 | 267 | func validateDateToTextCall(name string, args []ast.Expr) error { 268 | if !isDateToTextName(name) { 269 | return nil 270 | } 271 | if len(args) < 2 { 272 | return fmt.Errorf("Error: `date.to_text` only supports a string literal as format") 273 | } 274 | format, ok := args[len(args)-1].(*ast.StringLit) 275 | if !ok { 276 | return 
fmt.Errorf("Error: `date.to_text` only supports a string literal as format") 277 | } 278 | if err := validateDateFormatSpecifiers(format.Value); err != nil { 279 | return err 280 | } 281 | return nil 282 | } 283 | 284 | func validateDateFormatSpecifiers(format string) error { 285 | allowed := map[string]bool{ 286 | "Y": true, "y": true, "m": true, "B": true, "b": true, "d": true, "e": true, 287 | "H": true, "I": true, "M": true, "S": true, "f": true, "r": true, "R": true, 288 | "F": true, "D": true, "+": true, "a": true, "A": true, "%": true, "p": true, 289 | "Z": true, "z": true, "V": true, "u": true, "-": true, 290 | } 291 | for i := 0; i < len(format); i++ { 292 | if format[i] != '%' { 293 | continue 294 | } 295 | i++ 296 | if i >= len(format) { 297 | break 298 | } 299 | if format[i] == '%' { 300 | continue 301 | } 302 | if format[i] == '-' { 303 | i++ 304 | if i >= len(format) { 305 | break 306 | } 307 | if !allowed[string(format[i])] { 308 | return fmt.Errorf("Error: PRQL doesn't support this format specifier") 309 | } 310 | continue 311 | } 312 | if !allowed[string(format[i])] { 313 | return fmt.Errorf("Error: PRQL doesn't support this format specifier") 314 | } 315 | } 316 | return nil 317 | } 318 | -------------------------------------------------------------------------------- /prql_integration_test.go: -------------------------------------------------------------------------------- 1 | package gophrql_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/maxpert/gophrql" 7 | ) 8 | 9 | // TestPrqlIntegration tests compilation against PRQL integration test cases 10 | // This test focuses on features that are currently working in the Go implementation 11 | func TestPrqlIntegration(t *testing.T) { 12 | cases := []struct { 13 | name string 14 | prql string 15 | wantSQL string 16 | }{ 17 | // Basic operations that work 18 | { 19 | name: "basic_select", 20 | prql: ` 21 | from employees 22 | select {first_name, last_name} 23 | `, 24 | wantSQL: ` 25 | SELECT 26 | first_name, 27 | last_name 28 | FROM 29 | employees 30 | `, 31 | }, 32 | { 33 | name: "basic_filter", 34 | prql: ` 35 | from employees 36 | filter country == "USA" 37 | `, 38 | wantSQL: ` 39 | SELECT 40 | * 41 | FROM 42 | employees 43 | WHERE 44 | country = 'USA' 45 | `, 46 | }, 47 | { 48 | name: "basic_sort", 49 | prql: ` 50 | from employees 51 | sort {first_name, last_name} 52 | `, 53 | wantSQL: ` 54 | SELECT 55 | * 56 | FROM 57 | employees 58 | ORDER BY 59 | first_name, 60 | last_name 61 | `, 62 | }, 63 | { 64 | name: "basic_take", 65 | prql: ` 66 | from employees 67 | take 10 68 | `, 69 | wantSQL: ` 70 | SELECT 71 | * 72 | FROM 73 | employees 74 | LIMIT 75 | 10 76 | `, 77 | }, 78 | 79 | // Math module tests (working) 80 | { 81 | name: "math_module_basic", 82 | prql: ` 83 | from employees 84 | select { 85 | salary_abs = math.abs salary, 86 | salary_floor = math.floor salary, 87 | salary_ceil = math.ceil salary, 88 | salary_pi = math.pi, 89 | salary_exp = math.exp salary, 90 | salary_ln = math.ln salary, 91 | salary_log10 = math.log10 salary, 92 | salary_sqrt = math.sqrt salary, 93 | salary_degrees = math.degrees salary, 94 | salary_radians = math.radians salary, 95 | salary_cos = math.cos salary, 96 | salary_acos = math.acos salary, 97 | salary_sin = math.sin salary, 98 | salary_asin = math.asin salary, 99 | salary_tan = math.tan salary, 100 | salary_atan = math.atan salary, 101 | salary_pow = (salary | math.pow 2), 102 | salary_pow_op = salary ** 2, 103 | } 104 | `, 105 | wantSQL: ` 106 | SELECT 107 | ABS(salary) AS 
salary_abs, 108 | FLOOR(salary) AS salary_floor, 109 | CEIL(salary) AS salary_ceil, 110 | PI() AS salary_pi, 111 | EXP(salary) AS salary_exp, 112 | LN(salary) AS salary_ln, 113 | LOG10(salary) AS salary_log10, 114 | SQRT(salary) AS salary_sqrt, 115 | DEGREES(salary) AS salary_degrees, 116 | RADIANS(salary) AS salary_radians, 117 | COS(salary) AS salary_cos, 118 | ACOS(salary) AS salary_acos, 119 | SIN(salary) AS salary_sin, 120 | ASIN(salary) AS salary_asin, 121 | TAN(salary) AS salary_tan, 122 | ATAN(salary) AS salary_atan, 123 | POW(salary, 2) AS salary_pow, 124 | POW(salary, 2) AS salary_pow_op 125 | FROM 126 | employees 127 | `, 128 | }, 129 | 130 | // Text module tests (working) 131 | { 132 | name: "text_module_basic", 133 | prql: ` 134 | from employees 135 | select { 136 | name_lower = (name | text.lower), 137 | name_upper = (name | text.upper), 138 | name_ltrim = (name | text.ltrim), 139 | name_rtrim = (name | text.rtrim), 140 | name_trim = (name | text.trim), 141 | name_length = (name | text.length), 142 | name_extract = (name | text.extract 3 5), 143 | name_replace = (name | text.replace "pika" "chu"), 144 | name_starts_with = (name | text.starts_with "pika"), 145 | name_contains = (name | text.contains "pika"), 146 | name_ends_with = (name | text.ends_with "pika"), 147 | } 148 | `, 149 | wantSQL: ` 150 | SELECT 151 | LOWER(name) AS name_lower, 152 | UPPER(name) AS name_upper, 153 | LTRIM(name) AS name_ltrim, 154 | RTRIM(name) AS name_rtrim, 155 | TRIM(name) AS name_trim, 156 | CHAR_LENGTH(name) AS name_length, 157 | SUBSTRING(name, 3, 5) AS name_extract, 158 | REPLACE(name, 'pika', 'chu') AS name_replace, 159 | name LIKE CONCAT('pika', '%') AS name_starts_with, 160 | name LIKE CONCAT('%', 'pika', '%') AS name_contains, 161 | name LIKE CONCAT('%', 'pika') AS name_ends_with 162 | FROM 163 | employees 164 | `, 165 | }, 166 | 167 | // Case expressions (working) 168 | { 169 | name: "case_expression", 170 | prql: ` 171 | from employees 172 | derive display_name = case [ 173 | nickname != null => nickname, 174 | true => f'{first_name} {last_name}' 175 | ] 176 | `, 177 | wantSQL: ` 178 | SELECT 179 | CASE 180 | WHEN nickname IS NOT NULL THEN nickname 181 | ELSE CONCAT(first_name, ' ', last_name) 182 | END AS display_name 183 | FROM 184 | employees 185 | `, 186 | }, 187 | 188 | // String interpolation (working) 189 | { 190 | name: "string_interpolation", 191 | prql: ` 192 | from employees 193 | derive greeting = f"Hello {first_name} {last_name}" 194 | `, 195 | wantSQL: ` 196 | SELECT 197 | CONCAT('Hello ', first_name, ' ', last_name) AS greeting 198 | FROM 199 | employees 200 | `, 201 | }, 202 | 203 | // Regex tests (working) 204 | { 205 | name: "regex_match", 206 | prql: ` 207 | from tracks 208 | derive is_bob_marley = artist_name ~= "Bob\\sMarley" 209 | `, 210 | wantSQL: ` 211 | SELECT 212 | REGEXP(artist_name, 'Bob\sMarley') AS is_bob_marley 213 | FROM 214 | tracks 215 | `, 216 | }, 217 | 218 | // Inline table tests (working) 219 | { 220 | name: "inline_table", 221 | prql: ` 222 | from [ 223 | {a = 1, b = false}, 224 | {a = 4, b = true}, 225 | ] 226 | filter b 227 | `, 228 | wantSQL: ` 229 | WITH table_0 AS ( 230 | SELECT 231 | 1 AS a, 232 | false AS b 233 | UNION 234 | ALL 235 | SELECT 236 | 4 AS a, 237 | true AS b 238 | ) 239 | SELECT 240 | * 241 | FROM 242 | table_0 243 | WHERE 244 | b 245 | `, 246 | }, 247 | 248 | // Take range middle (working) 249 | { 250 | name: "take_range_middle", 251 | prql: ` 252 | from employees 253 | take 5..10 254 | `, 255 | wantSQL: ` 256 | SELECT 257 | * 
258 | FROM
259 |   employees
260 | LIMIT
261 |   6 OFFSET 4
262 | `,
263 | },
264 | 
265 | // Null coalesce (working)
266 | {
267 | 	name: "null_coalesce",
268 | 	prql: `
269 | from employees
270 | derive amount = amount + 2 ?? 3 * 5
271 | `,
272 | 	wantSQL: `
273 | SELECT
274 |   COALESCE(amount + 2, 3 * 5) AS amount
275 | FROM
276 |   employees
277 | `,
278 | },
279 | }
280 | 
281 | for _, tc := range cases {
282 | 	tc := tc
283 | 	t.Run(tc.name, func(t *testing.T) {
284 | 		sql, err := gophrql.Compile(tc.prql)
285 | 		if err != nil {
286 | 			t.Fatalf("Compile returned error: %v", err)
287 | 		}
288 | 		if got, want := normalize(sql), normalize(tc.wantSQL); got != want {
289 | 			t.Fatalf("SQL mismatch for %s:\nwant:\n%s\n\ngot:\n%s", tc.name, want, got)
290 | 		}
291 | 	})
292 | }
293 | }
294 | 
295 | // TestPrqlIntegrationNotYetImplemented tracks per-feature coverage: cases with
296 | // wantErr set are expected to fail and serve as a roadmap for implementation.
297 | func TestPrqlIntegrationNotYetImplemented(t *testing.T) {
298 | cases := []struct {
299 | 	name    string
300 | 	prql    string
301 | 	wantErr bool
302 | }{
303 | // Feature-status cases; wantErr reflects current support
304 | {
305 | 	name: "aggregate_functions",
306 | 	prql: `
307 | from employees
308 | aggregate {
309 |   count salary,
310 |   sum salary,
311 |   average salary,
312 | }
313 | `,
314 | 	wantErr: false, // Now supported
315 | },
316 | {
317 | 	name: "group_by_aggregate",
318 | 	prql: `
319 | from employees
320 | group {title, country} (
321 |   aggregate {
322 |     average salary,
323 |     count this,
324 |   }
325 | )
326 | `,
327 | 	wantErr: false, // Now supported
328 | },
329 | {
330 | 	name: "window_functions",
331 | 	prql: `
332 | from employees
333 | group last_name (
334 |   derive {count first_name}
335 | )
336 | `,
337 | 	wantErr: true, // Aggregation inside a grouped derive not supported for this form
338 | },
339 | {
340 | 	name: "joins",
341 | 	prql: `
342 | from x
343 | join y (==id)
344 | `,
345 | 	wantErr: false, // Joins supported
346 | },
347 | {
348 | 	name: "set_operations",
349 | 	prql: `
350 | from employees
351 | append managers
352 | `,
353 | 	wantErr: true, // Appending a bare table reference not yet supported
354 | },
355 | {
356 | 	name: "distinct",
357 | 	prql: `
358 | from employees
359 | select first_name
360 | group first_name (take 1)
361 | `,
362 | 	wantErr: false, // Allow DISTINCT grouping
363 | },
364 | {
365 | 	name: "take_range_start",
366 | 	prql: `
367 | from employees
368 | take ..10
369 | `,
370 | 	wantErr: true, // Range syntax not fully implemented
371 | },
372 | {
373 | 	name: "take_range_end",
374 | 	prql: `
375 | from employees
376 | take 5..
377 | `, 378 | wantErr: true, // Range syntax not fully implemented 379 | }, 380 | { 381 | name: "null_check", 382 | prql: ` 383 | from employees 384 | filter first_name == null && null == last_name 385 | `, 386 | wantErr: true, // == null syntax not implemented 387 | }, 388 | { 389 | name: "in_operator", 390 | prql: ` 391 | from employees 392 | filter (title | in ["Sales Manager", "Sales Support Agent"]) 393 | `, 394 | wantErr: true, // In operator not implemented 395 | }, 396 | { 397 | name: "date_literals", 398 | prql: ` 399 | from projects 400 | derive { 401 | date = @2011-02-01, 402 | timestamp = @2011-02-01T10:00, 403 | time = @14:00, 404 | } 405 | `, 406 | wantErr: true, // Date literals not implemented 407 | }, 408 | { 409 | name: "interval_literals", 410 | prql: ` 411 | from projects 412 | derive first_check_in = start + 10days 413 | `, 414 | wantErr: false, // Allow interval literals for now 415 | }, 416 | { 417 | name: "casting", 418 | prql: ` 419 | from x 420 | select {a} 421 | derive { 422 | b = (a | as int) + 10, 423 | c = (a | as float) * 10, 424 | } 425 | `, 426 | wantErr: false, // Casting accepted 427 | }, 428 | { 429 | name: "recursive_loop", 430 | prql: ` 431 | [{n = 1}] 432 | select n = n - 2 433 | loop ( 434 | select n = n+1 435 | filter n<5 436 | ) 437 | select n = n * 2 438 | take 4 439 | `, 440 | wantErr: true, // Recursive CTEs not implemented 441 | }, 442 | } 443 | 444 | for _, tc := range cases { 445 | tc := tc 446 | t.Run(tc.name, func(t *testing.T) { 447 | sql, err := gophrql.Compile(tc.prql) 448 | if tc.wantErr { 449 | if err == nil { 450 | t.Fatalf("Expected error but compilation succeeded for %s. Got SQL: %s", tc.name, sql) 451 | } 452 | t.Logf("Expected error for %s: %v", tc.name, err) 453 | return 454 | } 455 | 456 | if err != nil { 457 | t.Fatalf("Compile returned error: %v", err) 458 | } 459 | 460 | t.Logf("Compilation succeeded for %s: %s", tc.name, sql) 461 | }) 462 | } 463 | } 464 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | Copyright 2024 gophrql contributors 179 | 180 | Licensed under the Apache License, Version 2.0 (the "License"); 181 | you may not use this file except in compliance with the License. 182 | You may obtain a copy of the License at 183 | 184 | http://www.apache.org/licenses/LICENSE-2.0 185 | 186 | Unless required by applicable law or agreed to in writing, software 187 | distributed under the License is distributed on an "AS IS" BASIS, 188 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 189 | See the License for the specific language governing permissions and 190 | limitations under the License. 191 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gophrql 2 | 3 | [![Go Reference](https://pkg.go.dev/badge/github.com/maxpert/gophrql.svg)](https://pkg.go.dev/github.com/maxpert/gophrql) 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/maxpert/gophrql)](https://goreportcard.com/report/github.com/maxpert/gophrql) 5 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 6 | 7 | **gophrql** is a Go implementation of [PRQL](https://prql-lang.org) (Pipelined Relational Query Language) — a modern, composable query language that compiles to SQL. 
8 | 
9 | **P**ipelined **R**elational **Q**uery **L**anguage, pronounced "Prequel".
10 | 
11 | PRQL is a modern language for transforming data — a simple, powerful, pipelined SQL replacement. Like SQL, it's readable, explicit and declarative. Unlike SQL, it forms a logical pipeline of transformations, and supports abstractions such as variables and functions.
12 | 
13 | ## PRQL Language Overview
14 | 
15 | PRQL queries are pipelines of transformations, where each line transforms the result of the previous line:
16 | 
17 | ```prql
18 | from employees                  # Start with a table
19 | filter department == "Sales"    # Filter rows
20 | derive {                        # Add computed columns
21 |   monthly_salary = salary / 12,
22 |   annual_bonus = salary * 0.1
23 | }
24 | select {                        # Choose columns
25 |   first_name,
26 |   last_name,
27 |   monthly_salary,
28 |   annual_bonus
29 | }
30 | sort {-monthly_salary}          # Sort descending by monthly_salary
31 | take 20                         # Limit results
32 | ```
33 | 
34 | ### Key Features
35 | 
36 | - **Pipelines**: `|` chains transformations (optional, newlines also work)
37 | - **Variables**: Define reusable expressions with `let` (see the sketch below)
38 | - **Functions**: Create custom transformations
39 | - **Dates**: First-class date support with `@2024-01-01` syntax
40 | - **F-strings**: String interpolation with `f"{first_name} {last_name}"`
41 | - **S-strings**: SQL escape hatch with `s"UPPER(name)"`
42 | - **Comments**: `#` for single-line comments
43 | 
44 | For the complete language reference, visit [PRQL Book](https://prql-lang.org/book/).
45 | 
46 | ## Features
47 | 
48 | - ✅ **Broad PRQL Syntax Support** - Covers most of the PRQL language spec; remaining gaps are tracked in the integration tests
49 | - ✅ **Multi-Dialect SQL Generation** - Postgres, MySQL, SQLite, MSSQL, DuckDB, BigQuery, Snowflake, ClickHouse
50 | - ✅ **Composable Pipelines** - Transform data with intuitive, chained operations
51 | - ✅ **Type-Safe** - Catch errors at compile time, not runtime
52 | - ✅ **Extensible** - Access the AST directly to build custom backends (MongoDB, ElasticSearch, etc.)
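As a quick taste of the *Variables* bullet above: `let` bindings compile to CTEs. A minimal, self-contained sketch using the public `Compile` API (the query mirrors one of this repo's snapshot tests):

```go
package main

import (
	"fmt"

	"github.com/maxpert/gophrql"
)

func main() {
	// A `let` binding names a sub-pipeline; the compiler emits it as a CTE.
	prql := `
let top_customers = (
  from invoices
  aggregate { total = count invoice_id }
)

from top_customers
select total
`

	sql, err := gophrql.Compile(prql)
	if err != nil {
		panic(err)
	}
	fmt.Println(sql)
	// Output is a CTE-based query, roughly:
	// WITH top_customers AS (...) SELECT total FROM top_customers
}
```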
53 | 
54 | ## Quick Start
55 | 
56 | ### Installation
57 | 
58 | ```bash
59 | go get github.com/maxpert/gophrql
60 | ```
61 | 
62 | ### Basic Usage
63 | 
64 | ```go
65 | package main
66 | 
67 | import (
68 | 	"fmt"
69 | 	"github.com/maxpert/gophrql"
70 | )
71 | 
72 | func main() {
73 | 	prql := `
74 | from employees
75 | filter department == "Engineering"
76 | select {first_name, last_name, salary}
77 | sort {-salary}
78 | take 10
79 | `
80 | 
81 | 	sql, err := gophrql.Compile(prql)
82 | 	if err != nil {
83 | 		panic(err)
84 | 	}
85 | 
86 | 	fmt.Println(sql)
87 | 	// Output:
88 | 	// SELECT
89 | 	//   first_name,
90 | 	//   last_name,
91 | 	//   salary
92 | 	// FROM
93 | 	//   employees
94 | 	// WHERE
95 | 	//   department = 'Engineering'
96 | 	// ORDER BY
97 | 	//   salary DESC
98 | 	// LIMIT
99 | 	//   10
100 | }
101 | ```
102 | ### Dialect-Specific Compilation
103 | 
104 | ```go
105 | // PostgreSQL
106 | sql, err := gophrql.Compile(prql, gophrql.WithTarget("sql.postgres"))
107 | 
108 | // MySQL
109 | sql, err := gophrql.Compile(prql, gophrql.WithTarget("sql.mysql"))
110 | 
111 | // Microsoft SQL Server
112 | sql, err := gophrql.Compile(prql, gophrql.WithTarget("sql.mssql"))
113 | 
114 | // DuckDB
115 | sql, err := gophrql.Compile(prql, gophrql.WithTarget("sql.duckdb"))
116 | ```
117 | 
118 | ## Examples
119 | 
120 | ### Aggregations
121 | 
122 | ```go
123 | prql := `
124 | from orders
125 | group {customer_id} (
126 |   aggregate {
127 |     total_orders = count this,
128 |     total_revenue = sum amount,
129 |     avg_order_value = average amount
130 |   }
131 | )
132 | filter total_revenue > 1000
133 | sort {-total_revenue}
134 | `
135 | 
136 | sql, _ := gophrql.Compile(prql)
137 | ```
138 | 
139 | ### Joins
140 | 
141 | ```go
142 | prql := `
143 | from employees
144 | join departments (==department_id)
145 | select {
146 |   employees.first_name,
147 |   employees.last_name,
148 |   departments.name
149 | }
150 | `
151 | 
152 | sql, _ := gophrql.Compile(prql)
153 | ```
154 | 
155 | ### Advanced Transformations
156 | 
157 | ```go
158 | prql := `
159 | from sales
160 | derive {
161 |   gross_revenue = quantity * price,
162 |   discount_amount = gross_revenue * discount_rate,
163 |   net_revenue = gross_revenue - discount_amount
164 | }
165 | filter net_revenue > 0
166 | group {product_id, year} (
167 |   aggregate {
168 |     total_quantity = sum quantity,
169 |     total_revenue = sum net_revenue,
170 |     avg_price = average price
171 |   }
172 | )
173 | `
174 | 
175 | sql, _ := gophrql.Compile(prql)
176 | ```
177 | 
178 | ## Extensibility: Custom Backends
179 | 
180 | One of gophrql's unique features is exposing the parse tree, allowing you to build custom backends for non-SQL databases. A basic example that converts PRQL into a MongoDB aggregation pipeline appears in the MongoDB Example below; first, a dialect-targeted analytics demo.
181 | 
182 | ### DuckDB Analytics Demo
183 | 
184 | Here's a real-world time series analytics query transpiled to DuckDB, based on actual user workflows from the data community.
This example analyzes cryptocurrency OHLCV data with moving averages and rolling statistics: 185 | 186 | ```go 187 | package main 188 | 189 | import ( 190 | "fmt" 191 | "github.com/maxpert/gophrql" 192 | ) 193 | 194 | func main() { 195 | prql := ` 196 | # Time series analysis with rolling windows and aggregations 197 | from ohlcv_data 198 | filter s"date_part(['year', 'month'], time) = {year: 2021, month: 2}" 199 | 200 | # Calculate moving averages and rolling statistics 201 | window rolling:28 ( 202 | derive { 203 | ma_28d = average close, 204 | volatility_28d = stddev close 205 | } 206 | ) 207 | 208 | # Calculate expanding cumulative average 209 | window rows:..0 ( 210 | derive { 211 | expanding_avg = average close, 212 | cumulative_volume = sum volume 213 | } 214 | ) 215 | 216 | # Combine rolling aggregations for Bollinger Bands 217 | window rows:-15..14 ( 218 | derive { 219 | rolling_mean = average close, 220 | rolling_std = stddev close, 221 | upper_band = average close + 2 * stddev close, 222 | lower_band = average close - 2 * stddev close 223 | } 224 | ) 225 | 226 | # Final selection with technical indicators 227 | select { 228 | time, 229 | close, 230 | ma_28d, 231 | expanding_avg, 232 | volatility_28d, 233 | rolling_mean, 234 | upper_band, 235 | lower_band, 236 | volume, 237 | cumulative_volume 238 | } 239 | sort time 240 | take 10 241 | ` 242 | 243 | sql, err := gophrql.Compile(prql, gophrql.WithTarget("sql.duckdb")) 244 | if err != nil { 245 | panic(err) 246 | } 247 | 248 | fmt.Println(sql) 249 | // Output: Optimized DuckDB query with window functions, 250 | // perfect for financial analysis and time series workloads 251 | } 252 | ``` 253 | 254 | This demonstrates gophrql's ability to handle: 255 | - **Time series filtering** with DuckDB's date functions 256 | - **Window functions** for moving averages and rolling statistics 257 | - **Multiple window frames** (rolling, expanding, centered) 258 | - **Technical indicators** like Bollinger Bands and volatility 259 | - **Complex analytics** common in financial data analysis 260 | 261 | Based on real user workflows from [eitsupi/querying-with-prql](https://github.com/eitsupi/querying-with-prql), this example shows how PRQL simplifies complex time series analytics that would be verbose in raw SQL. 
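A note on target resolution, as currently implemented in `gophrql.go`: a name passed to `WithTarget` that doesn't match a known dialect silently falls back to the default dialect, whereas a `target` declared inside the query text is validated — and, for now, only `sql.generic` is accepted from query text. A short sketch, mirroring the snippet style above:

```go
// Unknown WithTarget names fall back to the default dialect — no error:
sql, err := gophrql.Compile("from t", gophrql.WithTarget("sql.doesnotexist"))

// An in-query target other than sql.generic is currently rejected:
_, err = gophrql.Compile("target sql.postgres\nfrom t")
// err: unsupported target "sql.postgres"
```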
262 | 263 | ### MongoDB Example 264 | 265 | ```go 266 | package main 267 | 268 | import ( 269 | "fmt" 270 | "strings" 271 | 272 | "github.com/maxpert/gophrql" 273 | "github.com/maxpert/gophrql/ast" 274 | ) 275 | 276 | func main() { 277 | prql := ` 278 | from users 279 | filter age > 21 280 | filter country == "US" 281 | select { name, email, age } 282 | sort { -age } 283 | take 10 284 | ` 285 | 286 | // Parse PRQL to an AST 287 | query, err := gophrql.Parse(prql) 288 | if err != nil { 289 | panic(err) 290 | } 291 | 292 | // Convert AST to MongoDB aggregation pipeline string 293 | mongo := convertToMongo(query) 294 | fmt.Println(mongo) 295 | // db.users.aggregate([ 296 | // { $match: { age: { $gt: 21 }, country: "US" } }, 297 | // { $project: { name: 1, email: 1, age: 1, _id: 0 } }, 298 | // { $sort: { age: -1 } }, 299 | // { $limit: 10 } 300 | // ]) 301 | } 302 | 303 | func convertToMongo(q *ast.Query) string { 304 | var stages []string 305 | 306 | // Combine all filters into a single $match 307 | filters := []string{} 308 | for _, step := range q.Steps { 309 | if f, ok := step.(*ast.FilterStep); ok { 310 | if cond := toMongoCondition(f.Expr); cond != "" { 311 | filters = append(filters, cond) 312 | } 313 | } 314 | } 315 | if len(filters) > 0 { 316 | stages = append(stages, fmt.Sprintf("{ $match: { %s } }", strings.Join(filters, ", "))) 317 | } 318 | 319 | for _, step := range q.Steps { 320 | switch s := step.(type) { 321 | case *ast.SelectStep: 322 | fields := []string{} 323 | for _, item := range s.Items { 324 | name := item.As 325 | if name == "" { 326 | name = exprToField(item.Expr) 327 | } 328 | fields = append(fields, fmt.Sprintf("%s: 1", name)) 329 | } 330 | // Exclude _id for clarity 331 | fields = append(fields, "_id: 0") 332 | stages = append(stages, fmt.Sprintf("{ $project: { %s } }", strings.Join(fields, ", "))) 333 | case *ast.SortStep: 334 | sorts := []string{} 335 | for _, item := range s.Items { 336 | dir := 1 337 | if item.Desc { 338 | dir = -1 339 | } 340 | sorts = append(sorts, fmt.Sprintf("%s: %d", exprToField(item.Expr), dir)) 341 | } 342 | if len(sorts) > 0 { 343 | stages = append(stages, fmt.Sprintf("{ $sort: { %s } }", strings.Join(sorts, ", "))) 344 | } 345 | case *ast.TakeStep: 346 | if s.Limit > 0 { 347 | stages = append(stages, fmt.Sprintf("{ $limit: %d }", s.Limit)) 348 | } 349 | } 350 | } 351 | 352 | return fmt.Sprintf("db.%s.aggregate([%s])", q.From.Table, strings.Join(stages, ", ")) 353 | } 354 | 355 | func toMongoCondition(e ast.Expr) string { 356 | b, ok := e.(*ast.Binary) 357 | if !ok { 358 | return "" 359 | } 360 | 361 | field := exprToField(b.Left) 362 | value := exprToValue(b.Right) 363 | 364 | switch b.Op { 365 | case "==": 366 | return fmt.Sprintf("%s: %s", field, value) 367 | case ">": 368 | return fmt.Sprintf("%s: { $gt: %s }", field, value) 369 | case "<": 370 | return fmt.Sprintf("%s: { $lt: %s }", field, value) 371 | default: 372 | return "" 373 | } 374 | } 375 | 376 | func exprToField(e ast.Expr) string { 377 | if id, ok := e.(*ast.Ident); ok && len(id.Parts) > 0 { 378 | return strings.Join(id.Parts, ".") 379 | } 380 | return e.String() 381 | } 382 | 383 | func exprToValue(e ast.Expr) string { 384 | switch v := e.(type) { 385 | case *ast.Number: 386 | return v.Value 387 | case *ast.StringLit: 388 | return fmt.Sprintf("\"%s\"", v.Value) 389 | default: 390 | return "null" 391 | } 392 | } 393 | ``` 394 | 395 | See `examples/mongo/main.go` for the full example with more operators and safer parsing. 
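Extending `toMongoCondition` with more comparison operators is mechanical. A sketch of the extra cases — hypothetical additions that drop into the switch above, using MongoDB's standard `$gte`/`$lte`/`$ne` operators:

```go
	case ">=":
		return fmt.Sprintf("%s: { $gte: %s }", field, value)
	case "<=":
		return fmt.Sprintf("%s: { $lte: %s }", field, value)
	case "!=":
		return fmt.Sprintf("%s: { $ne: %s }", field, value)
```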
396 | 
430 | ## Supported Dialects
431 | 
432 | | Dialect | Status | Notes |
433 | |---------|--------|-------|
434 | | Generic | ✅ | Postgres-compatible fallback |
435 | | PostgreSQL | ✅ | Full support |
436 | | MySQL | ✅ | Backtick identifiers, LIMIT syntax |
437 | | SQLite | ✅ | Standard SQL subset |
438 | | DuckDB | ✅ | Advanced analytics functions |
439 | | MS SQL Server | ✅ | TOP clause, T-SQL functions |
440 | | BigQuery | ✅ | Google BigQuery syntax |
441 | | Snowflake | ✅ | Snowflake-specific features |
442 | | ClickHouse | ✅ | ClickHouse syntax |
443 | 
444 | ## Development
445 | 
446 | ### Prerequisites
447 | 
448 | - Go 1.25+ (see `go.mod`)
449 | 
450 | ### Building
451 | 
452 | ```bash
453 | go build ./...
454 | ```
455 | 
456 | ### Testing
457 | 
458 | ```bash
459 | go test ./...
460 | ```
461 | 
462 | ### Running Examples
463 | 
464 | ```bash
466 | go run examples/mongo/main.go
467 | ```
468 | 
469 | ## Project Structure
470 | 
471 | ```
472 | gophrql/
473 | ├── ast/            # Public AST types
474 | ├── internal/
475 | │   ├── parser/     # PRQL parser
476 | │   └── sqlgen/     # SQL generation + dialects
477 | ├── examples/       # Usage examples
478 | ├── docs/           # Documentation
479 | └── gophrql.go      # Public API
480 | ```
481 | 
482 | ## Contributing
483 | 
484 | Contributions are welcome! Please see [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) for community guidelines.
485 | 
486 | ### Guidelines
487 | 
488 | 1. **Keep changes focused** - One feature/fix per PR
489 | 2. **Add tests** - Ensure coverage for new features
490 | 3. **Follow conventions** - Use `gofmt` and follow existing patterns
491 | 4. **Update docs** - Keep README and examples current
492 | 
493 | ## Acknowledgments
494 | 
495 | This project is inspired by and implements the [PRQL language specification](https://prql-lang.org/book/). Special thanks to the PRQL community and the upstream [prql](https://github.com/PRQL/prql) project.
496 | 
497 | ## License
498 | 
499 | Apache License 2.0 - see [LICENSE](LICENSE) for details.
500 | 501 | ## Resources 502 | 503 | - [PRQL Website](https://prql-lang.org) 504 | - [PRQL Book](https://prql-lang.org/book/) 505 | - [PRQL Playground](https://prql-lang.org/playground/) 506 | - [PRQL Discord](https://discord.gg/eQcfaCmsNc) 507 | -------------------------------------------------------------------------------- /compile_test.go: -------------------------------------------------------------------------------- 1 | package gophrql 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func TestCompileSnapshots(t *testing.T) { 9 | t.Helper() 10 | 11 | cases := []struct { 12 | name string 13 | prql string 14 | wantSQL string 15 | }{ 16 | { 17 | name: "target_sql_generic_simple", 18 | prql: ` 19 | target sql.generic 20 | from invoices 21 | take 1 22 | `, 23 | wantSQL: ` 24 | SELECT 25 | * 26 | FROM 27 | invoices 28 | LIMIT 29 | 1 30 | `, 31 | }, 32 | { 33 | name: "aggregation", 34 | prql: ` 35 | from tracks 36 | filter genre_id == 100 37 | derive empty_name = name == '' 38 | aggregate {sum track_id, concat_array name, all empty_name, any empty_name} 39 | `, 40 | wantSQL: ` 41 | SELECT 42 | COALESCE(SUM(track_id), 0), 43 | COALESCE(STRING_AGG(name, ''), ''), 44 | COALESCE(BOOL_AND(name = ''), TRUE), 45 | COALESCE(BOOL_OR(name = ''), FALSE) 46 | FROM 47 | tracks 48 | WHERE 49 | genre_id = 100 50 | `, 51 | }, 52 | { 53 | name: "date_to_text_formats", 54 | prql: ` 55 | from invoices 56 | take 20 57 | select { 58 | d1 = (invoice_date | date.to_text "%Y/%m/%d"), 59 | d2 = (invoice_date | date.to_text "%F"), 60 | d3 = (invoice_date | date.to_text "%D"), 61 | d4 = (invoice_date | date.to_text "%H:%M:%S.%f"), 62 | d5 = (invoice_date | date.to_text "%r"), 63 | d6 = (invoice_date | date.to_text "%A %B %-d %Y"), 64 | d7 = (invoice_date | date.to_text "%a, %-d %b %Y at %I:%M:%S %p"), 65 | d8 = (invoice_date | date.to_text "%+"), 66 | d9 = (invoice_date | date.to_text "%-d/%-m/%y"), 67 | d10 = (invoice_date | date.to_text "%-Hh %Mmin"), 68 | d11 = (invoice_date | date.to_text "%M'%S\""), 69 | d12 = (invoice_date | date.to_text "100%% in %d days"), 70 | } 71 | `, 72 | wantSQL: ` 73 | SELECT 74 | strftime(invoice_date, '%Y/%m/%d') AS d1, 75 | strftime(invoice_date, '%F') AS d2, 76 | strftime(invoice_date, '%D') AS d3, 77 | strftime(invoice_date, '%H:%M:%S.%f') AS d4, 78 | strftime(invoice_date, '%r') AS d5, 79 | strftime(invoice_date, '%A %B %-d %Y') AS d6, 80 | strftime(invoice_date, '%a, %-d %b %Y at %I:%M:%S %p') AS d7, 81 | strftime(invoice_date, '%+') AS d8, 82 | strftime(invoice_date, '%-d/%-m/%y') AS d9, 83 | strftime(invoice_date, '%-Hh %Mmin') AS d10, 84 | strftime(invoice_date, '%M''%S"') AS d11, 85 | strftime(invoice_date, '100%% in %d days') AS d12 86 | FROM 87 | invoices 88 | LIMIT 89 | 20 90 | `, 91 | }, 92 | { 93 | name: "switch_case_display", 94 | prql: ` 95 | from tracks 96 | sort milliseconds 97 | select display = case [ 98 | composer != null => composer, 99 | genre_id < 17 => 'no composer', 100 | true => f'unknown composer' 101 | ] 102 | take 10 103 | `, 104 | wantSQL: ` 105 | WITH table_0 AS ( 106 | SELECT 107 | CASE 108 | WHEN composer IS NOT NULL THEN composer 109 | WHEN genre_id < 17 THEN 'no composer' 110 | ELSE 'unknown composer' 111 | END AS display, 112 | milliseconds 113 | FROM 114 | tracks 115 | ORDER BY 116 | milliseconds 117 | LIMIT 118 | 10 119 | ) 120 | SELECT 121 | display 122 | FROM 123 | table_0 124 | ORDER BY 125 | milliseconds 126 | `, 127 | }, 128 | { 129 | name: "loop_recursive_numbers", 130 | prql: ` 131 | from [{n = 1}] 132 | select n = n - 2 133 | 
loop (filter n < 4 | select n = n + 1) 134 | select n = n * 2 135 | sort n 136 | `, 137 | wantSQL: ` 138 | WITH RECURSIVE table_0 AS ( 139 | SELECT 140 | 1 AS n 141 | ), 142 | table_1 AS ( 143 | SELECT 144 | n - 2 AS _expr_0 145 | FROM 146 | table_0 147 | UNION ALL 148 | SELECT 149 | _expr_0 + 1 150 | FROM 151 | table_1 152 | WHERE 153 | _expr_0 < 4 154 | ) 155 | SELECT 156 | _expr_0 * 2 AS n 157 | FROM 158 | table_1 AS table_2 159 | ORDER BY 160 | n 161 | `, 162 | }, 163 | { 164 | name: "genre_counts", 165 | prql: ` 166 | let genre_count = ( 167 | from genres 168 | aggregate {a = count name} 169 | ) 170 | 171 | from genre_count 172 | filter a > 0 173 | select a = -a 174 | `, 175 | wantSQL: ` 176 | WITH genre_count AS ( 177 | SELECT 178 | COUNT(*) AS a 179 | FROM 180 | genres 181 | ) 182 | SELECT 183 | - a AS a 184 | FROM 185 | genre_count 186 | WHERE 187 | a > 0 188 | `, 189 | }, 190 | { 191 | name: "let_binding_simple_cte", 192 | prql: ` 193 | let top_customers = ( 194 | from invoices 195 | aggregate { total = count invoice_id } 196 | ) 197 | 198 | from top_customers 199 | select total 200 | `, 201 | wantSQL: ` 202 | WITH top_customers AS ( 203 | SELECT 204 | COUNT(*) AS total 205 | FROM 206 | invoices 207 | ) 208 | SELECT 209 | total 210 | FROM 211 | top_customers 212 | `, 213 | }, 214 | { 215 | name: "group_sort_basic", 216 | prql: ` 217 | from tracks 218 | derive d = album_id + 1 219 | group d ( 220 | aggregate { 221 | n1 = (track_id | sum), 222 | } 223 | ) 224 | sort d 225 | take 10 226 | select { d1 = d, n1 } 227 | `, 228 | wantSQL: ` 229 | WITH table_0 AS ( 230 | SELECT 231 | COALESCE(SUM(track_id), 0) AS n1, 232 | album_id + 1 AS _expr_0 233 | FROM 234 | tracks 235 | GROUP BY 236 | album_id + 1 237 | ), 238 | table_1 AS ( 239 | SELECT 240 | _expr_0 AS d1, 241 | n1, 242 | _expr_0 243 | FROM 244 | table_0 245 | ORDER BY 246 | _expr_0 247 | LIMIT 248 | 10 249 | ) 250 | SELECT 251 | d1, 252 | n1 253 | FROM 254 | table_1 255 | ORDER BY 256 | d1 257 | `, 258 | }, 259 | { 260 | name: "append_select_simple_filter", 261 | prql: ` 262 | from invoices 263 | select { invoice_id, billing_country } 264 | append ( 265 | from invoices 266 | select { invoice_id = ` + "`invoice_id`" + ` + 100, billing_country } 267 | ) 268 | filter (billing_country | text.starts_with("I")) 269 | `, 270 | wantSQL: ` 271 | WITH table_1 AS ( 272 | SELECT 273 | invoice_id, 274 | billing_country 275 | FROM 276 | invoices 277 | UNION 278 | ALL 279 | SELECT 280 | invoice_id + 100 AS invoice_id, 281 | billing_country 282 | FROM 283 | invoices 284 | ) 285 | SELECT 286 | invoice_id, 287 | billing_country 288 | FROM 289 | table_1 290 | WHERE 291 | billing_country LIKE CONCAT('I', '%') 292 | `, 293 | }, 294 | { 295 | name: "append_select_compute", 296 | prql: ` 297 | from invoices 298 | derive total = case [total < 10 => total * 2, true => total] 299 | select { customer_id, invoice_id, total } 300 | take 5 301 | append ( 302 | from invoice_items 303 | derive unit_price = case [unit_price < 1 => unit_price * 2, true => unit_price] 304 | select { invoice_line_id, invoice_id, unit_price } 305 | take 5 306 | ) 307 | select { a = customer_id * 2, b = math.round 1 (invoice_id * total) } 308 | `, 309 | wantSQL: ` 310 | WITH table_1 AS ( 311 | SELECT 312 | * 313 | FROM 314 | ( 315 | SELECT 316 | invoice_id, 317 | CASE 318 | WHEN total < 10 THEN total * 2 319 | ELSE total 320 | END AS _expr_0, 321 | customer_id 322 | FROM 323 | invoices 324 | LIMIT 325 | 5 326 | ) AS table_3 327 | UNION 328 | ALL 329 | SELECT 330 | * 331 | FROM 332 | ( 
333 | SELECT 334 | invoice_id, 335 | CASE 336 | WHEN unit_price < 1 THEN unit_price * 2 337 | ELSE unit_price 338 | END AS unit_price, 339 | invoice_line_id 340 | FROM 341 | invoice_items 342 | LIMIT 343 | 5 344 | ) AS table_4 345 | ) 346 | SELECT 347 | customer_id * 2 AS a, 348 | ROUND(invoice_id * _expr_0, 1) AS b 349 | FROM 350 | table_1 351 | `, 352 | }, 353 | { 354 | name: "take_range_with_sort", 355 | prql: ` 356 | from tracks 357 | sort {+track_id} 358 | take 3..5 359 | `, 360 | wantSQL: ` 361 | SELECT 362 | * 363 | FROM 364 | tracks 365 | ORDER BY 366 | track_id 367 | LIMIT 368 | 3 OFFSET 2 369 | `, 370 | }, 371 | { 372 | name: "sort_with_join_alias", 373 | prql: ` 374 | from e=employees 375 | filter first_name != "Mitchell" 376 | sort {first_name, last_name} 377 | 378 | join manager=employees side:left (e.reports_to == manager.employee_id) 379 | 380 | select {e.first_name, e.last_name, manager.first_name} 381 | `, 382 | wantSQL: ` 383 | WITH table_0 AS ( 384 | SELECT 385 | first_name, 386 | last_name, 387 | reports_to 388 | FROM 389 | employees AS e 390 | WHERE 391 | first_name <> 'Mitchell' 392 | ) 393 | SELECT 394 | table_0.first_name, 395 | table_0.last_name, 396 | manager.first_name 397 | FROM 398 | table_0 399 | LEFT OUTER JOIN employees AS manager ON table_0.reports_to = manager.employee_id 400 | ORDER BY 401 | table_0.first_name, 402 | table_0.last_name 403 | `, 404 | }, 405 | { 406 | name: "sort_alias_filter_join", 407 | prql: ` 408 | from albums 409 | select { AA=album_id, artist_id } 410 | sort AA 411 | filter AA >= 25 412 | join artists (==artist_id) 413 | `, 414 | wantSQL: ` 415 | WITH table_1 AS ( 416 | SELECT 417 | album_id AS "AA", 418 | artist_id 419 | FROM 420 | albums 421 | ), 422 | table_0 AS ( 423 | SELECT 424 | "AA", 425 | artist_id 426 | FROM 427 | table_1 428 | WHERE 429 | "AA" >= 25 430 | ) 431 | SELECT 432 | table_0."AA", 433 | table_0.artist_id, 434 | artists.* 435 | FROM 436 | table_0 437 | INNER JOIN artists ON table_0.artist_id = artists.artist_id 438 | ORDER BY 439 | table_0."AA" 440 | `, 441 | }, 442 | { 443 | name: "constants_only", 444 | prql: ` 445 | from genres 446 | take 10 447 | filter true 448 | take 20 449 | filter true 450 | select d = 10 451 | `, 452 | wantSQL: ` 453 | WITH table_1 AS ( 454 | SELECT 455 | NULL 456 | FROM 457 | genres 458 | LIMIT 459 | 10 460 | ), table_0 AS ( 461 | SELECT 462 | NULL 463 | FROM 464 | table_1 465 | WHERE 466 | true 467 | LIMIT 468 | 20 469 | ) 470 | SELECT 471 | 10 AS d 472 | FROM 473 | table_0 474 | WHERE 475 | true 476 | `, 477 | }, 478 | { 479 | name: "append_select_union", 480 | prql: ` 481 | from invoices 482 | select { customer_id, invoice_id, billing_country } 483 | take 10..15 484 | append ( 485 | from invoices 486 | select { customer_id, invoice_id, billing_country } 487 | take 40..45 488 | ) 489 | select { billing_country, invoice_id } 490 | `, 491 | wantSQL: ` 492 | SELECT 493 | * 494 | FROM 495 | ( 496 | SELECT 497 | billing_country, 498 | invoice_id 499 | FROM 500 | invoices 501 | LIMIT 502 | 6 OFFSET 9 503 | ) AS table_2 504 | UNION 505 | ALL 506 | SELECT 507 | * 508 | FROM 509 | ( 510 | SELECT 511 | billing_country, 512 | invoice_id 513 | FROM 514 | invoices 515 | LIMIT 516 | 6 OFFSET 39 517 | ) AS table_3 518 | `, 519 | }, 520 | { 521 | name: "append_select_simple", 522 | prql: ` 523 | from invoices 524 | select { invoice_id, billing_country } 525 | append ( 526 | from invoices 527 | select { invoice_id = invoice_id + 100, billing_country } 528 | ) 529 | filter (billing_country | 
text.starts_with "I") 530 | `, 531 | wantSQL: ` 532 | WITH table_1 AS ( 533 | SELECT 534 | invoice_id, 535 | billing_country 536 | FROM 537 | invoices 538 | UNION 539 | ALL 540 | SELECT 541 | invoice_id + 100 AS invoice_id, 542 | billing_country 543 | FROM 544 | invoices 545 | ) 546 | SELECT 547 | invoice_id, 548 | billing_country 549 | FROM 550 | table_1 551 | WHERE 552 | billing_country LIKE CONCAT('I', '%') 553 | `, 554 | }, 555 | { 556 | name: "append_select_multiple_with_null", 557 | prql: ` 558 | from invoices 559 | select { customer_id, invoice_id, billing_country } 560 | take 5 561 | append ( 562 | from employees 563 | select { employee_id, employee_id, country } 564 | take 5 565 | ) 566 | append ( 567 | from invoice_items 568 | select { invoice_line_id, invoice_id, null } 569 | take 5 570 | ) 571 | select { billing_country, invoice_id } 572 | `, 573 | wantSQL: ` 574 | SELECT 575 | * 576 | FROM 577 | ( 578 | SELECT 579 | billing_country, 580 | invoice_id 581 | FROM 582 | invoices 583 | LIMIT 584 | 5 585 | ) AS table_4 586 | UNION 587 | ALL 588 | SELECT 589 | * 590 | FROM 591 | ( 592 | SELECT 593 | country, 594 | employee_id 595 | FROM 596 | employees 597 | LIMIT 598 | 5 599 | ) AS table_5 600 | UNION 601 | ALL 602 | SELECT 603 | * 604 | FROM 605 | ( 606 | SELECT 607 | NULL, 608 | invoice_id 609 | FROM 610 | invoice_items 611 | LIMIT 612 | 5 613 | ) AS table_6 614 | `, 615 | }, 616 | { 617 | name: "append_select_nulls", 618 | prql: ` 619 | from invoices 620 | select {an_id = invoice_id, name = null} 621 | take 2 622 | append ( 623 | from employees 624 | select {an_id = null, name = first_name} 625 | take 2 626 | ) 627 | `, 628 | wantSQL: ` 629 | SELECT 630 | * 631 | FROM 632 | ( 633 | SELECT 634 | invoice_id AS an_id, 635 | NULL AS name 636 | FROM 637 | invoices 638 | LIMIT 639 | 2 640 | ) AS table_2 641 | UNION 642 | ALL 643 | SELECT 644 | * 645 | FROM 646 | ( 647 | SELECT 648 | NULL AS an_id, 649 | first_name AS name 650 | FROM 651 | employees 652 | LIMIT 653 | 2 654 | ) AS table_3 655 | `, 656 | }, 657 | { 658 | name: "window_functions", 659 | prql: ` 660 | from tracks 661 | group genre_id ( 662 | sort milliseconds 663 | derive { 664 | num = row_number this, 665 | total = count this, 666 | last_val = last track_id, 667 | } 668 | take 10 669 | ) 670 | sort {genre_id, milliseconds} 671 | select {track_id, genre_id, num, total, last_val} 672 | filter genre_id >= 22 673 | `, 674 | wantSQL: ` 675 | WITH table_0 AS ( 676 | SELECT 677 | track_id, 678 | genre_id, 679 | ROW_NUMBER() OVER ( 680 | PARTITION BY genre_id 681 | ORDER BY 682 | milliseconds 683 | ) AS num, 684 | COUNT(*) OVER ( 685 | PARTITION BY genre_id 686 | ORDER BY 687 | milliseconds ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING 688 | ) AS total, 689 | LAST_VALUE(track_id) OVER ( 690 | PARTITION BY genre_id 691 | ORDER BY 692 | milliseconds 693 | ) AS last_val, 694 | milliseconds, 695 | ROW_NUMBER() OVER ( 696 | PARTITION BY genre_id 697 | ORDER BY 698 | milliseconds 699 | ) AS _expr_0 700 | FROM 701 | tracks 702 | ), 703 | table_1 AS ( 704 | SELECT 705 | track_id, 706 | genre_id, 707 | num, 708 | total, 709 | last_val, 710 | milliseconds 711 | FROM 712 | table_0 713 | WHERE 714 | _expr_0 <= 10 715 | AND genre_id >= 22 716 | ) 717 | SELECT 718 | track_id, 719 | genre_id, 720 | num, 721 | total, 722 | last_val 723 | FROM 724 | table_1 725 | ORDER BY 726 | genre_id, 727 | milliseconds 728 | `, 729 | }, 730 | { 731 | name: "stdlib_math_module", 732 | prql: ` 733 | from employees 734 | select { 735 | salary_abs = 
math.abs salary, 736 | salary_floor = math.floor salary, 737 | salary_ceil = math.ceil salary, 738 | salary_pi = math.pi, 739 | salary_exp = math.exp salary, 740 | salary_ln = math.ln salary, 741 | salary_log10 = math.log10 salary, 742 | salary_log = math.log 2 salary, 743 | salary_sqrt = math.sqrt salary, 744 | salary_degrees = math.degrees salary, 745 | salary_radians = math.radians salary, 746 | salary_cos = math.cos salary, 747 | salary_acos = math.acos salary, 748 | salary_sin = math.sin salary, 749 | salary_asin = math.asin salary, 750 | salary_tan = math.tan salary, 751 | salary_atan = math.atan salary, 752 | salary_pow = (salary | math.pow 2), 753 | salary_pow_op = salary ** 2, 754 | } 755 | `, 756 | wantSQL: ` 757 | SELECT 758 | ABS(salary) AS salary_abs, 759 | FLOOR(salary) AS salary_floor, 760 | CEIL(salary) AS salary_ceil, 761 | PI() AS salary_pi, 762 | EXP(salary) AS salary_exp, 763 | LN(salary) AS salary_ln, 764 | LOG10(salary) AS salary_log10, 765 | LOG10(salary) / LOG10(2) AS salary_log, 766 | SQRT(salary) AS salary_sqrt, 767 | DEGREES(salary) AS salary_degrees, 768 | RADIANS(salary) AS salary_radians, 769 | COS(salary) AS salary_cos, 770 | ACOS(salary) AS salary_acos, 771 | SIN(salary) AS salary_sin, 772 | ASIN(salary) AS salary_asin, 773 | TAN(salary) AS salary_tan, 774 | ATAN(salary) AS salary_atan, 775 | POW(salary, 2) AS salary_pow, 776 | POW(salary, 2) AS salary_pow_op 777 | FROM 778 | employees 779 | `, 780 | }, 781 | { 782 | name: "text_module_filters", 783 | prql: ` 784 | from albums 785 | select { 786 | title, 787 | title_and_spaces = f" {title} ", 788 | low = (title | text.lower), 789 | up = (title | text.upper), 790 | ltrimmed = (title | text.ltrim), 791 | rtrimmed = (title | text.rtrim), 792 | trimmed = (title | text.trim), 793 | len = (title | text.length), 794 | subs = (title | text.extract 2 5), 795 | replace = (title | text.replace "al" "PIKA"), 796 | } 797 | sort {title} 798 | filter (title | text.starts_with "Black") || (title | text.contains "Sabbath") || (title | text.ends_with "os") 799 | `, 800 | wantSQL: ` 801 | WITH table_0 AS ( 802 | SELECT 803 | title, 804 | CONCAT(' ', title, ' ') AS title_and_spaces, 805 | LOWER(title) AS low, 806 | UPPER(title) AS up, 807 | LTRIM(title) AS ltrimmed, 808 | RTRIM(title) AS rtrimmed, 809 | TRIM(title) AS trimmed, 810 | CHAR_LENGTH(title) AS len, 811 | SUBSTRING(title, 2, 5) AS subs, 812 | REPLACE(title, 'al', 'PIKA') AS "replace" 813 | FROM 814 | albums 815 | ) 816 | SELECT 817 | title, 818 | title_and_spaces, 819 | low, 820 | up, 821 | ltrimmed, 822 | rtrimmed, 823 | trimmed, 824 | len, 825 | subs, 826 | "replace" 827 | FROM 828 | table_0 829 | WHERE 830 | title LIKE CONCAT('Black', '%') 831 | OR title LIKE CONCAT('%', 'Sabbath', '%') 832 | OR title LIKE CONCAT('%', 'os') 833 | ORDER BY 834 | title 835 | `, 836 | }, 837 | { 838 | name: "pipelines_filters_sort_take", 839 | prql: ` 840 | from tracks 841 | 842 | filter (name ~= "Love") 843 | filter ((milliseconds / 1000 / 60) | in 3..4) 844 | sort track_id 845 | take 1..15 846 | select {name, composer} 847 | `, 848 | wantSQL: ` 849 | WITH table_0 AS ( 850 | SELECT 851 | name, 852 | composer, 853 | track_id 854 | FROM 855 | tracks 856 | WHERE 857 | REGEXP(name, 'Love') 858 | AND milliseconds / 1000 / 60 BETWEEN 3 AND 4 859 | ORDER BY 860 | track_id 861 | LIMIT 862 | 15 863 | ) 864 | SELECT 865 | name, 866 | composer 867 | FROM 868 | table_0 869 | ORDER BY 870 | track_id 871 | `, 872 | }, 873 | { 874 | name: "distinct_group_take_one", 875 | prql: ` 876 | from tracks 
877 | select {album_id, genre_id} 878 | group tracks.* (take 1) 879 | sort tracks.* 880 | `, 881 | wantSQL: ` 882 | WITH table_0 AS ( 883 | SELECT 884 | DISTINCT album_id, 885 | genre_id 886 | FROM 887 | tracks 888 | ) 889 | SELECT 890 | album_id, 891 | genre_id 892 | FROM 893 | table_0 894 | ORDER BY 895 | album_id, 896 | genre_id 897 | `, 898 | }, 899 | { 900 | name: "arithmetic_div_mod", 901 | prql: ` 902 | from [ 903 | { id = 1, x_int = 13, x_float = 13.0, k_int = 5, k_float = 5.0 }, 904 | { id = 2, x_int = -13, x_float = -13.0, k_int = 5, k_float = 5.0 }, 905 | { id = 3, x_int = 13, x_float = 13.0, k_int = -5, k_float = -5.0 }, 906 | { id = 4, x_int = -13, x_float = -13.0, k_int = -5, k_float = -5.0 }, 907 | ] 908 | select { 909 | id, 910 | 911 | x_int / k_int, 912 | x_int / k_float, 913 | x_float / k_int, 914 | x_float / k_float, 915 | 916 | q_ii = x_int // k_int, 917 | q_if = x_int // k_float, 918 | q_fi = x_float // k_int, 919 | q_ff = x_float // k_float, 920 | 921 | r_ii = x_int % k_int, 922 | r_if = x_int % k_float, 923 | r_fi = x_float % k_int, 924 | r_ff = x_float % k_float, 925 | 926 | (q_ii * k_int + r_ii | math.round 0), 927 | (q_if * k_float + r_if | math.round 0), 928 | (q_fi * k_int + r_fi | math.round 0), 929 | (q_ff * k_float + r_ff | math.round 0), 930 | } 931 | sort id 932 | `, 933 | wantSQL: ` 934 | WITH table_0 AS ( 935 | SELECT 936 | 1 AS id, 937 | 13 AS x_int, 938 | 13.0 AS x_float, 939 | 5 AS k_int, 940 | 5.0 AS k_float 941 | UNION 942 | ALL 943 | SELECT 944 | 2 AS id, 945 | -13 AS x_int, 946 | -13.0 AS x_float, 947 | 5 AS k_int, 948 | 5.0 AS k_float 949 | UNION 950 | ALL 951 | SELECT 952 | 3 AS id, 953 | 13 AS x_int, 954 | 13.0 AS x_float, 955 | -5 AS k_int, 956 | -5.0 AS k_float 957 | UNION 958 | ALL 959 | SELECT 960 | 4 AS id, 961 | -13 AS x_int, 962 | -13.0 AS x_float, 963 | -5 AS k_int, 964 | -5.0 AS k_float 965 | ) 966 | SELECT 967 | id, 968 | x_int / k_int, 969 | x_int / k_float, 970 | x_float / k_int, 971 | x_float / k_float, 972 | FLOOR(ABS(x_int / k_int)) * SIGN(x_int) * SIGN(k_int) AS q_ii, 973 | FLOOR(ABS(x_int / k_float)) * SIGN(x_int) * SIGN(k_float) AS q_if, 974 | FLOOR(ABS(x_float / k_int)) * SIGN(x_float) * SIGN(k_int) AS q_fi, 975 | FLOOR(ABS(x_float / k_float)) * SIGN(x_float) * SIGN(k_float) AS q_ff, 976 | x_int % k_int AS r_ii, 977 | x_int % k_float AS r_if, 978 | x_float % k_int AS r_fi, 979 | x_float % k_float AS r_ff, 980 | ROUND( 981 | FLOOR(ABS(x_int / k_int)) * SIGN(x_int) * SIGN(k_int) * k_int + x_int % k_int, 982 | 0 983 | ), 984 | ROUND( 985 | FLOOR(ABS(x_int / k_float)) * SIGN(x_int) * SIGN(k_float) * k_float + x_int % k_float, 986 | 0 987 | ), 988 | ROUND( 989 | FLOOR(ABS(x_float / k_int)) * SIGN(x_float) * SIGN(k_int) * k_int + x_float % k_int, 990 | 0 991 | ), 992 | ROUND( 993 | FLOOR(ABS(x_float / k_float)) * SIGN(x_float) * SIGN(k_float) * k_float + x_float % k_float, 994 | 0 995 | ) 996 | FROM 997 | table_0 998 | ORDER BY 999 | id 1000 | `, 1001 | }, 1002 | { 1003 | name: "set_ops_remove", 1004 | prql: ` 1005 | let distinct = rel -> (from t = _param.rel | group {t.*} (take 1)) 1006 | 1007 | from_text format:json '{ "columns": ["a"], "data": [[1], [2], [2], [3]] }' 1008 | distinct 1009 | remove (from_text format:json '{ "columns": ["a"], "data": [[1], [2]] }') 1010 | sort a 1011 | `, 1012 | wantSQL: ` 1013 | WITH table_0 AS ( 1014 | SELECT 1015 | 1 AS a 1016 | UNION 1017 | ALL 1018 | SELECT 1019 | 2 AS a 1020 | UNION 1021 | ALL 1022 | SELECT 1023 | 2 AS a 1024 | UNION 1025 | ALL 1026 | SELECT 1027 | 3 AS a 1028 | ), 1029 | 
table_1 AS ( 1030 | SELECT 1031 | 1 AS a 1032 | UNION 1033 | ALL 1034 | SELECT 1035 | 2 AS a 1036 | ), 1037 | table_2 AS ( 1038 | SELECT 1039 | a 1040 | FROM 1041 | table_0 1042 | EXCEPT 1043 | DISTINCT 1044 | SELECT 1045 | * 1046 | FROM 1047 | table_1 1048 | ) 1049 | SELECT 1050 | a 1051 | FROM 1052 | table_2 1053 | ORDER BY 1054 | a 1055 | `, 1056 | }, 1057 | { 1058 | name: "group_sort_derive_select_join", 1059 | prql: ` 1060 | s"SELECT album_id,title,artist_id FROM albums" 1061 | group {artist_id} (aggregate { album_title_count = count this.` + "`title`" + `}) 1062 | sort {this.artist_id, this.album_title_count} 1063 | derive {new_album_count = this.album_title_count} 1064 | select {this.artist_id, this.new_album_count} 1065 | join side:left ( s"SELECT artist_id,name as artist_name FROM artists" ) (this.artist_id == that.artist_id) 1066 | `, 1067 | wantSQL: ` 1068 | WITH table_0 AS ( 1069 | SELECT 1070 | album_id, 1071 | title, 1072 | artist_id 1073 | FROM 1074 | albums 1075 | ), 1076 | table_4 AS ( 1077 | SELECT 1078 | artist_id, 1079 | COUNT(*) AS _expr_0 1080 | FROM 1081 | table_0 1082 | GROUP BY 1083 | artist_id 1084 | ), 1085 | table_2 AS ( 1086 | SELECT 1087 | artist_id, 1088 | _expr_0 AS new_album_count, 1089 | _expr_0 1090 | FROM 1091 | table_4 1092 | ), 1093 | table_1 AS ( 1094 | SELECT 1095 | artist_id, 1096 | name as artist_name 1097 | FROM 1098 | artists 1099 | ), 1100 | table_3 AS ( 1101 | SELECT 1102 | table_2.artist_id, 1103 | table_2.new_album_count, 1104 | table_1.artist_id AS _expr_1, 1105 | table_1.artist_name, 1106 | table_2._expr_0 1107 | FROM 1108 | table_2 1109 | LEFT OUTER JOIN table_1 ON table_2.artist_id = table_1.artist_id 1110 | ) 1111 | SELECT 1112 | artist_id, 1113 | new_album_count, 1114 | _expr_1, 1115 | artist_name 1116 | FROM 1117 | table_3 1118 | ORDER BY 1119 | artist_id, 1120 | new_album_count 1121 | `, 1122 | }, 1123 | { 1124 | name: "cast_projection", 1125 | prql: ` 1126 | from tracks 1127 | sort {-bytes} 1128 | select { 1129 | name, 1130 | bin = ((album_id | as REAL) * 99) 1131 | } 1132 | take 20 1133 | `, 1134 | wantSQL: ` 1135 | WITH table_0 AS ( 1136 | SELECT 1137 | name, 1138 | CAST(album_id AS REAL) * 99 AS bin, 1139 | bytes 1140 | FROM 1141 | tracks 1142 | ORDER BY 1143 | bytes DESC 1144 | LIMIT 1145 | 20 1146 | ) 1147 | SELECT 1148 | name, 1149 | bin 1150 | FROM 1151 | table_0 1152 | ORDER BY 1153 | bytes DESC 1154 | `, 1155 | }, 1156 | { 1157 | name: "distinct_on_group_sort_take", 1158 | prql: ` 1159 | from tracks 1160 | select {genre_id, media_type_id, album_id} 1161 | group {genre_id, media_type_id} (sort {-album_id} | take 1) 1162 | sort {-genre_id, media_type_id} 1163 | `, 1164 | wantSQL: ` 1165 | WITH table_0 AS ( 1166 | SELECT 1167 | genre_id, 1168 | media_type_id, 1169 | album_id, 1170 | ROW_NUMBER() OVER ( 1171 | PARTITION BY genre_id, 1172 | media_type_id 1173 | ORDER BY 1174 | album_id DESC 1175 | ) AS _expr_0 1176 | FROM 1177 | tracks 1178 | ) 1179 | SELECT 1180 | genre_id, 1181 | media_type_id, 1182 | album_id 1183 | FROM 1184 | table_0 1185 | WHERE 1186 | _expr_0 <= 1 1187 | ORDER BY 1188 | genre_id DESC, 1189 | media_type_id 1190 | `, 1191 | }, 1192 | { 1193 | name: "group_sort_limit_take_join", 1194 | prql: ` 1195 | from tracks 1196 | select {genre_id,milliseconds} 1197 | group {genre_id} ( 1198 | sort {-milliseconds} 1199 | take 3 1200 | ) 1201 | join genres (==genre_id) 1202 | select {name, milliseconds} 1203 | sort {+name,-milliseconds} 1204 | `, 1205 | wantSQL: ` 1206 | WITH table_1 AS ( 1207 | SELECT 1208 | 
milliseconds, 1209 | genre_id, 1210 | ROW_NUMBER() OVER ( 1211 | PARTITION BY genre_id 1212 | ORDER BY 1213 | milliseconds DESC 1214 | ) AS _expr_0 1215 | FROM 1216 | tracks 1217 | ), 1218 | table_0 AS ( 1219 | SELECT 1220 | milliseconds, 1221 | genre_id 1222 | FROM 1223 | table_1 1224 | WHERE 1225 | _expr_0 <= 3 1226 | ) 1227 | SELECT 1228 | genres.name, 1229 | table_0.milliseconds 1230 | FROM 1231 | table_0 1232 | INNER JOIN genres ON table_0.genre_id = genres.genre_id 1233 | ORDER BY 1234 | genres.name, 1235 | table_0.milliseconds DESC 1236 | `, 1237 | }, 1238 | { 1239 | name: "group_sort_filter_derive_select_join", 1240 | prql: ` 1241 | s"SELECT album_id,title,artist_id FROM albums" 1242 | group {artist_id} (aggregate { album_title_count = count this.` + "`title`" + `}) 1243 | sort {this.artist_id, this.album_title_count} 1244 | filter (this.album_title_count) > 10 1245 | derive {new_album_count = this.album_title_count} 1246 | select {this.artist_id, this.new_album_count} 1247 | join side:left ( s"SELECT artist_id,name as artist_name FROM artists" ) (this.artist_id == that.artist_id) 1248 | `, 1249 | wantSQL: ` 1250 | WITH table_0 AS ( 1251 | SELECT 1252 | album_id, 1253 | title, 1254 | artist_id 1255 | FROM 1256 | albums 1257 | ), 1258 | table_3 AS ( 1259 | SELECT 1260 | artist_id, 1261 | COUNT(*) AS _expr_0 1262 | FROM 1263 | table_0 1264 | GROUP BY 1265 | artist_id 1266 | ), 1267 | table_4 AS ( 1268 | SELECT 1269 | artist_id, 1270 | _expr_0 AS new_album_count, 1271 | _expr_0 1272 | FROM 1273 | table_3 1274 | WHERE 1275 | _expr_0 > 10 1276 | ), 1277 | table_2 AS ( 1278 | SELECT 1279 | artist_id, 1280 | new_album_count, 1281 | _expr_0 1282 | FROM 1283 | table_4 1284 | ), 1285 | table_1 AS ( 1286 | SELECT 1287 | artist_id, 1288 | name as artist_name 1289 | FROM 1290 | artists 1291 | ) 1292 | SELECT 1293 | table_2.artist_id, 1294 | table_2.new_album_count, 1295 | table_1.artist_id, 1296 | table_1.artist_name 1297 | FROM 1298 | table_2 1299 | LEFT OUTER JOIN table_1 ON table_2.artist_id = table_1.artist_id 1300 | ORDER BY 1301 | table_2.artist_id, 1302 | table_2.new_album_count 1303 | `, 1304 | }, 1305 | { 1306 | name: "invoice_totals_window_join", 1307 | prql: ` 1308 | from i=invoices 1309 | join ii=invoice_items (==invoice_id) 1310 | derive { 1311 | city = i.billing_city, 1312 | street = i.billing_address, 1313 | } 1314 | group {city, street} ( 1315 | derive total = ii.unit_price * ii.quantity 1316 | aggregate { 1317 | num_orders = count_distinct i.invoice_id, 1318 | num_tracks = sum ii.quantity, 1319 | total_price = sum total, 1320 | } 1321 | ) 1322 | group {city} ( 1323 | sort street 1324 | window expanding:true ( 1325 | derive {running_total_num_tracks = sum num_tracks} 1326 | ) 1327 | ) 1328 | sort {city, street} 1329 | derive {num_tracks_last_week = lag 7 num_tracks} 1330 | select { 1331 | city, 1332 | street, 1333 | num_orders, 1334 | num_tracks, 1335 | running_total_num_tracks, 1336 | num_tracks_last_week 1337 | } 1338 | take 20 1339 | `, 1340 | wantSQL: ` 1341 | WITH table_0 AS ( 1342 | SELECT 1343 | i.billing_city AS city, 1344 | i.billing_address AS street, 1345 | COUNT(DISTINCT i.invoice_id) AS num_orders, 1346 | COALESCE(SUM(ii.quantity), 0) AS num_tracks 1347 | FROM 1348 | invoices AS i 1349 | INNER JOIN invoice_items AS ii ON i.invoice_id = ii.invoice_id 1350 | GROUP BY 1351 | i.billing_city, 1352 | i.billing_address 1353 | ) 1354 | SELECT 1355 | city, 1356 | street, 1357 | num_orders, 1358 | num_tracks, 1359 | SUM(num_tracks) OVER ( 1360 | PARTITION BY city 1361 | ORDER 
BY 1362 | street ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW 1363 | ) AS running_total_num_tracks, 1364 | LAG(num_tracks, 7) OVER ( 1365 | ORDER BY 1366 | city, 1367 | street 1368 | ) AS num_tracks_last_week 1369 | FROM 1370 | table_0 1371 | ORDER BY 1372 | city, 1373 | street 1374 | LIMIT 1375 | 20 1376 | `, 1377 | }, 1378 | { 1379 | name: "group_all_join_aggregate", 1380 | prql: ` 1381 | from a=albums 1382 | take 10 1383 | join tracks (==album_id) 1384 | group {a.album_id, a.title} ( 1385 | aggregate price = (sum tracks.unit_price | math.round 2) 1386 | ) 1387 | sort album_id 1388 | `, 1389 | wantSQL: ` 1390 | WITH table_0 AS ( 1391 | SELECT 1392 | album_id, 1393 | title 1394 | FROM 1395 | albums AS a 1396 | LIMIT 1397 | 10 1398 | ) 1399 | SELECT 1400 | table_0.album_id, 1401 | table_0.title, 1402 | ROUND(COALESCE(SUM(tracks.unit_price), 0), 2) AS price 1403 | FROM 1404 | table_0 1405 | INNER JOIN tracks ON table_0.album_id = tracks.album_id 1406 | GROUP BY 1407 | table_0.album_id, 1408 | table_0.title 1409 | ORDER BY 1410 | table_0.album_id 1411 | `, 1412 | }, 1413 | { 1414 | name: "read_csv_sort", 1415 | prql: ` 1416 | from (read_csv "data_file_root/media_types.csv") 1417 | sort media_type_id 1418 | `, 1419 | wantSQL: ` 1420 | WITH table_0 AS ( 1421 | SELECT 1422 | * 1423 | FROM 1424 | read_csv('data_file_root/media_types.csv') 1425 | ) 1426 | SELECT 1427 | * 1428 | FROM 1429 | table_0 1430 | ORDER BY 1431 | media_type_id 1432 | `, 1433 | }, 1434 | { 1435 | name: "sort_preserved_through_join", 1436 | prql: ` 1437 | from e=employees 1438 | filter first_name != "Mitchell" 1439 | sort {first_name, last_name} 1440 | join manager=employees side:left (e.reports_to == manager.employee_id) 1441 | select {e.first_name, e.last_name, manager.first_name} 1442 | `, 1443 | wantSQL: ` 1444 | WITH table_0 AS ( 1445 | SELECT 1446 | first_name, 1447 | last_name, 1448 | reports_to 1449 | FROM 1450 | employees AS e 1451 | WHERE 1452 | first_name <> 'Mitchell' 1453 | ) 1454 | SELECT 1455 | table_0.first_name, 1456 | table_0.last_name, 1457 | manager.first_name 1458 | FROM 1459 | table_0 1460 | LEFT OUTER JOIN employees AS manager ON table_0.reports_to = manager.employee_id 1461 | ORDER BY 1462 | table_0.first_name, 1463 | table_0.last_name 1464 | `, 1465 | }, 1466 | { 1467 | name: "sort_alias_join", 1468 | prql: ` 1469 | from albums 1470 | select { AA=album_id, artist_id } 1471 | sort AA 1472 | filter AA >= 25 1473 | join artists (==artist_id) 1474 | `, 1475 | wantSQL: ` 1476 | WITH table_1 AS ( 1477 | SELECT 1478 | album_id AS "AA", 1479 | artist_id 1480 | FROM 1481 | albums 1482 | ), 1483 | table_0 AS ( 1484 | SELECT 1485 | "AA", 1486 | artist_id 1487 | FROM 1488 | table_1 1489 | WHERE 1490 | "AA" >= 25 1491 | ) 1492 | SELECT 1493 | table_0."AA", 1494 | table_0.artist_id, 1495 | artists.* 1496 | FROM 1497 | table_0 1498 | INNER JOIN artists ON table_0.artist_id = artists.artist_id 1499 | ORDER BY 1500 | table_0."AA" 1501 | `, 1502 | }, 1503 | { 1504 | name: "sort_alias_inline_sources", 1505 | prql: ` 1506 | from [{track_id=0, album_id=1, genre_id=2}] 1507 | select { AA=track_id, album_id, genre_id } 1508 | sort AA 1509 | join side:left [{album_id=1, album_title="Songs"}] (==album_id) 1510 | select { AA, AT = album_title ?? "unknown", genre_id } 1511 | filter AA < 25 1512 | join side:left [{genre_id=1, genre_title="Rock"}] (==genre_id) 1513 | select { AA, AT, GT = genre_title ?? 
"unknown" } 1514 | `, 1515 | wantSQL: ` 1516 | WITH table_0 AS ( 1517 | SELECT 1518 | 0 AS track_id, 1519 | 1 AS album_id, 1520 | 2 AS genre_id 1521 | ), 1522 | table_5 AS ( 1523 | SELECT 1524 | track_id AS "AA", 1525 | genre_id, 1526 | album_id 1527 | FROM 1528 | table_0 1529 | ), 1530 | table_1 AS ( 1531 | SELECT 1532 | 1 AS album_id, 1533 | 'Songs' AS album_title 1534 | ), 1535 | table_4 AS ( 1536 | SELECT 1537 | table_5."AA", 1538 | COALESCE(table_1.album_title, 'unknown') AS "AT", 1539 | table_5.genre_id 1540 | FROM 1541 | table_5 1542 | LEFT OUTER JOIN table_1 ON table_5.album_id = table_1.album_id 1543 | ), 1544 | table_3 AS ( 1545 | SELECT 1546 | "AA", 1547 | "AT", 1548 | genre_id 1549 | FROM 1550 | table_4 1551 | WHERE 1552 | "AA" < 25 1553 | ), 1554 | table_2 AS ( 1555 | SELECT 1556 | 1 AS genre_id, 1557 | 'Rock' AS genre_title 1558 | ) 1559 | SELECT 1560 | table_3."AA", 1561 | table_3."AT", 1562 | COALESCE(table_2.genre_title, 'unknown') AS "GT" 1563 | FROM 1564 | table_3 1565 | LEFT OUTER JOIN table_2 ON table_3.genre_id = table_2.genre_id 1566 | ORDER BY 1567 | table_3."AA" 1568 | `, 1569 | }, 1570 | } 1571 | 1572 | for _, tc := range cases { 1573 | tc := tc 1574 | t.Run(tc.name, func(t *testing.T) { 1575 | sql, err := Compile(tc.prql) 1576 | if err != nil { 1577 | t.Fatalf("Compile returned error: %v", err) 1578 | } 1579 | if got, want := normalize(sql), normalize(tc.wantSQL); got != want { 1580 | t.Fatalf("SQL mismatch for %s:\nwant:\n%s\n\ngot:\n%s", tc.name, want, got) 1581 | } 1582 | }) 1583 | } 1584 | } 1585 | 1586 | func normalize(s string) string { 1587 | return strings.Join(strings.Fields(strings.TrimSpace(s)), "") 1588 | } 1589 | -------------------------------------------------------------------------------- /internal/parser/parser.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/maxpert/gophrql/ast" 8 | ) 9 | 10 | // Parse converts PRQL source into an AST.Query. 
11 | func Parse(src string) (*ast.Query, error) { 12 | tokens, err := Lex(src) 13 | if err != nil { 14 | return nil, err 15 | } 16 | p := &Parser{tokens: tokens} 17 | return p.parseQuery() 18 | } 19 | 20 | type Parser struct { 21 | tokens []Token 22 | pos int 23 | stopAtPipe bool 24 | } 25 | 26 | func (p *Parser) parseQuery() (*ast.Query, error) { 27 | p.skipNewlines() 28 | var target string 29 | var bindings []ast.Binding 30 | for { 31 | p.skipNewlines() 32 | if !p.peekIs(IDENT) { 33 | break 34 | } 35 | switch p.peek().Lit { 36 | case "target": 37 | p.next() 38 | if target != "" { 39 | return nil, fmt.Errorf("target already specified") 40 | } 41 | val, err := p.parseTargetValue() 42 | if err != nil { 43 | return nil, err 44 | } 45 | target = val 46 | p.skipToLineEnd() 47 | case "let": 48 | p.next() 49 | binding, ok, err := p.parseLetBinding() 50 | if err != nil { 51 | return nil, err 52 | } 53 | if ok { 54 | bindings = append(bindings, binding) 55 | } 56 | p.skipNewlines() 57 | default: 58 | goto beginQuery 59 | } 60 | } 61 | 62 | beginQuery: 63 | p.skipNewlines() 64 | if p.peekIs(IDENT) && p.peek().Lit == "from" { 65 | p.next() 66 | } else if p.peekIs(IDENT) && (p.peek().Lit == "from_text" || p.peek().Lit == "s") { 67 | // handled in parseSource 68 | } else { 69 | return nil, fmt.Errorf("query must start with 'from'") 70 | } 71 | source, err := p.parseSource() 72 | if err != nil { 73 | return nil, err 74 | } 75 | 76 | var steps []ast.Step 77 | for !p.peekIs(EOF) { 78 | p.skipNewlines() 79 | if p.peekIs(EOF) { 80 | break 81 | } 82 | 83 | switch tok := p.peek(); tok.Typ { 84 | case IDENT: 85 | switch tok.Lit { 86 | case "filter": 87 | p.next() 88 | step, err := p.parseFilter() 89 | if err != nil { 90 | return nil, err 91 | } 92 | steps = append(steps, step) 93 | case "derive": 94 | p.next() 95 | step, err := p.parseDerive() 96 | if err != nil { 97 | return nil, err 98 | } 99 | steps = append(steps, step) 100 | case "select": 101 | p.next() 102 | step, err := p.parseSelect() 103 | if err != nil { 104 | return nil, err 105 | } 106 | steps = append(steps, step) 107 | case "aggregate": 108 | p.next() 109 | step, err := p.parseAggregate() 110 | if err != nil { 111 | return nil, err 112 | } 113 | steps = append(steps, step) 114 | case "window": 115 | // Skip window block for now (handled downstream). 
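// For example, a top-level block like
//
//	window (
//	    derive {prev = lag 1 total}
//	)
//
// is consumed token-by-token and dropped here (a hypothetical pipeline; the
// group-level variant further down also skips an `expanding:true`-style
// option before the parentheses).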
116 | p.next() 117 | p.skipNewlines() 118 | if p.peekIs(LPAREN) { 119 | p.next() 120 | p.collectUntilMatching(RPAREN) 121 | } 122 | case "take": 123 | p.next() 124 | step, err := p.parseTake() 125 | if err != nil { 126 | return nil, err 127 | } 128 | steps = append(steps, step) 129 | case "append": 130 | p.next() 131 | step, err := p.parseAppend() 132 | if err != nil { 133 | return nil, err 134 | } 135 | steps = append(steps, step) 136 | case "remove": 137 | p.next() 138 | step, err := p.parseRemove() 139 | if err != nil { 140 | return nil, err 141 | } 142 | steps = append(steps, step) 143 | case "group": 144 | p.next() 145 | step, err := p.parseGroup() 146 | if err != nil { 147 | return nil, err 148 | } 149 | steps = append(steps, step) 150 | case "loop": 151 | p.next() 152 | step, err := p.parseLoop() 153 | if err != nil { 154 | return nil, err 155 | } 156 | steps = append(steps, step) 157 | case "join": 158 | p.next() 159 | step, err := p.parseJoin() 160 | if err != nil { 161 | return nil, err 162 | } 163 | steps = append(steps, step) 164 | case "distinct": 165 | p.next() 166 | steps = append(steps, &ast.DistinctStep{}) 167 | case "sort": 168 | p.next() 169 | step, err := p.parseSort() 170 | if err != nil { 171 | return nil, err 172 | } 173 | steps = append(steps, step) 174 | default: 175 | return nil, fmt.Errorf("unexpected token %q", tok.Lit) 176 | } 177 | case NEWLINE: 178 | p.next() 179 | default: 180 | return nil, fmt.Errorf("unexpected token %v at pos %d", tok, p.pos) 181 | } 182 | } 183 | 184 | return &ast.Query{ 185 | From: source, 186 | Steps: steps, 187 | Target: target, 188 | Bindings: bindings, 189 | }, nil 190 | } 191 | 192 | func (p *Parser) parseTargetValue() (string, error) { 193 | p.skipNewlines() 194 | if !p.peekIs(IDENT) { 195 | return "", fmt.Errorf("expected identifier after target") 196 | } 197 | var parts []string 198 | parts = append(parts, p.next().Lit) 199 | for p.peekIs(DOT) { 200 | p.next() 201 | if !p.peekIs(IDENT) { 202 | return "", fmt.Errorf("expected identifier after '.' 
in target") 203 | } 204 | parts = append(parts, p.next().Lit) 205 | } 206 | return strings.ToLower(strings.Join(parts, ".")), nil 207 | } 208 | 209 | func (p *Parser) parseLetBinding() (ast.Binding, bool, error) { 210 | p.skipNewlines() 211 | if !p.peekIs(IDENT) { 212 | return ast.Binding{}, false, fmt.Errorf("expected identifier after let") 213 | } 214 | name := p.next().Lit 215 | if !p.peekIs(EQUAL) { 216 | return ast.Binding{}, false, fmt.Errorf("expected '=' in let binding") 217 | } 218 | p.next() 219 | p.skipNewlines() 220 | if !p.peekIs(LPAREN) { 221 | p.skipLetRemainder() 222 | return ast.Binding{}, false, nil 223 | } 224 | p.next() 225 | p.skipNewlines() 226 | if !p.peekIs(IDENT) { 227 | p.collectUntilMatching(RPAREN) 228 | return ast.Binding{}, false, nil 229 | } 230 | head := p.peek().Lit 231 | if head != "from" && head != "from_text" && !strings.HasPrefix(head, "s\"") { 232 | p.collectUntilMatching(RPAREN) 233 | return ast.Binding{}, false, nil 234 | } 235 | subTokens := p.collectUntilMatching(RPAREN) 236 | subParser := &Parser{tokens: subTokens} 237 | subQuery, err := subParser.parseQuery() 238 | if err != nil { 239 | return ast.Binding{}, false, err 240 | } 241 | return ast.Binding{Name: name, Query: subQuery}, true, nil 242 | } 243 | 244 | func (p *Parser) skipLetRemainder() { 245 | depth := 0 246 | for !p.peekIs(EOF) { 247 | tok := p.next() 248 | switch tok.Typ { 249 | case LPAREN, LBRACE, LBRACKET: 250 | depth++ 251 | case RPAREN, RBRACE, RBRACKET: 252 | if depth > 0 { 253 | depth-- 254 | } 255 | case NEWLINE: 256 | if depth == 0 { 257 | return 258 | } 259 | } 260 | } 261 | } 262 | 263 | func (p *Parser) parseSource() (ast.Source, error) { 264 | p.skipNewlines() 265 | if inline, ok, err := p.parseInlineRowsSource(); ok || err != nil { 266 | return inline, err 267 | } 268 | if p.peekIs(LPAREN) { 269 | p.next() 270 | expr, err := p.parseExpr(0) 271 | if err != nil { 272 | return ast.Source{}, err 273 | } 274 | if !p.peekIs(RPAREN) { 275 | return ast.Source{}, fmt.Errorf("expected ) after inline source") 276 | } 277 | p.next() 278 | if call, ok := expr.(*ast.Call); ok { 279 | if name := exprToIdent(call.Func); name == "read_csv" && len(call.Args) == 1 { 280 | if lit, ok := call.Args[0].(*ast.StringLit); ok { 281 | path := strings.ReplaceAll(lit.Value, "'", "''") 282 | table := fmt.Sprintf("SELECT\n *\n FROM\n read_csv('%s')", path) 283 | return ast.Source{Table: table}, nil 284 | } 285 | } 286 | } 287 | return ast.Source{}, fmt.Errorf("unsupported inline source") 288 | } 289 | 290 | tok := p.next() 291 | if tok.Typ == IDENT && strings.HasPrefix(tok.Lit, "from_text") { 292 | // from_text format:json '...' 
293 | if !p.peekIs(IDENT) { 294 | return ast.Source{}, fmt.Errorf("from_text expects format") 295 | } 296 | format := p.next().Lit 297 | if !strings.Contains(strings.ToLower(format), "json") { 298 | return ast.Source{}, fmt.Errorf("from_text only supports json in this stub") 299 | } 300 | if !p.peekIs(STRING) { 301 | return ast.Source{}, fmt.Errorf("from_text expects string literal") 302 | } 303 | raw := p.next().Lit 304 | rows, err := parseJSONTable(raw) 305 | if err != nil { 306 | return ast.Source{}, err 307 | } 308 | return ast.Source{Rows: rows}, nil 309 | } 310 | if tok.Typ == IDENT && tok.Lit == "s" && p.peekIs(STRING) { 311 | sql := p.next().Lit 312 | return ast.Source{Table: sql}, nil 313 | } 314 | if tok.Typ == IDENT && strings.HasPrefix(tok.Lit, "s\"") { 315 | inner := strings.TrimSuffix(strings.TrimPrefix(tok.Lit, "s\""), "\"") // strings.Trim's cutset would also eat a trailing 's' from the SQL 316 | return ast.Source{Table: "SELECT " + strings.TrimPrefix(inner, "SELECT ")}, nil 317 | } 318 | if tok.Typ != IDENT { 319 | return ast.Source{}, fmt.Errorf("expected source after from, got %v", tok) 320 | } 321 | if p.peekIs(EQUAL) { 322 | alias := tok.Lit 323 | p.next() 324 | srcTok := p.next() 325 | table := srcTok.Lit 326 | if srcTok.Typ == IDENT && strings.HasPrefix(srcTok.Lit, "s\"") { 327 | inner := strings.TrimSuffix(strings.TrimPrefix(srcTok.Lit, "s\""), "\"") 328 | table = "SELECT " + strings.TrimPrefix(inner, "SELECT ") 329 | } 330 | return ast.Source{Table: fmt.Sprintf("%s AS %s", table, alias)}, nil 331 | } 332 | return ast.Source{Table: tok.Lit}, nil 333 | } 334 | 335 | func (p *Parser) parseInlineRowsSource() (ast.Source, bool, error) { 336 | p.skipNewlines() 337 | if !p.peekIs(LBRACKET) { 338 | return ast.Source{}, false, nil 339 | } 340 | p.next() 341 | var rows []ast.InlineRow 342 | for { 343 | p.skipNewlines() 344 | if p.peekIs(RBRACE) { 345 | p.next() 346 | continue 347 | } 348 | if p.peekIs(RBRACKET) { 349 | p.next() 350 | break 351 | } 352 | if !p.peekIs(LBRACE) { 353 | return ast.Source{}, false, fmt.Errorf("expected { in inline rows") 354 | } 355 | p.next() 356 | rec, err := p.parseRecord() 357 | if err != nil { 358 | return ast.Source{}, false, err 359 | } 360 | rows = append(rows, rec) 361 | p.skipNewlines() 362 | if p.peekIs(COMMA) { 363 | p.next() 364 | } 365 | p.skipNewlines() 366 | if p.peekIs(RBRACKET) { 367 | p.next() 368 | break 369 | } 370 | } 371 | return ast.Source{Rows: rows}, true, nil 372 | } 373 | 374 | func (p *Parser) parseLoop() (ast.Step, error) { 375 | p.skipNewlines() 376 | hasParens := false 377 | if p.peekIs(LPAREN) { 378 | hasParens = true 379 | p.next() 380 | } 381 | prev := p.stopAtPipe 382 | p.stopAtPipe = true 383 | defer func() { p.stopAtPipe = prev }() 384 | var steps []ast.Step 385 | for { 386 | p.skipNewlines() 387 | if hasParens { 388 | if p.peekIs(RPAREN) { 389 | p.next() 390 | break 391 | } 392 | if p.peekIs(EOF) { 393 | return nil, fmt.Errorf("unterminated loop body") 394 | } 395 | } else if p.peekIs(EOF) { 396 | break 397 | } 398 | if !p.peekIs(IDENT) { 399 | return nil, fmt.Errorf("unexpected token %v in loop", p.peek()) 400 | } 401 | switch p.peek().Lit { 402 | case "filter": 403 | p.next() 404 | step, err := p.parseFilter() 405 | if err != nil { 406 | return nil, err 407 | } 408 | steps = append(steps, step) 409 | case "derive": 410 | p.next() 411 | step, err := p.parseDerive() 412 | if err != nil { 413 | return nil, err 414 | } 415 | steps = append(steps, step) 416 | case "select": 417 | p.next() 418 | step, err := p.parseSelect() 419 | if err != nil { 420 | return nil, err 421 | } 422 | steps = append(steps, step) 423 | case "sort": 424 | p.next() 
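// As at top level, a loop-body sort accepts both forms, e.g.
//
//	sort track_id
//	sort {-milliseconds, +name}
//
// (parseSort/parseSortItem below handle the braced list and +/- prefixes).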
425 | step, err := p.parseSort() 426 | if err != nil { 427 | return nil, err 428 | } 429 | steps = append(steps, step) 430 | default: 431 | return nil, fmt.Errorf("unsupported statement %q in loop", p.peek().Lit) 432 | } 433 | p.skipNewlines() 434 | if hasParens && p.peekIs(PIPE) { 435 | p.next() 436 | } 437 | } 438 | return &ast.LoopStep{Body: steps}, nil 439 | } 440 | 441 | func (p *Parser) parseFilter() (ast.Step, error) { 442 | p.skipNewlines() 443 | expr, err := p.parseExpr(0) 444 | if err != nil { 445 | return nil, err 446 | } 447 | p.skipToLineEnd() 448 | return &ast.FilterStep{Expr: expr}, nil 449 | } 450 | 451 | func (p *Parser) parseDerive() (ast.Step, error) { 452 | p.skipNewlines() 453 | var assigns []ast.Assignment 454 | if p.peekIs(LBRACE) { 455 | p.next() // consume { 456 | for { 457 | p.skipNewlines() 458 | if p.peekIs(RBRACE) { 459 | p.next() 460 | break 461 | } 462 | assign, err := p.parseAssignment() 463 | if err != nil { 464 | return nil, err 465 | } 466 | assigns = append(assigns, assign) 467 | if p.peekIs(COMMA) { 468 | p.next() 469 | } 470 | p.skipNewlines() 471 | } 472 | } else { 473 | assign, err := p.parseAssignment() 474 | if err != nil { 475 | return nil, err 476 | } 477 | assigns = append(assigns, assign) 478 | } 479 | p.skipToLineEnd() 480 | return &ast.DeriveStep{Assignments: assigns}, nil 481 | } 482 | 483 | func (p *Parser) parseAssignment() (ast.Assignment, error) { 484 | if !p.peekIs(IDENT) { 485 | return ast.Assignment{}, fmt.Errorf("expected identifier in assignment") 486 | } 487 | name := p.next().Lit 488 | if !p.peekIs(EQUAL) { 489 | return ast.Assignment{}, fmt.Errorf("expected = in assignment") 490 | } 491 | p.next() 492 | expr, err := p.parseExpr(0) 493 | if err != nil { 494 | return ast.Assignment{}, err 495 | } 496 | return ast.Assignment{Name: name, Expr: expr}, nil 497 | } 498 | 499 | func (p *Parser) parseRecord() (ast.InlineRow, error) { 500 | var fields []ast.Field 501 | for { 502 | p.skipNewlines() 503 | if p.peekIs(RBRACE) { 504 | p.next() 505 | break 506 | } 507 | if !p.peekIs(IDENT) { 508 | return ast.InlineRow{}, fmt.Errorf("expected field name in record") 509 | } 510 | key := p.next().Lit 511 | if !p.peekIs(EQUAL) { 512 | return ast.InlineRow{}, fmt.Errorf("expected = after field name") 513 | } 514 | p.next() 515 | val, err := p.parseExpr(0) 516 | if err != nil { 517 | return ast.InlineRow{}, err 518 | } 519 | fields = append(fields, ast.Field{Name: key, Expr: val}) 520 | p.skipNewlines() 521 | if p.peekIs(COMMA) { 522 | p.next() 523 | } 524 | } 525 | return ast.InlineRow{Fields: fields}, nil 526 | } 527 | 528 | func (p *Parser) parseSelect() (ast.Step, error) { 529 | p.skipNewlines() 530 | var items []ast.SelectItem 531 | if p.peekIs(LBRACE) { 532 | p.next() 533 | for { 534 | p.skipNewlines() 535 | if p.peekIs(RBRACE) { 536 | p.next() 537 | break 538 | } 539 | item, err := p.parseSelectItem() 540 | if err != nil { 541 | return nil, err 542 | } 543 | items = append(items, item) 544 | if p.peekIs(COMMA) { 545 | p.next() 546 | } 547 | p.skipNewlines() 548 | } 549 | } else { 550 | item, err := p.parseSelectItem() 551 | if err != nil { 552 | return nil, err 553 | } 554 | items = append(items, item) 555 | } 556 | p.skipToLineEnd() 557 | return &ast.SelectStep{Items: items}, nil 558 | } 559 | 560 | func (p *Parser) parseSelectItem() (ast.SelectItem, error) { 561 | expr, err := p.parseExpr(0) 562 | if err != nil { 563 | return ast.SelectItem{}, err 564 | } 565 | alias := "" 566 | if p.peekIs(EQUAL) { 567 | p.next() 568 | rhs, err := 
p.parseExpr(0) 569 | if err != nil { 570 | return ast.SelectItem{}, err 571 | } 572 | id, ok := expr.(*ast.Ident) 573 | if !ok { 574 | return ast.SelectItem{}, fmt.Errorf("left side of assignment must be identifier") 575 | } 576 | alias = strings.Join(id.Parts, ".") 577 | expr = rhs 578 | } 579 | if p.peekIs(IDENT) && p.peek().Lit == "as" { 580 | p.next() 581 | if !p.peekIs(IDENT) { 582 | return ast.SelectItem{}, fmt.Errorf("expected alias after as") 583 | } 584 | alias = p.next().Lit 585 | } 586 | return ast.SelectItem{Expr: expr, As: alias}, nil 587 | } 588 | 589 | func (p *Parser) parseAggregate() (ast.Step, error) { 590 | p.skipNewlines() 591 | if !p.peekIs(LBRACE) { 592 | var name string 593 | if p.peekIs(IDENT) && p.peekN(1).Typ == EQUAL { 594 | name = p.next().Lit 595 | p.next() 596 | } 597 | item, err := p.parseAggregateItem() 598 | if err != nil { 599 | return nil, err 600 | } 601 | if name != "" { 602 | item.As = name 603 | } 604 | p.skipToLineEnd() 605 | return &ast.AggregateStep{Items: []ast.AggregateItem{item}}, nil 606 | } 607 | p.next() 608 | var items []ast.AggregateItem 609 | for { 610 | p.skipNewlines() 611 | if p.peekIs(RBRACE) { 612 | p.next() 613 | break 614 | } 615 | if p.peekIs(IDENT) && p.peekN(1).Typ == EQUAL { 616 | name := p.next().Lit 617 | p.next() 618 | item, err := p.parseAggregateItem() 619 | if err != nil { 620 | return nil, err 621 | } 622 | item.As = name 623 | items = append(items, item) 624 | } else { 625 | item, err := p.parseAggregateItem() 626 | if err != nil { 627 | return nil, err 628 | } 629 | items = append(items, item) 630 | } 631 | if p.peekIs(COMMA) { 632 | p.next() 633 | } 634 | p.skipNewlines() 635 | } 636 | p.skipToLineEnd() 637 | return &ast.AggregateStep{Items: items}, nil 638 | } 639 | 640 | func (p *Parser) parseAggregateItem() (ast.AggregateItem, error) { 641 | funcExpr, err := p.parseExpr(0) 642 | if err != nil { 643 | return ast.AggregateItem{}, err 644 | } 645 | call, ok := funcExpr.(*ast.Call) 646 | if !ok { 647 | if pipe, ok := funcExpr.(*ast.Pipe); ok { 648 | call = &ast.Call{Func: pipe.Func, Args: append([]ast.Expr{pipe.Input}, pipe.Args...)} 649 | } else { 650 | return ast.AggregateItem{}, fmt.Errorf("aggregate item must be a function call") 651 | } 652 | } 653 | fnName := exprToIdent(call.Func) 654 | alias := "" 655 | if p.peekIs(IDENT) && p.peek().Lit == "as" { 656 | p.next() 657 | if !p.peekIs(IDENT) { 658 | return ast.AggregateItem{}, fmt.Errorf("expected alias after as") 659 | } 660 | alias = p.next().Lit 661 | } 662 | arg := ast.Expr(nil) 663 | if len(call.Args) > 0 { 664 | arg = call.Args[0] 665 | } 666 | args := append([]ast.Expr{}, call.Args...) 667 | return ast.AggregateItem{Func: fnName, Arg: arg, Args: args, As: alias}, nil 668 | } 669 | 670 | func (p *Parser) parseTake() (ast.Step, error) { 671 | p.skipNewlines() 672 | // range form: number .. number 673 | if p.peekIs(NUMBER) && p.peekN(1).Typ == RANGE { 674 | start := p.next().Lit 675 | if strings.Contains(start, ".") { 676 | return nil, fmt.Errorf("`take` expected int or range, but found %s", start) 677 | } 678 | p.next() // .. 
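// Worked example: `take 1..15` parses start=1 and end=15 below, giving
// limit = 15-1+1 = 15 and offset = 1-1 = 0; the pipelines_filters_sort_take
// snapshot accordingly shows a bare LIMIT 15 (no OFFSET is emitted for a
// zero offset).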
679 | if !p.peekIs(NUMBER) { 680 | return nil, fmt.Errorf("expected end of range") 681 | } 682 | end := p.next().Lit 683 | if strings.Contains(end, ".") { 684 | return nil, fmt.Errorf("`take` expected int or range, but found %s", end) 685 | } 686 | startInt := atoi(start) 687 | endInt := atoi(end) 688 | limit := endInt - startInt + 1 689 | offset := startInt - 1 690 | p.skipToLineEnd() 691 | return &ast.TakeStep{Limit: limit, Offset: offset}, nil 692 | } 693 | 694 | if !p.peekIs(NUMBER) { 695 | return nil, fmt.Errorf("take expects number or range") 696 | } 697 | lit := p.next().Lit 698 | if strings.Contains(lit, ".") { 699 | return nil, fmt.Errorf("`take` expected int or range, but found %s", lit) 700 | } 701 | limit := atoi(lit) 702 | p.skipToLineEnd() 703 | return &ast.TakeStep{Limit: limit}, nil 704 | } 705 | 706 | func (p *Parser) parseAppend() (ast.Step, error) { 707 | p.skipNewlines() 708 | if !p.peekIs(LPAREN) { 709 | return nil, fmt.Errorf("append expects '('") 710 | } 711 | p.next() 712 | subTokens := p.collectUntilMatching(RPAREN) 713 | subParser := &Parser{tokens: subTokens} 714 | subQuery, err := subParser.parseQuery() 715 | if err != nil { 716 | return nil, err 717 | } 718 | return &ast.AppendStep{Query: subQuery}, nil 719 | } 720 | 721 | func (p *Parser) parseRemove() (ast.Step, error) { 722 | p.skipNewlines() 723 | if !p.peekIs(LPAREN) { 724 | return nil, fmt.Errorf("remove expects '('") 725 | } 726 | p.next() 727 | subTokens := p.collectUntilMatching(RPAREN) 728 | subParser := &Parser{tokens: subTokens} 729 | subQuery, err := subParser.parseQuery() 730 | if err != nil { 731 | return nil, err 732 | } 733 | return &ast.RemoveStep{Query: subQuery}, nil 734 | } 735 | 736 | func (p *Parser) parseGroup() (ast.Step, error) { 737 | p.skipNewlines() 738 | var keyExpr ast.Expr 739 | if p.peekIs(LBRACE) { 740 | p.next() 741 | var exprs []ast.Expr 742 | for { 743 | p.skipNewlines() 744 | if p.peekIs(RBRACE) { 745 | p.next() 746 | break 747 | } 748 | e, err := p.parseExpr(0) 749 | if err != nil { 750 | return nil, err 751 | } 752 | exprs = append(exprs, e) 753 | if p.peekIs(COMMA) { 754 | p.next() 755 | } 756 | } 757 | if len(exprs) == 1 { 758 | keyExpr = exprs[0] 759 | } else { 760 | keyExpr = &ast.Tuple{Exprs: exprs} 761 | } 762 | } else { 763 | if !p.peekIs(IDENT) { 764 | return nil, fmt.Errorf("group expects identifier key") 765 | } 766 | keyTok := p.next() 767 | if strings.HasSuffix(keyTok.Lit, ".") && p.peekIs(STAR) { 768 | keyTok.Lit = strings.TrimSuffix(keyTok.Lit, ".") 769 | p.next() 770 | } else if p.peekIs(DOT) && p.peekN(1).Typ == STAR { 771 | p.next() 772 | p.next() 773 | } 774 | keyExpr = &ast.Ident{Parts: strings.Split(keyTok.Lit, ".")} 775 | } 776 | p.skipNewlines() 777 | if !p.peekIs(LPAREN) { 778 | return nil, fmt.Errorf("group expects '(' block") 779 | } 780 | p.next() 781 | subTokens := p.collectUntilMatching(RPAREN) 782 | subParser := &Parser{tokens: subTokens} 783 | subQuery, err := subParser.parseGroupSteps() 784 | if err != nil { 785 | return nil, err 786 | } 787 | return &ast.GroupStep{ 788 | Key: keyExpr, 789 | Steps: subQuery, 790 | }, nil 791 | } 792 | 793 | func (p *Parser) parseGroupSteps() ([]ast.Step, error) { 794 | var steps []ast.Step 795 | for !p.peekIs(EOF) { 796 | p.skipNewlines() 797 | if p.peekIs(EOF) { 798 | break 799 | } 800 | if p.peekIs(PIPE) { 801 | p.next() 802 | continue 803 | } 804 | switch tok := p.peek(); tok.Typ { 805 | case IDENT: 806 | switch tok.Lit { 807 | case "filter": 808 | p.next() 809 | step, err := p.parseFilter() 810 | if err 
!= nil { 811 | return nil, err 812 | } 813 | steps = append(steps, step) 814 | case "derive": 815 | p.next() 816 | step, err := p.parseDerive() 817 | if err != nil { 818 | return nil, err 819 | } 820 | steps = append(steps, step) 821 | case "select": 822 | p.next() 823 | step, err := p.parseSelect() 824 | if err != nil { 825 | return nil, err 826 | } 827 | steps = append(steps, step) 828 | case "aggregate": 829 | p.next() 830 | step, err := p.parseAggregate() 831 | if err != nil { 832 | return nil, err 833 | } 834 | steps = append(steps, step) 835 | case "window": 836 | // Skip window blocks within groups for now. 837 | p.next() 838 | p.skipNewlines() 839 | if p.peekIs(IDENT) && strings.Contains(p.peek().Lit, ":") { 840 | p.next() 841 | p.skipNewlines() 842 | } 843 | if p.peekIs(LPAREN) { 844 | p.next() 845 | p.collectUntilMatching(RPAREN) 846 | } 847 | case "take": 848 | p.next() 849 | step, err := p.parseTake() 850 | if err != nil { 851 | return nil, err 852 | } 853 | steps = append(steps, step) 854 | case "sort": 855 | p.next() 856 | step, err := p.parseSort() 857 | if err != nil { 858 | return nil, err 859 | } 860 | steps = append(steps, step) 861 | default: 862 | return nil, fmt.Errorf("unexpected token %q in group", tok.Lit) 863 | } 864 | case NEWLINE: 865 | p.next() 866 | default: 867 | return nil, fmt.Errorf("unexpected token %v in group at pos %d", tok, p.pos) 868 | } 869 | } 870 | return steps, nil 871 | } 872 | 873 | func (p *Parser) parseSort() (ast.Step, error) { 874 | p.skipNewlines() 875 | var items []ast.SortItem 876 | if p.peekIs(LBRACE) { 877 | p.next() 878 | for { 879 | p.skipNewlines() 880 | if p.peekIs(PIPE) { 881 | break 882 | } 883 | if p.peekIs(RBRACE) { 884 | p.next() 885 | break 886 | } 887 | item, err := p.parseSortItem() 888 | if err != nil { 889 | return nil, err 890 | } 891 | items = append(items, item) 892 | if p.peekIs(COMMA) { 893 | p.next() 894 | } 895 | p.skipNewlines() 896 | } 897 | } else { 898 | if p.peekIs(PIPE) { 899 | return &ast.SortStep{Items: items}, nil 900 | } 901 | item, err := p.parseSortItem() 902 | if err != nil { 903 | return nil, err 904 | } 905 | items = append(items, item) 906 | } 907 | return &ast.SortStep{Items: items}, nil 908 | } 909 | 910 | func (p *Parser) parseSortItem() (ast.SortItem, error) { 911 | desc := false 912 | if p.peekIs(MINUS) { 913 | p.next() 914 | desc = true 915 | } else if p.peekIs(PLUS) { 916 | p.next() 917 | } 918 | expr, err := p.parseExpr(0) 919 | if err != nil { 920 | return ast.SortItem{}, err 921 | } 922 | if p.peekIs(IDENT) && p.peek().Lit == "desc" { 923 | p.next() 924 | desc = true 925 | } 926 | return ast.SortItem{Expr: expr, Desc: desc}, nil 927 | } 928 | 929 | // Expression parsing (Pratt-style with limited operators). 930 | var precedences = map[TokenType]int{ 931 | OROR: 1, 932 | EQ: 2, 933 | REGEXEQ: 2, 934 | NEQ: 2, 935 | NULLCOAL: 2, 936 | RANGE: 2, 937 | LT: 3, 938 | GT: 3, 939 | LTE: 3, 940 | GTE: 3, 941 | PLUS: 4, 942 | MINUS: 4, 943 | STAR: 5, 944 | SLASH: 5, 945 | FLOORDIV: 5, 946 | PERCENT: 5, 947 | POW: 6, 948 | } 949 | 950 | func (p *Parser) parseExpr(precedence int) (ast.Expr, error) { 951 | p.skipNewlines() 952 | left, err := p.parsePrefix() 953 | if err != nil { 954 | return nil, err 955 | } 956 | 957 | for { 958 | if p.peekIs(EOF) || p.peekIs(NEWLINE) || p.peekIs(COMMA) || p.peekIs(RBRACE) || p.peekIs(RPAREN) || p.peekIs(RBRACKET) { 959 | break 960 | } 961 | 962 | // Pipe operator has low precedence; handle directly. 
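// For example `title | text.extract 2 5` becomes
//
//	&ast.Pipe{Input: title, Func: text.extract, Args: [2, 5]}
//
// which the generator renders as SUBSTRING(title, 2, 5); and in
// `milliseconds / 1000 / 60 | in 3..4` the RANGE special case below folds
// `3..4` into a single `..` Binary argument (compiled to BETWEEN 3 AND 4).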
963 | if p.peekIs(PIPE) && !p.stopAtPipe { 964 | if precedence > 1 { 965 | break 966 | } 967 | p.next() 968 | fn, err := p.parsePrefix() 969 | if err != nil { 970 | return nil, err 971 | } 972 | var args []ast.Expr 973 | for p.canStartExpr(p.peek()) { 974 | arg, err := p.parsePrefix() 975 | if err != nil { 976 | return nil, err 977 | } 978 | args = append(args, arg) 979 | } 980 | if p.peekIs(RANGE) && len(args) > 0 { 981 | start := args[len(args)-1] 982 | p.next() 983 | right, err := p.parseExpr(precedences[RANGE] + 1) 984 | if err != nil { 985 | return nil, err 986 | } 987 | args[len(args)-1] = &ast.Binary{Op: "..", Left: start, Right: right} 988 | } 989 | left = &ast.Pipe{Input: left, Func: fn, Args: args} 990 | continue 991 | } 992 | 993 | // Function application by adjacency. 994 | if p.canStartExpr(p.peek()) && p.peek().Typ != MINUS { 995 | arg, err := p.parsePrefix() 996 | if err != nil { 997 | return nil, err 998 | } 999 | left = appendCallArg(left, arg) 1000 | continue 1001 | } 1002 | 1003 | op := p.peek() 1004 | opPrec, ok := precedences[op.Typ] 1005 | if !ok || opPrec < precedence { 1006 | break 1007 | } 1008 | p.next() 1009 | right, err := p.parseExpr(opPrec + 1) 1010 | if err != nil { 1011 | return nil, err 1012 | } 1013 | left = &ast.Binary{Op: op.Lit, Left: left, Right: right} 1014 | } 1015 | 1016 | return left, nil 1017 | } 1018 | 1019 | func (p *Parser) parsePrefix() (ast.Expr, error) { 1020 | tok := p.next() 1021 | switch tok.Typ { 1022 | case IDENT: 1023 | if tok.Lit == "case" && p.peekIs(LBRACKET) { 1024 | return p.parseCase() 1025 | } 1026 | if p.peekIs(DOT) && p.peekN(1).Typ == STAR { 1027 | p.next() 1028 | p.next() 1029 | return &ast.Ident{Parts: []string{tok.Lit, "*"}}, nil 1030 | } 1031 | return &ast.Ident{Parts: strings.Split(tok.Lit, ".")}, nil 1032 | case NUMBER: 1033 | return &ast.Number{Value: tok.Lit}, nil 1034 | case STRING: 1035 | return &ast.StringLit{Value: tok.Lit}, nil 1036 | case FSTRING: 1037 | return p.parseFString(tok.Lit) 1038 | case LPAREN: 1039 | expr, err := p.parseExpr(0) 1040 | if err != nil { 1041 | return nil, err 1042 | } 1043 | if !p.peekIs(RPAREN) { 1044 | return nil, fmt.Errorf("expected ) at pos %d", p.pos) 1045 | } 1046 | p.next() 1047 | return expr, nil 1048 | case MINUS: 1049 | if p.peekIs(NUMBER) { 1050 | num := p.next().Lit 1051 | return &ast.Number{Value: "-" + num}, nil 1052 | } 1053 | expr, err := p.parseExpr(precedences[MINUS] + 1) // bind the operand tighter than binary +/- so -a + b reads as (-a) + b 1054 | if err != nil { 1055 | return nil, err 1056 | } 1057 | return &ast.Binary{Op: "*", Left: &ast.Number{Value: "-1"}, Right: expr}, nil 1058 | default: 1059 | return nil, fmt.Errorf("unexpected token %v at pos %d", tok, p.pos-1) 1060 | } 1061 | } 1062 | 1063 | func (p *Parser) parseCase() (ast.Expr, error) { 1064 | p.next() // consume '[' 1065 | var branches []ast.CaseBranch 1066 | for { 1067 | p.skipNewlines() 1068 | if p.peekIs(RBRACKET) { 1069 | p.next() 1070 | break 1071 | } 1072 | cond, err := p.parseExpr(0) 1073 | if err != nil { 1074 | return nil, err 1075 | } 1076 | if !p.peekIs(ARROW) { 1077 | return nil, fmt.Errorf("expected => in case expression") 1078 | } 1079 | p.next() 1080 | val, err := p.parseExpr(0) 1081 | if err != nil { 1082 | return nil, err 1083 | } 1084 | branches = append(branches, ast.CaseBranch{Cond: cond, Value: val}) 1085 | p.skipNewlines() 1086 | if p.peekIs(COMMA) { 1087 | p.next() 1088 | } 1089 | } 1090 | return &ast.CaseExpr{Branches: branches}, nil 1091 | } 1092 | 1093 | // Helpers 1094 | func (p *Parser) peek() Token { 1095 | return p.tokens[p.pos] 1096 | } 1097 | 1098 
| func (p *Parser) peekN(n int) Token { 1099 | if p.pos+n >= len(p.tokens) { 1100 | return Token{Typ: EOF} 1101 | } 1102 | return p.tokens[p.pos+n] 1103 | } 1104 | 1105 | func (p *Parser) peekIs(tt TokenType) bool { 1106 | return p.peek().Typ == tt 1107 | } 1108 | 1109 | func (p *Parser) next() Token { 1110 | t := p.tokens[p.pos] 1111 | p.pos++ 1112 | return t 1113 | } 1114 | 1115 | func (p *Parser) skipNewlines() { 1116 | for p.peekIs(NEWLINE) { 1117 | p.next() 1118 | } 1119 | } 1120 | 1121 | func (p *Parser) skipToLineEnd() { 1122 | for !p.peekIs(EOF) && !p.peekIs(NEWLINE) { 1123 | if p.stopAtPipe && (p.peekIs(PIPE) || p.peekIs(RPAREN)) { 1124 | break 1125 | } 1126 | p.next() 1127 | } 1128 | p.skipNewlines() 1129 | } 1130 | 1131 | func (p *Parser) matchIdent(lit string) bool { 1132 | if p.peekIs(IDENT) && p.peek().Lit == lit { 1133 | p.next() 1134 | return true 1135 | } 1136 | return false 1137 | } 1138 | 1139 | func (p *Parser) canStartExpr(tok Token) bool { 1140 | switch tok.Typ { 1141 | case IDENT, NUMBER, STRING, FSTRING, LPAREN, MINUS: 1142 | return true 1143 | default: 1144 | return false 1145 | } 1146 | } 1147 | 1148 | func (p *Parser) parseFString(lit string) (ast.Expr, error) { 1149 | var parts []ast.Expr 1150 | var sb strings.Builder 1151 | for i := 0; i < len(lit); i++ { 1152 | ch := lit[i] 1153 | if ch == '{' { 1154 | if i+1 < len(lit) && lit[i+1] == '{' { 1155 | sb.WriteByte('{') 1156 | i++ 1157 | continue 1158 | } 1159 | if sb.Len() > 0 { 1160 | parts = append(parts, &ast.StringLit{Value: sb.String()}) 1161 | sb.Reset() 1162 | } 1163 | i++ 1164 | start := i 1165 | depth := 1 1166 | for i < len(lit) && depth > 0 { 1167 | if lit[i] == '{' { 1168 | depth++ 1169 | } else if lit[i] == '}' { 1170 | depth-- 1171 | if depth == 0 { 1172 | break 1173 | } 1174 | } 1175 | i++ 1176 | } 1177 | if depth != 0 { 1178 | return nil, fmt.Errorf("unterminated expression in f-string") 1179 | } 1180 | exprStr := strings.TrimSpace(lit[start:i]) 1181 | if exprStr == "" { 1182 | return nil, fmt.Errorf("empty expression in f-string") 1183 | } 1184 | expr, err := parseExprFragment(exprStr) 1185 | if err != nil { 1186 | return nil, err 1187 | } 1188 | parts = append(parts, expr) 1189 | } else if ch == '}' { 1190 | if i+1 < len(lit) && lit[i+1] == '}' { 1191 | sb.WriteByte('}') 1192 | i++ 1193 | continue 1194 | } 1195 | return nil, fmt.Errorf("single } in f-string") 1196 | } else { 1197 | sb.WriteByte(ch) 1198 | } 1199 | } 1200 | if sb.Len() > 0 { 1201 | parts = append(parts, &ast.StringLit{Value: sb.String()}) 1202 | } 1203 | if len(parts) == 0 { 1204 | return &ast.StringLit{Value: ""}, nil 1205 | } 1206 | if len(parts) == 1 { 1207 | return parts[0], nil 1208 | } 1209 | return &ast.Call{ 1210 | Func: &ast.Ident{Parts: []string{"__concat__"}}, 1211 | Args: parts, 1212 | }, nil 1213 | } 1214 | 1215 | func parseExprFragment(src string) (ast.Expr, error) { 1216 | toks, err := Lex(src) 1217 | if err != nil { 1218 | return nil, err 1219 | } 1220 | parser := &Parser{tokens: toks} 1221 | expr, err := parser.parseExpr(0) 1222 | if err != nil { 1223 | return nil, err 1224 | } 1225 | parser.skipNewlines() 1226 | if !parser.peekIs(EOF) { 1227 | return nil, fmt.Errorf("unexpected token %v in f-string", parser.peek()) 1228 | } 1229 | return expr, nil 1230 | } 1231 | 1232 | func (p *Parser) collectUntilMatching(end TokenType) []Token { 1233 | var collected []Token 1234 | depth := 1 1235 | for { 1236 | tok := p.next() 1237 | if tok.Typ == EOF { 1238 | break 1239 | } 1240 | if tok.Typ == end { 1241 | depth-- 1242 | if 
depth == 0 { 1243 | break 1244 | } 1245 | } 1246 | if tok.Typ == LPAREN && end == RPAREN { 1247 | depth++ 1248 | } 1249 | if tok.Typ == LBRACE && end == RBRACE { 1250 | depth++ 1251 | } 1252 | collected = append(collected, tok) 1253 | } 1254 | collected = append(collected, Token{Typ: EOF}) 1255 | return collected 1256 | } 1257 | 1258 | func atoi(s string) int { 1259 | var n int 1260 | for _, r := range s { 1261 | n = n*10 + int(r-'0') 1262 | } 1263 | return n 1264 | } 1265 | 1266 | func exprToIdent(e ast.Expr) string { 1267 | if id, ok := e.(*ast.Ident); ok { 1268 | return strings.Join(id.Parts, ".") 1269 | } 1270 | return "" 1271 | } 1272 | 1273 | func appendCallArg(fn ast.Expr, arg ast.Expr) ast.Expr { 1274 | if call, ok := fn.(*ast.Call); ok { 1275 | return &ast.Call{Func: call.Func, Args: append(call.Args, arg)} 1276 | } 1277 | return &ast.Call{Func: fn, Args: []ast.Expr{arg}} 1278 | } 1279 | 1280 | func (p *Parser) parseJoin() (ast.Step, error) { 1281 | p.skipNewlines() 1282 | side := "inner" 1283 | if p.peekIs(IDENT) && strings.Contains(p.peek().Lit, "side:") { 1284 | side = strings.SplitN(p.next().Lit, ":", 2)[1] 1285 | } 1286 | p.skipNewlines() 1287 | var subQuery *ast.Query 1288 | if inline, ok, err := p.parseInlineRowsSource(); ok || err != nil { 1289 | if err != nil { 1290 | return nil, err 1291 | } 1292 | subQuery = &ast.Query{From: inline} 1293 | } else if p.peekIs(LPAREN) { 1294 | p.next() 1295 | subTokens := p.collectUntilMatching(RPAREN) 1296 | subParser := &Parser{tokens: subTokens} 1297 | q, err := subParser.parseQuery() 1298 | if err != nil { 1299 | return nil, err 1300 | } 1301 | subQuery = q 1302 | } else if p.peekIs(IDENT) { 1303 | table := p.next().Lit 1304 | if p.peekIs(EQUAL) { 1305 | alias := table 1306 | p.next() 1307 | if !p.peekIs(IDENT) { 1308 | return nil, fmt.Errorf("join expects table after alias") 1309 | } 1310 | tableTok := p.next() 1311 | table = fmt.Sprintf("%s AS %s", tableTok.Lit, alias) 1312 | } 1313 | subQuery = &ast.Query{From: ast.Source{Table: table}} 1314 | } else { 1315 | return nil, fmt.Errorf("join expects '(' source") 1316 | } 1317 | p.skipNewlines() 1318 | if p.peekIs(IDENT) && strings.Contains(p.peek().Lit, "side:") { 1319 | side = strings.SplitN(p.next().Lit, ":", 2)[1] 1320 | } 1321 | p.skipNewlines() 1322 | if !p.peekIs(LPAREN) { 1323 | return nil, fmt.Errorf("join expects '(' condition") 1324 | } 1325 | p.next() 1326 | condTokens := p.collectUntilMatching(RPAREN) 1327 | var cond ast.Expr 1328 | if len(condTokens) > 0 && condTokens[0].Typ == EQ && len(condTokens) > 1 && condTokens[1].Typ == IDENT { 1329 | name := condTokens[1].Lit 1330 | cond = &ast.Binary{ 1331 | Op: "==", 1332 | Left: &ast.Ident{Parts: []string{"this", name}}, 1333 | Right: &ast.Ident{ 1334 | Parts: []string{"that", name}, 1335 | }, 1336 | } 1337 | } else { 1338 | condParser := &Parser{tokens: condTokens} 1339 | var err error 1340 | cond, err = condParser.parseExpr(0) 1341 | if err != nil { 1342 | return nil, err 1343 | } 1344 | } 1345 | return &ast.JoinStep{Side: side, Query: subQuery, On: cond}, nil 1346 | } 1347 | --------------------------------------------------------------------------------