├── go.sum ├── go.work ├── go.mod ├── .gitignore ├── fuzz ├── README.md ├── go.mod ├── fuzz_test.go └── go.sum ├── .github ├── workflows │ ├── bump-release.yml │ └── create-release.yml └── bump.yml ├── render.go ├── release-process.md ├── cmd └── main.go ├── pkg ├── driver │ ├── postgresql.go │ ├── postgresql_test.go │ ├── base.go │ └── renderfn.go └── lucene │ ├── expr │ ├── operator.go │ ├── renderer.go │ ├── expression_test.go │ ├── validator.go │ └── expression.go │ └── reduce │ └── reduce.go ├── README.md ├── internal └── lex │ ├── lext_test.go │ └── lex.go ├── parse.go ├── LICENSE ├── parse_test.go └── postgresql_test.go /go.sum: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /go.work: -------------------------------------------------------------------------------- 1 | go 1.22 2 | 3 | use ( 4 | . 5 | ./fuzz 6 | ) 7 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/grindlemire/go-lucene 2 | 3 | go 1.22 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # vendor/ 15 | .vscode 16 | .cursor -------------------------------------------------------------------------------- /fuzz/README.md: -------------------------------------------------------------------------------- 1 | # What is this package? 2 | 3 | This package contains all the necessary code to fuzz test go-lucene. 
However it requires a few imports 4 | to do so and uses pg_query to validate the produced queries. Moving it to this directory allows the top level 5 | mod file to remain clean of dependencies while still allowing for the fuzz testing. -------------------------------------------------------------------------------- /fuzz/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/grindlemire/go-lucene/fuzz 2 | 3 | go 1.22 4 | 5 | require ( 6 | github.com/grindlemire/go-lucene v0.0.14 7 | github.com/pganalyze/pg_query_go/v4 v4.2.3 8 | ) 9 | 10 | require ( 11 | github.com/golang/protobuf v1.4.2 // indirect 12 | google.golang.org/protobuf v1.23.0 // indirect 13 | ) 14 | 15 | // Always just use the local version of go-lucene 16 | replace github.com/grindlemire/go-lucene => ../ 17 | -------------------------------------------------------------------------------- /.github/workflows/bump-release.yml: -------------------------------------------------------------------------------- 1 | name: Bump Release 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: checkout repo 13 | uses: actions/checkout@v3 14 | 15 | - name: setup go 16 | uses: actions/setup-go@v3 17 | with: 18 | go-version: 'stable' 19 | 20 | - name: test 21 | run: go test -v ./... 22 | 23 | - name: fuzz test 24 | run: go test -v ./fuzz 25 | 26 | - uses: grindlemire/bump-release-action@master 27 | with: 28 | config_path: '.github/bump.yml' 29 | -------------------------------------------------------------------------------- /render.go: -------------------------------------------------------------------------------- 1 | package lucene 2 | 3 | import "github.com/grindlemire/go-lucene/pkg/driver" 4 | 5 | var ( 6 | postgres = driver.NewPostgresDriver() 7 | ) 8 | 9 | // ToPostgres is a wrapper that will render the lucene expression string as a postgres sql filter string. 
10 | func ToPostgres(in string, opts ...Opt) (string, error) { 11 | e, err := Parse(in, opts...) 12 | if err != nil { 13 | return "", err 14 | } 15 | 16 | return postgres.Render(e) 17 | } 18 | 19 | // ToParameterizedPostgres is a wrapper that will render the lucene expression string as a postgres sql filter string with parameters. 20 | // The returned string will contain placeholders for the parameters that can be passed directly to a Query statement. 21 | func ToParameterizedPostgres(in string, opts ...Opt) (s string, params []any, err error) { 22 | e, err := Parse(in, opts...) 23 | if err != nil { 24 | return "", nil, err 25 | } 26 | 27 | return postgres.RenderParam(e) 28 | } 29 | -------------------------------------------------------------------------------- /release-process.md: -------------------------------------------------------------------------------- 1 | # Release Process 2 | 3 | ### Note this might be out of date, I have to figure out what is going on here 4 | 5 | ## Rules for release branches: 6 | 7 | - If you are releasing a new major version you need to branch off of master into a branch `release-branch.v#` (example `release-branch.v2` for a 2.x release) 8 | - If you are releasing a minor or patch update to an existing major release make sure to merge master into the release branch 9 | 10 | ## Rules for tagging and publishing the release 11 | 12 | When you are ready to publish the release make sure you... 13 | 14 | 1. Merge your changes into the correct release branch. 15 | 2. Check out the release branch locally (example: `git pull origin release-branch.v3`) 16 | 3. Create a new tag for the specific release version you will publish (example: `git tag v3.0.1`) 17 | 4. Push the tag up to github (example: `git push origin v3.0.1`) 18 | 5. 
Check that the github action successfully finished and created a release 19 | -------------------------------------------------------------------------------- /cmd/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "os" 7 | 8 | "github.com/grindlemire/go-lucene" 9 | "github.com/grindlemire/go-lucene/pkg/driver" 10 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 11 | ) 12 | 13 | func main() { 14 | if len(os.Args) < 2 { 15 | fmt.Printf("Please provide a lucene query\n") 16 | os.Exit(1) 17 | } 18 | 19 | e, err := lucene.Parse(os.Args[1]) 20 | if err != nil { 21 | fmt.Printf("Error parsing: %s\n", err) 22 | os.Exit(1) 23 | } 24 | 25 | fmt.Printf("Parsed input: %s\n", e) 26 | fmt.Printf("Verbose input: %#v\n", e) 27 | 28 | s, err := json.MarshalIndent(e, "", " ") 29 | if err != nil { 30 | fmt.Printf("Error marshalling to json: %s\n", err) 31 | os.Exit(1) 32 | } 33 | 34 | fmt.Printf("\n%s\n", s) 35 | 36 | var e1 expr.Expression 37 | err = json.Unmarshal(s, &e1) 38 | if err != nil { 39 | fmt.Printf("Error unmarshalling from json: %s\n", err) 40 | os.Exit(1) 41 | } 42 | 43 | sq, err := driver.NewPostgresDriver().Render(e) 44 | if err != nil { 45 | fmt.Printf("Error rendering sql: %s\n", err) 46 | os.Exit(1) 47 | } 48 | 49 | fmt.Printf("Reparsed input: %v\n", e1) 50 | fmt.Printf("Verbose input: %#v\n", e1) 51 | fmt.Printf("SQL output: %s\n", sq) 52 | } 53 | -------------------------------------------------------------------------------- /pkg/driver/postgresql.go: -------------------------------------------------------------------------------- 1 | package driver 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 8 | ) 9 | 10 | // PostgresDriver transforms a parsed lucene expression to a postgres sql filter. 
11 | type PostgresDriver struct { 12 | Base 13 | } 14 | 15 | // NewPostgresDriver creates a new driver that will output postgres filter strings from parsed lucene expressions. 16 | func NewPostgresDriver() PostgresDriver { 17 | fns := map[expr.Operator]RenderFN{ 18 | expr.Literal: literal, 19 | } 20 | 21 | for op, sharedFN := range Shared { 22 | _, found := fns[op] 23 | if !found { 24 | fns[op] = sharedFN 25 | } 26 | } 27 | 28 | return PostgresDriver{ 29 | Base{ 30 | RenderFNs: fns, 31 | }, 32 | } 33 | } 34 | 35 | // RenderParam will render the expression into a parameterized query using PostgreSQL's $N placeholder format. 36 | // The returned string will contain $1, $2, $3, etc. placeholders and the params will contain the values 37 | // that should be passed to the query. 38 | func (p PostgresDriver) RenderParam(e *expr.Expression) (s string, params []any, err error) { 39 | // First, use the base implementation to get the result with ? placeholders 40 | str, params, err := p.Base.RenderParam(e) 41 | if err != nil { 42 | return s, params, err 43 | } 44 | 45 | // Then convert ? placeholders to $N format 46 | paramIndex := 1 47 | result := strings.Builder{} 48 | i := 0 49 | for i < len(str) { 50 | if str[i] == '?' 
{ 51 | result.WriteString(fmt.Sprintf("$%d", paramIndex)) 52 | paramIndex++ 53 | } else { 54 | result.WriteByte(str[i]) 55 | } 56 | i++ 57 | } 58 | 59 | return result.String(), params, nil 60 | } 61 | -------------------------------------------------------------------------------- /.github/workflows/create-release.yml: -------------------------------------------------------------------------------- 1 | name: Create Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: checkout repo 13 | uses: actions/checkout@v3 14 | 15 | - name: setup go 16 | uses: actions/setup-go@v3 17 | with: 18 | go-version: 'stable' 19 | 20 | - name: set tagname 21 | id: tag 22 | run: echo "version=$(echo $GITHUB_REF | cut -d / -f 3)" >> $GITHUB_OUTPUT 23 | 24 | - name: set major version name 25 | id: major_version 26 | run: echo "version=$(echo $GITHUB_REF | cut -d / -f 3 | cut -d . -f 1)" >> $GITHUB_OUTPUT 27 | 28 | - name: test 29 | run: go test -v ./... 
30 | 31 | - name: fuzz test 32 | run: go test -v ./fuzz 33 | 34 | - name: release 35 | uses: softprops/action-gh-release@v1 36 | with: 37 | tag_name: ${{ steps.tag.outputs.version }} 38 | 39 | - name: create release branch 40 | uses: peterjgrainger/action-create-branch@v2.2.0 41 | env: 42 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 43 | with: 44 | branch: release-branch.${{ steps.major_version.outputs.version }} 45 | 46 | - name: merge to release branch 47 | uses: tukasz/direct-merge-action@v2.0.2 48 | with: 49 | source-branch: main 50 | target-branch: release-branch.${{ steps.major_version.outputs.version }} 51 | commit-message: Automatic merge from main for release ${{ steps.tag.outputs.version }} 52 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 53 | -------------------------------------------------------------------------------- /pkg/lucene/expr/operator.go: -------------------------------------------------------------------------------- 1 | package expr 2 | 3 | // Operator is an enum over the different valid lucene operations 4 | type Operator int 5 | 6 | // operations that can be used 7 | // To add a new operator, do the following: 8 | // 1. Add it to the iota here 9 | // 2. Add it to the string maps below 10 | // 3. Add a render function for it at least in base, perhaps in all the drivers as well 11 | // 4. Update the json parsing and tests to support the new operator 12 | // 5. 
Add tests in parse_test and expression_test 13 | const ( 14 | Undefined Operator = iota 15 | And 16 | Or 17 | Equals 18 | Like 19 | Not 20 | Range 21 | Must 22 | MustNot 23 | Boost 24 | Fuzzy 25 | Literal 26 | Wild 27 | Regexp 28 | Greater 29 | Less 30 | GreaterEq 31 | LessEq 32 | In 33 | List 34 | ) 35 | 36 | // String renders the operator as a string 37 | func (o Operator) String() string { 38 | return toString[o] 39 | } 40 | 41 | var fromString = map[string]Operator{ 42 | "AND": And, 43 | "OR": Or, 44 | "EQUALS": Equals, 45 | "LIKE": Like, 46 | "NOT": Not, 47 | "RANGE": Range, 48 | "MUST": Must, 49 | "MUST_NOT": MustNot, 50 | "BOOST": Boost, 51 | "FUZZY": Fuzzy, 52 | "LITERAL": Literal, 53 | "WILD": Wild, 54 | "REGEXP": Regexp, 55 | "GREATER": Greater, 56 | "LESS": Less, 57 | "GREATER_EQ": GreaterEq, 58 | "LESS_EQ": LessEq, 59 | "IN": In, 60 | "LIST": List, 61 | } 62 | 63 | var toString = map[Operator]string{ 64 | And: "AND", 65 | Or: "OR", 66 | Equals: "EQUALS", 67 | Like: "LIKE", 68 | Not: "NOT", 69 | Range: "RANGE", 70 | Must: "MUST", 71 | MustNot: "MUST_NOT", 72 | Boost: "BOOST", 73 | Fuzzy: "FUZZY", 74 | Literal: "LITERAL", 75 | Wild: "WILD", 76 | Regexp: "REGEXP", 77 | Greater: "GREATER", 78 | Less: "LESS", 79 | GreaterEq: "GREATER_EQ", 80 | LessEq: "LESS_EQ", 81 | In: "IN", 82 | List: "LIST", 83 | } 84 | -------------------------------------------------------------------------------- /.github/bump.yml: -------------------------------------------------------------------------------- 1 | release: 2 | title-prefix: 'v' 3 | initial-version: '0.0.1' 4 | tag-prefix: 'v' 5 | commit-note-replacers: 6 | - replace-prefix: 'breaking: ' 7 | new-prefix: '' 8 | - replace-prefix: 'feature: ' 9 | new-prefix: '' 10 | - replace-prefix: 'change: ' 11 | new-prefix: '' 12 | - replace-prefix: 'fix: ' 13 | new-prefix: '' 14 | - replace-prefix: 'document: ' 15 | new-prefix: '' 16 | - replace-prefix: 'dependency: ' 17 | new-prefix: '' 18 | branch: 19 | base-branch: main 20 | 
version-branch-prefix: 'v' 21 | bump-version-commit-prefix: 'v' 22 | categories: 23 | - title: 'Breaking Changes!' 24 | labels: 25 | - 'BreakingChange' 26 | commits: 27 | - 'breaking:' 28 | changes-prefix: ':warning: ' 29 | 30 | - title: 'Changes' 31 | labels: 32 | - 'Feature' 33 | commits: 34 | - 'feature:' 35 | changes-prefix: ':gift: ' 36 | 37 | - title: 'Changes' 38 | labels: 39 | - Maintenance 40 | commits: 41 | - 'change:' 42 | changes-prefix: ':hammer: ' 43 | 44 | - title: 'Bug Fixes' 45 | labels: 46 | - 'Bug' 47 | commits: 48 | - 'fix:' 49 | changes-prefix: ':ambulance: ' 50 | 51 | - title: 'Changes' 52 | labels: 53 | - 'Documentation' 54 | commits: 55 | - 'document:' 56 | changes-prefix: ':blue_book: ' 57 | 58 | - title: 'Dependency Updates' 59 | labels: 60 | - 'Dependencies' 61 | skip-label: 'Development' 62 | commits: 63 | - 'dependency:' 64 | changes-prefix: ':green_book: ' 65 | bump: 66 | default: 'patch' 67 | major: 68 | labels: 69 | - 'BreakingChange' 70 | commits: 71 | - 'breaking:' 72 | minor: 73 | labels: 74 | - 'Feature' 75 | commits: 76 | - 'feature:' 77 | -------------------------------------------------------------------------------- /fuzz/fuzz_test.go: -------------------------------------------------------------------------------- 1 | package fuzz 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/grindlemire/go-lucene" 8 | "github.com/grindlemire/go-lucene/pkg/driver" 9 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 10 | pg_query "github.com/pganalyze/pg_query_go/v4" 11 | ) 12 | 13 | func FuzzPostgresDriver(f *testing.F) { 14 | tcs := []string{ 15 | "A:B AND C:D", 16 | "+foo OR (NOT(B))", 17 | "A:bar", 18 | "NOT(b:c)", 19 | "z:[* TO 10]", 20 | "x:[10 TO *] AND NOT(y:[1 TO 5]", 21 | "(+a:b -c:d) OR (z:[1 TO *] NOT(foo))", 22 | `+bbq:"woo yay"`, 23 | `-bbq:"woo"`, 24 | `(a:b)^10`, 25 | `a:foo~`, 26 | } 27 | for _, tc := range tcs { 28 | f.Add(tc) 29 | } 30 | 31 | f.Fuzz(func(t *testing.T, in string) { 32 | e, err := 
lucene.Parse(in) 33 | if err != nil { 34 | // Ignore invalid expressions. 35 | return 36 | } 37 | 38 | validateRender(t, e) 39 | 40 | // Test the default field option. 41 | e, err = lucene.Parse(in, lucene.WithDefaultField("default")) 42 | if err != nil { 43 | // Ignore invalid expressions. 44 | return 45 | } 46 | 47 | validateRender(t, e) 48 | }) 49 | } 50 | 51 | func validateRender(t *testing.T, e *expr.Expression) { 52 | f, err := driver.NewPostgresDriver().Render(e) 53 | if err != nil { 54 | // Ignore errors that are expected. 55 | if strings.Contains(err.Error(), "unable to render operator") || 56 | strings.Contains(err.Error(), "literal contains invalid utf8") || 57 | strings.Contains(err.Error(), "literal contains null byte") || 58 | strings.Contains(err.Error(), "column name contains a double quote") || 59 | strings.Contains(err.Error(), "column name is empty") || 60 | strings.Contains(err.Error(), "the BETWEEN operator needs a two item list in the right hand side") { 61 | return 62 | } 63 | 64 | t.Fatal(err) 65 | } 66 | 67 | j, err := pg_query.ParseToJSON("SELECT * FROM test WHERE a = b AND (" + f + ")") 68 | if err != nil { 69 | t.Fatal(err) 70 | } 71 | 72 | if strings.Contains(j, "CommentStmt") { 73 | t.Fatal("CommentStmt found") 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /fuzz/go.sum: -------------------------------------------------------------------------------- 1 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= 2 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= 3 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= 4 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= 5 | github.com/golang/protobuf v1.4.0/go.mod 
h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= 6 | github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0= 7 | github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 8 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 9 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 10 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 11 | github.com/google/go-cmp v0.5.1 h1:JFrFEBb2xKufg6XkJsJr+WbKb4FQlURi5RUcBveYu9k= 12 | github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 13 | github.com/grindlemire/go-lucene v0.0.14 h1:sYR1mxzvWlaQ1GTYbV6EFgswIyPQMweR2dVmUjV/pc8= 14 | github.com/grindlemire/go-lucene v0.0.14/go.mod h1:pJrIjVA07GCtlDuWTodRGcLOAiQDyWZfhHQY8DBT4jk= 15 | github.com/pganalyze/pg_query_go/v4 v4.2.3 h1:cNLqyiVMasV7YGWyYV+fkXyHp32gDfXVNCqoHztEGNk= 16 | github.com/pganalyze/pg_query_go/v4 v4.2.3/go.mod h1:aEkDNOXNM5j0YGzaAapwJ7LB3dLNj+bvbWcLv1hOVqA= 17 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 18 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 19 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= 20 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= 21 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= 22 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= 23 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= 24 | google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= 25 | 
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 26 | -------------------------------------------------------------------------------- /pkg/driver/postgresql_test.go: -------------------------------------------------------------------------------- 1 | package driver 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 7 | ) 8 | 9 | const errTemplate = "%s:\n wanted %s\n got %s" 10 | 11 | func TestSQLDriver(t *testing.T) { 12 | type tc struct { 13 | input *expr.Expression 14 | want string 15 | } 16 | 17 | tcs := map[string]tc{ 18 | "simple_equals": { 19 | input: expr.Eq("a", 5), 20 | want: `"a" = 5`, 21 | }, 22 | "simple_and": { 23 | input: expr.AND(expr.Eq("a", 5), expr.Eq("b", "foo")), 24 | want: `("a" = 5) AND ("b" = 'foo')`, 25 | }, 26 | "simple_or": { 27 | input: expr.OR(expr.Eq("a", 5), expr.Eq("b", "foo")), 28 | want: `("a" = 5) OR ("b" = 'foo')`, 29 | }, 30 | "simple_not": { 31 | input: expr.NOT(expr.Eq("a", 1)), 32 | want: `NOT("a" = 1)`, 33 | }, 34 | "simple_like": { 35 | input: expr.LIKE("a", "%(b|d)%"), 36 | want: `"a" SIMILAR TO '%(b|d)%'`, 37 | }, 38 | "string_range": { 39 | input: expr.Rang("a", "foo", "bar", true), 40 | want: `"a" BETWEEN 'foo' AND 'bar'`, 41 | }, 42 | "mixed_number_range": { 43 | input: expr.Rang("a", 1.1, 10, true), 44 | want: `"a" >= 1.10 AND "a" <= 10.00`, 45 | }, 46 | "mixed_number_range_exlusive": { 47 | input: expr.Rang("a", 1, 10.1, false), 48 | want: `"a" > 1.00 AND "a" < 10.10`, 49 | }, 50 | "int_range": { 51 | input: expr.Rang("a", 1, 10, true), 52 | want: `"a" >= 1 AND "a" <= 10`, 53 | }, 54 | "int_range_exlusive": { 55 | input: expr.Rang("a", 1, 10, false), 56 | want: `"a" > 1 AND "a" < 10`, 57 | }, 58 | "float_range": { 59 | input: expr.Rang("a", 1.0, 10.0, true), 60 | want: `"a" >= 1 AND "a" <= 10`, 61 | }, 62 | "float_range_exlusive": { 63 | input: expr.Rang("a", 1.0, 10.0, false), 64 | want: `"a" > 1 AND "a" < 10`, 65 | }, 66 | "lt_range": 
{ 67 | input: expr.Rang("a", "*", 10, false), 68 | want: `"a" < 10`, 69 | }, 70 | "lte_range": { 71 | input: expr.Rang("a", "*", 10, true), 72 | want: `"a" <= 10`, 73 | }, 74 | "gt_range": { 75 | input: expr.Rang("a", 1, "*", false), 76 | want: `"a" > 1`, 77 | }, 78 | "gte_range": { 79 | input: expr.Rang("a", 1, "*", true), 80 | want: `"a" >= 1`, 81 | }, 82 | "lt": { 83 | input: expr.LESS("a", 10), 84 | want: `"a" < 10`, 85 | }, 86 | "lte": { 87 | input: expr.LESSEQ("a", 10), 88 | want: `"a" <= 10`, 89 | }, 90 | "gt": { 91 | input: expr.GREATER("a", 10), 92 | want: `"a" > 10`, 93 | }, 94 | "gte": { 95 | input: expr.GREATEREQ("a", 10), 96 | want: `"a" >= 10`, 97 | }, 98 | "must_ignored": { 99 | input: expr.MUST(expr.Eq("a", 1)), 100 | want: `"a" = 1`, 101 | }, 102 | "nested_filter": { 103 | input: expr.Expr( 104 | expr.Expr( 105 | expr.Expr( 106 | "a", 107 | expr.Equals, 108 | "foo", 109 | ), 110 | expr.Or, 111 | expr.Expr( 112 | "b", 113 | expr.Equals, 114 | expr.REGEXP("/b*ar/"), 115 | ), 116 | ), 117 | expr.And, 118 | expr.Expr( 119 | expr.Rang("c", "aaa", "*", false), 120 | expr.Not, 121 | ), 122 | ), 123 | want: `(("a" = 'foo') OR ("b" ~ '/b*ar/')) AND (NOT("c" BETWEEN 'aaa' AND '*'))`, 124 | }, 125 | "space_in_fieldname": { 126 | input: expr.Eq("a b", 1), 127 | want: `"a b" = 1`, 128 | }, 129 | "equals_in_equals": { 130 | input: expr.Eq("a", expr.Eq("b", 1)), 131 | want: `"a" = ("b" = 1)`, 132 | }, 133 | "regexp": { 134 | input: expr.REGEXP("/b*ar/"), 135 | want: `'/b*ar/'`, 136 | }, 137 | } 138 | 139 | for name, tc := range tcs { 140 | t.Run(name, func(t *testing.T) { 141 | got, err := NewPostgresDriver().Render(tc.input) 142 | if err != nil { 143 | t.Fatalf("got an unexpected error when rendering: %v", err) 144 | } 145 | 146 | if tc.want != got { 147 | t.Fatalf(errTemplate, "generated sql does not match", tc.want, got) 148 | } 149 | }) 150 | } 151 | } 152 | -------------------------------------------------------------------------------- 
/pkg/lucene/expr/renderer.go: -------------------------------------------------------------------------------- 1 | package expr 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | type renderer func(e *Expression, verbose bool) string 9 | 10 | var renderers = map[Operator]renderer{ 11 | Equals: renderEquals, 12 | And: renderBasic, 13 | Or: renderBasic, 14 | Not: renderWrapper, 15 | Range: renderRange, 16 | Must: renderMust, 17 | MustNot: renderMustNot, 18 | Boost: renderBoost, 19 | Fuzzy: renderFuzzy, 20 | Literal: renderLiteral, 21 | Wild: renderLiteral, 22 | Regexp: renderLiteral, 23 | Greater: renderBasic, 24 | Less: renderBasic, 25 | GreaterEq: renderBasic, 26 | LessEq: renderBasic, 27 | Like: renderBasic, 28 | In: renderBasic, 29 | List: renderList, 30 | } 31 | 32 | func renderEquals(e *Expression, verbose bool) string { 33 | if verbose { 34 | return fmt.Sprintf("%#v:%#v", e.Left, e.Right) 35 | } 36 | return fmt.Sprintf("%s:%s", e.Left, e.Right) 37 | } 38 | 39 | func renderBasic(e *Expression, verbose bool) string { 40 | if verbose { 41 | return fmt.Sprintf("(%#v) %s (%#v)", e.Left, toString[e.Op], e.Right) 42 | } 43 | return fmt.Sprintf("%s %s %s", e.Left, toString[e.Op], e.Right) 44 | } 45 | 46 | func renderWrapper(e *Expression, verbose bool) string { 47 | if verbose { 48 | return fmt.Sprintf("%s(%#v)", toString[e.Op], e.Left) 49 | } 50 | return fmt.Sprintf("%s(%s)", toString[e.Op], e.Left) 51 | } 52 | 53 | func renderMustNot(e *Expression, verbose bool) string { 54 | if verbose { 55 | return fmt.Sprintf("%s(%#v)", toString[e.Op], e.Left) 56 | } 57 | return fmt.Sprintf("-%s", e.Left) 58 | } 59 | 60 | func renderMust(e *Expression, verbose bool) string { 61 | if verbose { 62 | return fmt.Sprintf("%s(%#v)", toString[e.Op], e.Left) 63 | } 64 | return fmt.Sprintf("+%s", e.Left) 65 | } 66 | 67 | func renderBoost(e *Expression, verbose bool) string { 68 | if verbose { 69 | if e.boostPower > 1 { 70 | return fmt.Sprintf("%s(%#v^%.1f)", toString[e.Op], e.Left, 
e.boostPower) 71 | } 72 | 73 | return fmt.Sprintf("%s(%#v)", toString[e.Op], e.Left) 74 | } 75 | 76 | if e.boostPower > 1 { 77 | return fmt.Sprintf("%s^%.1f", e.Left, e.boostPower) 78 | } 79 | 80 | return fmt.Sprintf("%s^", e.Left) 81 | } 82 | 83 | func renderFuzzy(e *Expression, verbose bool) string { 84 | if verbose { 85 | if e.fuzzyDistance > 1 { 86 | return fmt.Sprintf("%s(%#v~%d)", toString[e.Op], e.Left, e.fuzzyDistance) 87 | } 88 | 89 | return fmt.Sprintf("%s(%#v)", toString[e.Op], e.Left) 90 | } 91 | 92 | if e.fuzzyDistance > 1 { 93 | return fmt.Sprintf("%s~%d", e.Left, e.fuzzyDistance) 94 | } 95 | 96 | return fmt.Sprintf("%s~", e.Left) 97 | } 98 | 99 | func renderRange(e *Expression, verbose bool) string { 100 | boundary := e.Right.(*RangeBoundary) 101 | if verbose { 102 | if boundary.Inclusive { 103 | return fmt.Sprintf("%#v:[%#v TO %#v]", e.Left, boundary.Min, boundary.Max) 104 | } 105 | 106 | return fmt.Sprintf("%#v:{%#v TO %#v}", e.Left, boundary.Min, boundary.Max) 107 | } 108 | if boundary.Inclusive { 109 | return fmt.Sprintf("%s:[%s TO %s]", e.Left, boundary.Min, boundary.Max) 110 | } 111 | 112 | return fmt.Sprintf("%s:{%s TO %s}", e.Left, boundary.Min, boundary.Max) 113 | } 114 | 115 | func renderList(e *Expression, verbose bool) string { 116 | vals := e.Left.([]*Expression) 117 | strs := []string{} 118 | for _, v := range vals { 119 | if verbose { 120 | strs = append(strs, fmt.Sprintf("%#v", v.Left)) 121 | continue 122 | } 123 | strs = append(strs, fmt.Sprintf("%s", v.Left)) 124 | } 125 | 126 | if verbose { 127 | return fmt.Sprintf("LIST(%s)", strings.Join(strs, ", ")) 128 | } 129 | 130 | return fmt.Sprintf("(%s)", strings.Join(strs, ", ")) 131 | } 132 | 133 | func renderLiteral(e *Expression, verbose bool) string { 134 | if verbose { 135 | return fmt.Sprintf("%s(%#v)", toString[e.Op], e.Left) 136 | } 137 | 138 | s, isStr := e.Left.(string) 139 | if isStr && strings.ContainsAny(s, " ") { 140 | return fmt.Sprintf(`"%s"`, s) 141 | } 142 | 143 | 
return fmt.Sprintf("%v", e.Left) 144 | } 145 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-lucene 2 | 3 | [![Go Reference](https://pkg.go.dev/badge/github.com/grindlemire/go-lucene.svg)](https://pkg.go.dev/github.com/grindlemire/go-lucene) 4 | 5 | A zero-dependency Lucene query parser for Go that converts Lucene syntax into SQL queries. 6 | 7 | ## Features 8 | 9 | - Full Lucene syntax support (compatible with [Apache Lucene 9.4.2](https://lucene.apache.org/core/9_4_2/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package.description)) 10 | - SQL injection safe with parameterized queries 11 | - Zero dependencies 12 | - Extensible with custom SQL drivers 13 | - PostgreSQL support out of the box 14 | 15 | ## Installation 16 | 17 | ```bash 18 | go get github.com/grindlemire/go-lucene 19 | ``` 20 | 21 | ## Basic Usage 22 | 23 | ```go 24 | query := `name:"John Doe" AND age:[25 TO 35]` 25 | filter, err := lucene.ToPostgres(query) 26 | // Result: (("name" = 'John Doe') AND ("age" >= 25 AND "age" <= 35)) 27 | ``` 28 | 29 | 30 | ## API Methods 31 | 32 | ### Direct SQL Generation 33 | ```go 34 | filter, err := lucene.ToPostgres(query) 35 | ``` 36 | 37 | ### Parameterized Queries (Recommended) 38 | ```go 39 | filter, params, err := lucene.ToParameterizedPostgres(query) 40 | db.Query(sql, params...) 
41 | ``` 42 | 43 | ### Default Fields 44 | ```go 45 | filter, err := lucene.ToPostgres("red OR green", lucene.WithDefaultField("color")) 46 | // Result: ("color" = 'red') OR ("color" = 'green') 47 | ``` 48 | 49 | ## Lucene to SQL Operator Mapping 50 | 51 | | Lucene Query | SQL Output | Description | 52 | |--------------|------------|-------------| 53 | | `field:value` | `"field" = 'value'` | Exact match | 54 | | `field:"phrase with spaces"` | `"field" = 'phrase with spaces'` | Quoted phrase | 55 | | `field1:value1 AND field2:value2` | `("field1" = 'value1') AND ("field2" = 'value2')` | Boolean AND | 56 | | `field1:value1 OR field2:value2` | `("field1" = 'value1') OR ("field2" = 'value2')` | Boolean OR | 57 | | `NOT field:value` | `NOT("field" = 'value')` | Boolean NOT | 58 | | `+field:value` | `"field" = 'value'` | Required (equivalent to no operator) | 59 | | `-field:value` | `NOT("field" = 'value')` | Prohibited (equivalent to NOT) | 60 | | `field:[min TO max]` | `"field" >= min AND "field" <= max` | Inclusive range | 61 | | `field:{min TO max}` | `"field" BETWEEN 'min' AND 'max'` (strings) or `"field" > min AND "field" < max` (numbers) | Exclusive range | 62 | | `field:[min TO *]` | `"field" >= min` | Open-ended range (min to infinity) | 63 | | `field:[* TO max]` | `"field" <= max` | Open-ended range (negative infinity to max) | 64 | | `field:*` | `"field" SIMILAR TO '%'` | Wildcard match (matches anything) | 65 | | `field:pattern*` | `"field" SIMILAR TO 'pattern%'` | Wildcard suffix | 66 | | `field:pattern?` | `"field" SIMILAR TO 'pattern_'` | Single character wildcard | 67 | | `field:/regex/` | `"field" ~ '/regex/'` | Regular expression match | 68 | | `(field1:value1 OR field2:value2) AND field3:value3` | `(("field1" = 'value1') OR ("field2" = 'value2')) AND ("field3" = 'value3')` | Grouping | 69 | 70 | ## Examples 71 | 72 | ### Complex Query 73 | ```go 74 | query := `name:"John Doe" AND age:[25 TO 35] AND NOT status:inactive` 75 | // SQL: (("name" = 'John 
Doe') AND ("age" >= 25 AND "age" <= 35)) AND (NOT("status" = 'inactive')) 76 | ``` 77 | 78 | ### Parameterized Output 79 | ```go 80 | filter, params, err := lucene.ToParameterizedPostgres(`color:red AND type:"gala"`) 81 | // SQL: ("color" = $1) AND ("type" = $2) 82 | // Params: ["red", "gala"] 83 | ``` 84 | 85 | ### Wildcard Queries 86 | ```go 87 | filter, err := lucene.ToPostgres(`name:John* AND email:*@example.com`) 88 | // SQL: ("name" SIMILAR TO 'John%') AND ("email" SIMILAR TO '%@example.com') 89 | ``` 90 | 91 | ### Regular Expression Queries 92 | ```go 93 | filter, err := lucene.ToPostgres(`url:/example\.com\/.*\/`) 94 | // SQL: "url" ~ '/example\.com\/.*\/' 95 | ``` 96 | 97 | ## Custom SQL Drivers 98 | 99 | Extend the library for different SQL dialects by creating custom drivers: 100 | 101 | ```go 102 | import ( 103 | "github.com/grindlemire/go-lucene/pkg/driver" 104 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 105 | ) 106 | 107 | type MySQLDriver struct { 108 | driver.Base 109 | } 110 | 111 | func NewMySQLDriver() MySQLDriver { 112 | fns := map[expr.Operator]driver.RenderFN{ 113 | expr.Equals: func(left, right string) (string, error) { 114 | return fmt.Sprintf("`%s` = %s", left, right), nil 115 | }, 116 | } 117 | 118 | // Use shared functions for other operators 119 | for op, sharedFN := range driver.Shared { 120 | if _, exists := fns[op]; !exists { 121 | fns[op] = sharedFN 122 | } 123 | } 124 | 125 | return MySQLDriver{Base: driver.Base{RenderFNs: fns}} 126 | } 127 | 128 | // Usage 129 | mysqlDriver := NewMySQLDriver() 130 | expr, _ := lucene.Parse(`color:red`) 131 | filter, _ := mysqlDriver.Render(expr) 132 | // Result: `color` = 'red' 133 | ``` 134 | -------------------------------------------------------------------------------- /internal/lex/lext_test.go: -------------------------------------------------------------------------------- 1 | package lex 2 | 3 | import ( 4 | "reflect" 5 | "strings" 6 | "testing" 7 | ) 8 | 9 | const errTemplate = 
"%s:\n wanted %v\n got %v" 10 | 11 | func TestLex(t *testing.T) { 12 | type tc struct { 13 | in string 14 | expected []Token 15 | } 16 | tcs := map[string]tc{ 17 | "empty_returns_eof": { 18 | in: "", 19 | expected: []Token{tok(TEOF, "EOF")}, 20 | }, 21 | "negatives": { 22 | in: "-1", 23 | expected: []Token{tok(TLiteral, "-1")}, 24 | }, 25 | "negatives_mixed_with_minus": { 26 | in: "a:-1 AND -b:c", 27 | expected: []Token{ 28 | tok(TLiteral, "a"), 29 | tok(TColon, ":"), 30 | tok(TLiteral, "-1"), 31 | tok(TAnd, "AND"), 32 | tok(TMinus, "-"), 33 | tok(TLiteral, "b"), 34 | tok(TColon, ":"), 35 | tok(TLiteral, "c"), 36 | }, 37 | }, 38 | "negatives_in_elastic_comparison": { 39 | in: "a:<-10 AND -b:>=20", 40 | expected: []Token{ 41 | tok(TLiteral, "a"), 42 | tok(TColon, ":"), 43 | tok(TLess, "<"), 44 | tok(TLiteral, "-10"), 45 | tok(TAnd, "AND"), 46 | tok(TMinus, "-"), 47 | tok(TLiteral, "b"), 48 | tok(TColon, ":"), 49 | tok(TGreater, ">"), 50 | tok(TEqual, "="), 51 | tok(TLiteral, "20"), 52 | }, 53 | }, 54 | "literals": { 55 | in: "abc", 56 | expected: []Token{tok(TLiteral, "abc")}, 57 | }, 58 | "spaces_ignored": { 59 | in: "ab c", 60 | expected: []Token{ 61 | tok(TLiteral, "ab"), 62 | tok(TLiteral, "c"), 63 | }, 64 | }, 65 | "quotes_single_token": { 66 | in: `"abc"`, 67 | expected: []Token{ 68 | tok(TQuoted, "\"abc\""), 69 | }, 70 | }, 71 | "single_quotes_single_token": { 72 | in: `'abc'`, 73 | expected: []Token{ 74 | tok(TQuoted, "'abc'"), 75 | }, 76 | }, 77 | "quotes_single_token_with_spaces": { 78 | in: `"ab c"`, 79 | expected: []Token{ 80 | tok(TQuoted, "\"ab c\""), 81 | }, 82 | }, 83 | "single_quotes_single_token_with_spaces": { 84 | in: `'ab c'`, 85 | expected: []Token{ 86 | tok(TQuoted, "'ab c'"), 87 | }, 88 | }, 89 | "parens_tokenized": { 90 | in: `(ABC)`, 91 | expected: []Token{ 92 | tok(TLParen, "("), 93 | tok(TLiteral, "ABC"), 94 | tok(TRParen, ")"), 95 | }, 96 | }, 97 | "equals_operator_tokenized_in_stream": { 98 | in: `a = b`, 99 | expected: []Token{ 100 | 
tok(TLiteral, "a"), 101 | tok(TEqual, "="), 102 | tok(TLiteral, "b"), 103 | }, 104 | }, 105 | "equals_operator_lucene_tokenized_in_stream": { 106 | in: `a:b`, 107 | expected: []Token{ 108 | tok(TLiteral, "a"), 109 | tok(TColon, ":"), 110 | tok(TLiteral, "b"), 111 | }, 112 | }, 113 | "and_boolean_tokenized": { 114 | in: `a AND b`, 115 | expected: []Token{ 116 | tok(TLiteral, "a"), 117 | tok(TAnd, "AND"), 118 | tok(TLiteral, "b"), 119 | }, 120 | }, 121 | "or_boolean_tokenized": { 122 | in: `a OR b`, 123 | expected: []Token{ 124 | tok(TLiteral, "a"), 125 | tok(TOr, "OR"), 126 | tok(TLiteral, "b"), 127 | }, 128 | }, 129 | "not_boolean_tokenized": { 130 | in: `NOT a`, 131 | expected: []Token{ 132 | tok(TNot, "NOT"), 133 | tok(TLiteral, "a"), 134 | }, 135 | }, 136 | "to_tokenized": { 137 | in: `a TO b`, 138 | expected: []Token{ 139 | tok(TLiteral, "a"), 140 | tok(TTO, "TO"), 141 | tok(TLiteral, "b"), 142 | }, 143 | }, 144 | "regexp_tokenized": { 145 | in: `/a[b]*/`, 146 | expected: []Token{ 147 | tok(TRegexp, "/a[b]*/"), 148 | }, 149 | }, 150 | "regexp_tokenized_with_escaped_chars": { 151 | in: `/.*example.com\/article\/.*/`, 152 | expected: []Token{ 153 | tok(TRegexp, `/.*example.com\/article\/.*/`), 154 | }, 155 | }, 156 | "symbols_tokenized": { 157 | in: `()[]{}:+-=><`, 158 | expected: []Token{ 159 | tok(TLParen, "("), 160 | tok(TRParen, ")"), 161 | tok(TLSquare, "["), 162 | tok(TRSquare, "]"), 163 | tok(TLCurly, "{"), 164 | tok(TRCurly, "}"), 165 | tok(TColon, ":"), 166 | tok(TPlus, "+"), 167 | tok(TMinus, "-"), 168 | tok(TEqual, "="), 169 | tok(TGreater, ">"), 170 | tok(TLess, "<"), 171 | }, 172 | }, 173 | "token_boost": { 174 | in: "a:b^2 foo^4", 175 | expected: []Token{ 176 | tok(TLiteral, "a"), 177 | tok(TColon, ":"), 178 | tok(TLiteral, "b"), 179 | tok(TCarrot, "^"), 180 | tok(TLiteral, "2"), 181 | tok(TLiteral, "foo"), 182 | tok(TCarrot, "^"), 183 | tok(TLiteral, "4"), 184 | }, 185 | }, 186 | "token_boost_floats": { 187 | in: "a:b^2.1 foo^4.40", 188 | expected: 
[]Token{ 189 | tok(TLiteral, "a"), 190 | tok(TColon, ":"), 191 | tok(TLiteral, "b"), 192 | tok(TCarrot, "^"), 193 | tok(TLiteral, "2.1"), 194 | tok(TLiteral, "foo"), 195 | tok(TCarrot, "^"), 196 | tok(TLiteral, "4.40"), 197 | }, 198 | }, 199 | "entire_stream_tokenized": { 200 | in: `(+k1:v1 AND -k2:v2) OR k3:"foo bar"^2 OR k4:a*~10`, 201 | expected: []Token{ 202 | tok(TLParen, "("), 203 | tok(TPlus, "+"), 204 | tok(TLiteral, "k1"), 205 | tok(TColon, ":"), 206 | tok(TLiteral, "v1"), 207 | tok(TAnd, "AND"), 208 | tok(TMinus, "-"), 209 | tok(TLiteral, "k2"), 210 | tok(TColon, ":"), 211 | tok(TLiteral, "v2"), 212 | tok(TRParen, ")"), 213 | tok(TOr, "OR"), 214 | tok(TLiteral, "k3"), 215 | tok(TColon, ":"), 216 | tok(TQuoted, "\"foo bar\""), 217 | tok(TCarrot, "^"), 218 | tok(TLiteral, "2"), 219 | tok(TOr, "OR"), 220 | tok(TLiteral, "k4"), 221 | tok(TColon, ":"), 222 | tok(TLiteral, "a*"), 223 | tok(TTilde, "~"), 224 | tok(TLiteral, "10"), 225 | }, 226 | }, 227 | "escape_sequence_tokenized": { 228 | in: `\(1\+1\)\:2`, 229 | expected: []Token{ 230 | tok(TLiteral, `\(1\+1\)\:2`), 231 | }, 232 | }, 233 | "quoted_sequence_tokensized": { 234 | in: `"foo bar":"works well"`, 235 | expected: []Token{ 236 | tok(TQuoted, "\"foo bar\""), 237 | tok(TColon, ":"), 238 | tok(TQuoted, "\"works well\""), 239 | }, 240 | }, 241 | } 242 | 243 | for name, tc := range tcs { 244 | t.Run(name, func(t *testing.T) { 245 | tokens := consumeAll(tc.in) 246 | tc.expected = finalizeExpected(tc.in, tc.expected) 247 | if !reflect.DeepEqual(tc.expected, tokens) { 248 | t.Fatalf(errTemplate, "token streams don't match", tc.expected, tokens) 249 | } 250 | }) 251 | } 252 | } 253 | 254 | func finalizeExpected(in string, tokens []Token) (out []Token) { 255 | // if we are testing just the EOF return early and don't do anything 256 | if tokens[0].Typ == TEOF { 257 | return tokens 258 | } 259 | 260 | offset := 0 261 | for idx, token := range tokens { 262 | sliced := in[offset:] 263 | 264 | // if its an error 
then we don't have any offset to calculate 265 | if token.Typ == TErr { 266 | tokens[idx].pos = offset 267 | continue 268 | } 269 | 270 | // calculate the position of the new token in the string 271 | tokens[idx].pos = strings.Index(sliced, token.Val) + offset 272 | 273 | // handle the whitespace that pops up so we keep the offset in sync 274 | whitespaceOffset := movePastWhitespace(sliced) 275 | offset += len(token.Val) + whitespaceOffset 276 | } 277 | 278 | // if we didn't end in an error, add in an EOF token at the end 279 | if tokens[len(tokens)-1].Typ != TErr { 280 | tokens = append(tokens, Token{TEOF, len(in), "EOF"}) 281 | } 282 | return tokens 283 | } 284 | 285 | func movePastWhitespace(in string) (count int) { 286 | for _, c := range in { 287 | if !isSpace(c) { 288 | return count 289 | } 290 | count++ 291 | } 292 | return count 293 | } 294 | 295 | func consumeAll(in string) (toks []Token) { 296 | l := Lex(in) 297 | for { 298 | tok := l.Next() 299 | toks = append(toks, tok) 300 | if tok.Typ == TEOF || tok.Typ == TErr { 301 | return toks 302 | } 303 | } 304 | } 305 | 306 | func tok(typ TokType, val string) Token { 307 | return Token{ 308 | Typ: typ, 309 | // there is intentionally no pos set because we are doing it in generate 310 | Val: val, 311 | } 312 | } 313 | -------------------------------------------------------------------------------- /pkg/driver/base.go: -------------------------------------------------------------------------------- 1 | package driver 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 8 | ) 9 | 10 | // Shared is the shared set of render functions that can be used as a base and overriden 11 | // for each flavor of sql 12 | var Shared = map[expr.Operator]RenderFN{ 13 | expr.Literal: literal, 14 | expr.And: basicCompound(expr.And), 15 | expr.Or: basicCompound(expr.Or), 16 | expr.Not: basicWrap(expr.Not), 17 | expr.Equals: equals, 18 | expr.Range: rang, 19 | expr.Must: noop, // must 
doesn't really translate to sql 20 | expr.MustNot: basicWrap(expr.Not), // must not is really just a negation 21 | // expr.Fuzzy: unsupported, 22 | // expr.Boost: unsupported, 23 | expr.Wild: literal, 24 | expr.Regexp: literal, 25 | expr.Like: like, 26 | expr.Greater: greater, 27 | expr.GreaterEq: greaterEq, 28 | expr.Less: less, 29 | expr.LessEq: lessEq, 30 | expr.In: inFn, 31 | expr.List: list, 32 | } 33 | 34 | // Base is the base driver that is embedded in each driver 35 | type Base struct { 36 | RenderFNs map[expr.Operator]RenderFN 37 | } 38 | 39 | // RenderParam will render the expression into a parameterized query. The returned string will contain placeholders 40 | // and the params will contain the values that should be passed to the query. 41 | func (b Base) RenderParam(e *expr.Expression) (s string, params []any, err error) { 42 | if e == nil { 43 | return "", params, nil 44 | } 45 | 46 | left, lparams, err := b.serializeParams(e.Left) 47 | if err != nil { 48 | return s, params, err 49 | } 50 | 51 | right, rparams, err := b.serializeParams(e.Right) 52 | if err != nil { 53 | return s, params, err 54 | } 55 | 56 | // edge case for a standalone wildcard on a like operator. 57 | // Convert to a regular expression that matches anything 58 | if right == "'*'" && e.Op == expr.Like { 59 | right = "?" 60 | rparams = []any{"%"} 61 | } 62 | 63 | // if we are in a regular expression we need to convert the * to % and ? to _ 64 | if e.Op == expr.Like && len(rparams) > 0 { 65 | rval := rparams[0].(string) 66 | // keep the regexp intact if it is a // regexp 67 | if len(rval) < 4 || rval[0] != '/' || rval[len(rval)-1] != '/' { 68 | rval = strings.ReplaceAll(rval, "*", "%") 69 | rval = strings.ReplaceAll(rval, "?", "_") 70 | rparams[0] = rval 71 | } 72 | } 73 | 74 | params = append(lparams, rparams...) 
75 | 76 | if e.Op != expr.Range && 77 | e.Op != expr.Not && 78 | e.Op != expr.List && 79 | e.Op != expr.In && 80 | e.Op != expr.Literal && 81 | e.Op != expr.Must && 82 | e.Op != expr.MustNot { 83 | if !b.isSimple(e.Left) { 84 | left = "(" + left + ")" 85 | } 86 | if !b.isSimple(e.Right) { 87 | right = "(" + right + ")" 88 | } 89 | } 90 | 91 | // if we have a like operator then we need to use the likeParam function instead of the default 92 | // since we are replacing all the * with % and ? with _ 93 | if e.Op == expr.Like { 94 | str, err := likeParam(left, right, rparams) 95 | return str, params, err 96 | } 97 | 98 | // if we have a range operator then we need to use the rangParam function instead of the default 99 | // since we need to be able to infer the param types that are injected 100 | if e.Op == expr.Range { 101 | str, err := rangParam(left, right, rparams) 102 | return str, params, err 103 | } 104 | 105 | fn, ok := b.RenderFNs[e.Op] 106 | if !ok { 107 | return s, params, fmt.Errorf("unable to render operator [%s]", e.Op) 108 | } 109 | 110 | str, err := fn(left, right) 111 | return str, params, err 112 | } 113 | 114 | // Render will render the expression based on the renderFNs provided by the driver. 
115 | func (b Base) Render(e *expr.Expression) (s string, err error) { 116 | if e == nil { 117 | return "", nil 118 | } 119 | 120 | left, err := b.serialize(e.Left) 121 | if err != nil { 122 | return s, err 123 | } 124 | 125 | right, err := b.serialize(e.Right) 126 | if err != nil { 127 | return s, err 128 | } 129 | 130 | if e.Op != expr.Range && 131 | e.Op != expr.Not && 132 | e.Op != expr.List && 133 | e.Op != expr.In && 134 | e.Op != expr.Literal && 135 | e.Op != expr.Must && 136 | e.Op != expr.MustNot { 137 | if !b.isSimple(e.Left) { 138 | left = "(" + left + ")" 139 | } 140 | if !b.isSimple(e.Right) { 141 | right = "(" + right + ")" 142 | } 143 | } 144 | 145 | fn, ok := b.RenderFNs[e.Op] 146 | if !ok { 147 | return s, fmt.Errorf("unable to render operator [%s]", e.Op) 148 | } 149 | 150 | return fn(left, right) 151 | } 152 | 153 | func (b Base) isSimple(in any) bool { 154 | switch v := in.(type) { 155 | case *expr.Expression: 156 | return v.Op == expr.Undefined || v.Op == expr.Literal || v.Op == expr.Regexp || v.Op == expr.Wild 157 | case expr.Column: 158 | return true 159 | case nil: 160 | return true 161 | case string, int, float64: 162 | return true 163 | default: 164 | return false 165 | } 166 | } 167 | 168 | func (b Base) serialize(in any) (s string, err error) { 169 | if in == nil { 170 | return "", nil 171 | } 172 | 173 | switch v := in.(type) { 174 | case *expr.Expression: 175 | return b.Render(v) 176 | case []*expr.Expression: 177 | strs := []string{} 178 | for _, e := range v { 179 | s, err = b.Render(e) 180 | if err != nil { 181 | return s, err 182 | } 183 | strs = append(strs, s) 184 | } 185 | return strings.Join(strs, ", "), nil 186 | case *expr.RangeBoundary: 187 | min, err := b.serialize(v.Min) 188 | if err != nil { 189 | return "", err 190 | } 191 | max, err := b.serialize(v.Max) 192 | if err != nil { 193 | return "", err 194 | } 195 | 196 | if v.Inclusive { 197 | return fmt.Sprintf("[%s, %s]", min, max), nil 198 | } 199 | return 
fmt.Sprintf("(%s, %s)", min, max), nil 200 | 201 | case expr.Column: 202 | if len(v) == 0 { 203 | return "", fmt.Errorf("column name is empty") 204 | } 205 | if strings.ContainsRune(string(v), '"') { 206 | return "", fmt.Errorf("column name contains a double quote: %q", v) 207 | } 208 | // Always escape column names with double quotes, 209 | // otherwise we need to know the reserved words 210 | // which might change in the future. 211 | return fmt.Sprintf(`"%s"`, string(v)), nil 212 | case string: 213 | // escape single quotes with double single quotes 214 | return fmt.Sprintf("'%s'", strings.ReplaceAll(v, "'", "''")), nil 215 | default: 216 | return fmt.Sprintf("%v", v), nil 217 | } 218 | } 219 | 220 | func (b Base) serializeParams(in any) (s string, params []any, err error) { 221 | if in == nil { 222 | return "", params, nil 223 | } 224 | 225 | switch v := in.(type) { 226 | case *expr.Expression: 227 | return b.RenderParam(v) 228 | case []*expr.Expression: 229 | strs := []string{} 230 | for _, e := range v { 231 | s, eparams, err := b.RenderParam(e) 232 | if err != nil { 233 | return s, params, err 234 | } 235 | strs = append(strs, s) 236 | params = append(params, eparams...) 237 | } 238 | return strings.Join(strs, ", "), params, nil 239 | case *expr.RangeBoundary: 240 | min, minParams, err := b.serializeParams(v.Min) 241 | if err != nil { 242 | return "", params, err 243 | } 244 | max, maxParams, err := b.serializeParams(v.Max) 245 | if err != nil { 246 | return "", params, err 247 | } 248 | params = append(minParams, maxParams...) 
249 | 250 | if v.Inclusive { 251 | return fmt.Sprintf("[%s, %s]", min, max), params, nil 252 | } 253 | return fmt.Sprintf("(%s, %s)", min, max), params, nil 254 | 255 | case expr.Column: 256 | if len(v) == 0 { 257 | return "", params, fmt.Errorf("column name is empty") 258 | } 259 | if strings.ContainsRune(string(v), '"') { 260 | return "", params, fmt.Errorf("column name contains a double quote: %q", v) 261 | } 262 | // Always escape column names with double quotes, 263 | // otherwise we need to know the reserved words 264 | // which might change in the future. 265 | return fmt.Sprintf(`"%s"`, string(v)), params, nil 266 | case string: 267 | // if we have a '*' then we don't want to insert a param since 268 | // it can be used either in a regexp or a range operator. 269 | if v == "*" { 270 | return "'*'", params, nil 271 | } 272 | 273 | // escape single quotes with double single quotes 274 | return "?", []any{v}, nil 275 | default: 276 | return "?", []any{v}, nil 277 | } 278 | } 279 | -------------------------------------------------------------------------------- /parse.go: -------------------------------------------------------------------------------- 1 | package lucene 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "strconv" 7 | "strings" 8 | 9 | "github.com/grindlemire/go-lucene/internal/lex" 10 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 11 | "github.com/grindlemire/go-lucene/pkg/lucene/reduce" 12 | ) 13 | 14 | type Opt func(*parser) 15 | 16 | // WithDefaultField sets the default field to equate literals to. 17 | // For example a:b AND "c" will be parsed as a:b AND myfield:"c" 18 | func WithDefaultField(field string) Opt { 19 | return func(p *parser) { 20 | p.defaultField = field 21 | } 22 | } 23 | 24 | // Parse will parse a lucene expression string using a buffer and the shift reduce algorithm. The returned expression 25 | // is an AST that can be rendered to a variety of different formats. 
26 | func Parse(input string, opts ...Opt) (e *expr.Expression, err error) { 27 | p := &parser{ 28 | lex: lex.Lex(input), 29 | stack: []any{}, 30 | nonTerminals: []lex.Token{{Typ: lex.TStart}}, 31 | } 32 | 33 | for _, opt := range opts { 34 | opt(p) 35 | } 36 | 37 | ex, err := p.parse() 38 | if err != nil { 39 | return e, err 40 | } 41 | 42 | err = expr.Validate(ex) 43 | if err != nil { 44 | return e, err 45 | } 46 | 47 | return ex, nil 48 | } 49 | 50 | type parser struct { 51 | lex *lex.Lexer 52 | stack []any 53 | nonTerminals []lex.Token 54 | 55 | defaultField string 56 | } 57 | 58 | func (p *parser) parse() (e *expr.Expression, err error) { 59 | for { 60 | next := p.lex.Peek() 61 | if p.shouldAccept(next) { 62 | if len(p.stack) != 1 { 63 | return e, fmt.Errorf("multiple expressions left after parsing: %v", p.stack) 64 | } 65 | final, ok := p.stack[0].(*expr.Expression) 66 | if !ok { 67 | return e, fmt.Errorf( 68 | "final parse didn't return an expression: %s [type: %s]", 69 | p.stack[0], 70 | reflect.TypeOf(final), 71 | ) 72 | } 73 | 74 | // edge case for a single literal in the expression and a default field specified 75 | if final.Op == expr.Literal && p.defaultField != "" { 76 | final = expr.Expr(p.defaultField, expr.Equals, final.Left) 77 | } 78 | 79 | return final, nil 80 | } 81 | 82 | if p.shouldShift(next) { 83 | tok := p.shift() 84 | if lex.IsTerminal(tok) { 85 | // if we have a terminal parse it and put it on the stack 86 | lit, err := parseLiteral(tok) 87 | if err != nil { 88 | return e, err 89 | } 90 | 91 | // we should always check if the current top of the stack is another token 92 | // if it isn't then we have an implicit AND we need to inject. 93 | if len(p.stack) > 0 { 94 | _, isTopToken := p.stack[len(p.stack)-1].(lex.Token) 95 | if !isTopToken { 96 | implAnd := lex.Token{Typ: lex.TAnd, Val: "AND"} 97 | // act as if we just saw an AND and check if we need to reduce the 98 | // current token stack first. 
99 | if !p.shouldShift(implAnd) { 100 | err = p.reduce() 101 | if err != nil { 102 | return e, err 103 | } 104 | } 105 | 106 | // if we have a literal as the previous parsed thing then 107 | // we must be in an implicit AND and should reduce 108 | p.stack = append(p.stack, implAnd) 109 | p.nonTerminals = append(p.nonTerminals, implAnd) 110 | } 111 | } 112 | 113 | p.stack = append(p.stack, lit) 114 | continue 115 | } 116 | // otherwise just push the token on the stack 117 | p.stack = append(p.stack, tok) 118 | p.nonTerminals = append(p.nonTerminals, tok) 119 | continue 120 | } 121 | 122 | err = p.reduce() 123 | if err != nil { 124 | return e, err 125 | } 126 | } 127 | } 128 | 129 | func (p *parser) shift() (tok lex.Token) { 130 | return p.lex.Next() 131 | } 132 | 133 | // shouldShift determines if the parser should shift or not. This might end up in the grammar specific 134 | // packages and implemented for each grammar this parser supports but for now it can live at the top level. 135 | func (p *parser) shouldShift(next lex.Token) bool { 136 | if next.Typ == lex.TEOF { 137 | return false 138 | } 139 | 140 | if next.Typ == lex.TErr { 141 | return false 142 | } 143 | 144 | curr := p.nonTerminals[len(p.nonTerminals)-1] 145 | 146 | // if we have a terminal symbol then we always want to shift since it won't be 147 | // matched by any rule 148 | if lex.IsTerminal(next) { 149 | return true 150 | } 151 | 152 | // if we have an open grouping or the next one is we want to always shift 153 | if anyOpenBracket(curr, next) { 154 | return true 155 | } 156 | 157 | // we need the closing bracket to reduce the range subexpression so shift that on 158 | // if we see it 159 | if endingRangeSubExpr(next) { 160 | return true 161 | } 162 | 163 | // if we are ever attempting to move past a subexpr we need to parse it before moving on 164 | if anyClosingBracket(curr) { 165 | return false 166 | } 167 | 168 | // shift if our current token has less precedence than the next token 169 | return 
lex.HasLessPrecedence(curr, next) 170 | } 171 | 172 | func anyOpenBracket(curr, next lex.Token) bool { 173 | return curr.Typ == lex.TLSquare || 174 | next.Typ == lex.TLSquare || 175 | curr.Typ == lex.TLCurly || 176 | next.Typ == lex.TLCurly || 177 | curr.Typ == lex.TLParen || 178 | next.Typ == lex.TLParen 179 | } 180 | 181 | func anyClosingBracket(curr lex.Token) bool { 182 | return curr.Typ == lex.TRParen || 183 | curr.Typ == lex.TRSquare || 184 | curr.Typ == lex.TRCurly 185 | } 186 | 187 | func endingRangeSubExpr(next lex.Token) bool { 188 | return next.Typ == lex.TRSquare || next.Typ == lex.TRCurly 189 | } 190 | 191 | func (p *parser) shouldAccept(next lex.Token) bool { 192 | return len(p.stack) == 1 && 193 | next.Typ == lex.TEOF 194 | } 195 | 196 | func (p *parser) reduce() (err error) { 197 | top := []any{} 198 | for { 199 | if len(p.stack) == 0 { 200 | return fmt.Errorf("error parsing, no items left to reduce, current state: %v", top) 201 | } 202 | 203 | // pull the top off the stack 204 | s := p.stack[len(p.stack)-1] 205 | p.stack = p.stack[:len(p.stack)-1] 206 | 207 | // keep the original ordering when building up our subslice 208 | top = append([]any{s}, top...) 
209 | 210 | // try to reduce with all our reducers 211 | var reduced bool 212 | top, p.nonTerminals, reduced = reduce.Reduce(top, p.nonTerminals, p.defaultField) 213 | 214 | // if we consumed some non terminals during the reduce it means we successfully reduced 215 | if reduced { 216 | // If the reducer returned multiple elements and the first two are both expressions, 217 | // we need to inject an implicit AND between them (this happens when fuzzy/boost 218 | // does a partial reduction like [FUZZY(...), other-expr]) 219 | if len(top) >= 2 { 220 | _, isFirstExpr := top[0].(*expr.Expression) 221 | _, isSecondExpr := top[1].(*expr.Expression) 222 | if isFirstExpr && isSecondExpr { 223 | // Insert AND between the two expressions: [expr1, expr2] -> [expr1, AND, expr2] 224 | implAnd := lex.Token{Typ: lex.TAnd, Val: "AND"} 225 | newTop := append([]any{top[0]}, implAnd) 226 | newTop = append(newTop, top[1:]...) 227 | top = newTop 228 | p.nonTerminals = append(p.nonTerminals, implAnd) 229 | } 230 | } 231 | 232 | // if we successfully reduced re-add it to the top of the stack and return 233 | p.stack = append(p.stack, top...) 
234 | return nil 235 | } 236 | } 237 | } 238 | 239 | func parseLiteral(token lex.Token) (e any, err error) { 240 | // if it is a quote then remove escape 241 | if token.Typ == lex.TQuoted { 242 | return expr.Lit(strings.ReplaceAll(token.Val, "\"", "")), nil 243 | } 244 | 245 | // if it is a regexp then parse it 246 | if token.Typ == lex.TRegexp { 247 | return expr.REGEXP(token.Val), nil 248 | } 249 | 250 | // attempt to parse it as an integer 251 | ival, err := strconv.Atoi(token.Val) 252 | if err == nil { 253 | return expr.Lit(ival), nil 254 | } 255 | 256 | // attempt to parse it as a float 257 | fval, err := strconv.ParseFloat(token.Val, 64) 258 | if err == nil { 259 | return expr.Lit(fval), nil 260 | } 261 | 262 | // if it contains unescaped wildcards then it is a wildcard string 263 | if strings.ContainsAny(token.Val, "*?") { 264 | return expr.WILD(token.Val), nil 265 | } 266 | 267 | // if it contains an escape string then strip it out now 268 | if strings.Contains(token.Val, `\`) { 269 | return expr.Lit(strings.ReplaceAll(token.Val, `\`, "")), nil 270 | } 271 | 272 | return expr.Lit(token.Val), nil 273 | } 274 | -------------------------------------------------------------------------------- /pkg/lucene/expr/expression_test.go: -------------------------------------------------------------------------------- 1 | package expr 2 | 3 | import ( 4 | "encoding/json" 5 | "reflect" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | const ( 11 | errTemplate = "%s:\n wanted %#v\n got %#v" 12 | jsonErrTemplate = "%s:\n wanted %s\n got %s" 13 | ) 14 | 15 | func TestExprJSON(t *testing.T) { 16 | type tc struct { 17 | input string 18 | want *Expression 19 | } 20 | 21 | tcs := map[string]tc{ 22 | "flat_literal": { 23 | input: `"a"`, 24 | want: Lit("a"), 25 | }, 26 | "flat_wildcard": { 27 | input: `"a*"`, 28 | want: WILD("a*"), 29 | }, 30 | "flat_equals": { 31 | input: `{"left": "a", "operator": "EQUALS", "right": "b"}`, 32 | want: Eq(Lit("a"), Lit("b")), 33 | }, 34 | "flat_regexp": 
{ 35 | input: `{ 36 | "left": "a", 37 | "operator": "LIKE", 38 | "right": "/b [c]/" 39 | }`, 40 | want: LIKE(Lit("a"), REGEXP("/b [c]/")), 41 | }, 42 | "flat_inclusive_range": { 43 | input: `{ 44 | "left": "a", 45 | "operator": "RANGE", 46 | "right": { 47 | "min": 1, 48 | "max": 2, 49 | "inclusive": true 50 | } 51 | }`, 52 | want: Rang("a", 1, 2, true), 53 | }, 54 | "flat_exclusive_range": { 55 | input: `{ 56 | "left": "a", 57 | "operator": "RANGE", 58 | "right": { 59 | "min": 1, 60 | "max": 2, 61 | "inclusive": false 62 | } 63 | }`, 64 | want: Rang("a", 1, 2, false), 65 | }, 66 | "flat_range_with_float": { 67 | input: `{ 68 | "left": "a", 69 | "operator": "RANGE", 70 | "right": { 71 | "min": 1.1, 72 | "max": 2.2, 73 | "inclusive": true 74 | } 75 | }`, 76 | want: Rang("a", 1.1, 2.2, true), 77 | }, 78 | "must_wrapping_range": { 79 | input: `{ 80 | "left": { 81 | "left": "c", 82 | "operator": "RANGE", 83 | "right": { 84 | "min": "*", 85 | "max": "foo", 86 | "inclusive": false 87 | } 88 | }, 89 | "operator": "MUST" 90 | }`, 91 | want: MUST(Rang("c", "*", "foo", false)), 92 | }, 93 | "flat_must": { 94 | input: `{ 95 | "left": "a", 96 | "operator": "MUST" 97 | }`, 98 | want: MUST(Lit("a")), 99 | }, 100 | "flat_must_not": { 101 | input: `{ 102 | "left": "a", 103 | "operator": "MUST_NOT" 104 | }`, 105 | want: MUSTNOT(Lit("a")), 106 | }, 107 | "flat_not": { 108 | input: `{ 109 | "left": "a", 110 | "operator": "NOT" 111 | }`, 112 | want: NOT(Lit("a")), 113 | }, 114 | "flat_boost": { 115 | input: `{ 116 | "left": "a", 117 | "operator": "BOOST" 118 | }`, 119 | want: BOOST(Lit("a")), 120 | }, 121 | "flat_boost_explicit_power": { 122 | input: `{ 123 | "left": "a", 124 | "operator": "BOOST", 125 | "power": 0.8 126 | }`, 127 | want: BOOST(Lit("a"), 0.8), 128 | }, 129 | "flat_fuzzy": { 130 | input: `{ 131 | "left": "a", 132 | "operator": "FUZZY" 133 | }`, 134 | want: FUZZY(Lit("a")), 135 | }, 136 | "flat_fuzzy_explicit_power": { 137 | input: `{ 138 | "left": "a", 139 | "operator": 
"FUZZY", 140 | "distance": 2 141 | }`, 142 | want: FUZZY("a", 2), 143 | }, 144 | "flat_in_list": { 145 | input: `{ 146 | "left": "a", 147 | "operator": "IN", 148 | "right": { 149 | "left": ["b", "c"], 150 | "operator": "LIST" 151 | } 152 | }`, 153 | want: IN("a", LIST(Lit("b"), Lit("c"))), 154 | }, 155 | "basic_and": { 156 | input: `{ 157 | "left": { 158 | "left": "a", 159 | "operator": "EQUALS", 160 | "right": "b" 161 | }, 162 | "operator": "AND", 163 | "right": { 164 | "left": "c", 165 | "operator": "EQUALS", 166 | "right": "d" 167 | } 168 | }`, 169 | want: AND( 170 | Eq("a", "b"), 171 | Eq("c", "d"), 172 | ), 173 | }, 174 | "basic_or": { 175 | input: `{ 176 | "left": { 177 | "left": "a", 178 | "operator": "EQUALS", 179 | "right": "b" 180 | }, 181 | "operator": "OR", 182 | "right": { 183 | "left": "c", 184 | "operator": "EQUALS", 185 | "right": "d" 186 | } 187 | }`, 188 | want: OR( 189 | Eq("a", "b"), 190 | Eq("c", "d"), 191 | ), 192 | }, 193 | "preserves_precedence": { 194 | input: `{ 195 | "left": { 196 | "left": { 197 | "left": "a", 198 | "operator": "AND", 199 | "right": "b" 200 | }, 201 | "operator": "OR", 202 | "right": { 203 | "left": "c", 204 | "operator": "AND", 205 | "right": "d" 206 | } 207 | }, 208 | "operator": "OR", 209 | "right": "e" 210 | }`, 211 | want: OR( 212 | OR( 213 | AND("a", "b"), 214 | AND("c", "d"), 215 | ), 216 | "e", 217 | ), 218 | }, 219 | "compound_using_range": { 220 | input: `{ 221 | "left": { 222 | "left": { 223 | "left": "c", 224 | "operator": "RANGE", 225 | "right": { 226 | "min": "*", 227 | "max": "foo", 228 | "inclusive": false 229 | } 230 | }, 231 | "operator": "MUST" 232 | }, 233 | "operator": "OR", 234 | "right": { 235 | "left": { 236 | "left": { 237 | "left": "d", 238 | "operator": "EQUALS", 239 | "right": { 240 | "left": "bar", 241 | "operator": "FUZZY", 242 | "distance": 3 243 | } 244 | }, 245 | "operator": "NOT" 246 | }, 247 | "operator": "MUST_NOT" 248 | } 249 | }`, 250 | want: OR( 251 | MUST(Rang("c", "*", "foo", 
false)), 252 | MUSTNOT(NOT(Eq("d", FUZZY("bar", 3)))), 253 | ), 254 | }, 255 | "large_blob": { 256 | input: `{ 257 | "left": "a", 258 | "operator": "OR", 259 | "right": { 260 | "left": { 261 | "left": "c", 262 | "operator": "RANGE", 263 | "right": { 264 | "min": "*", 265 | "max": "foo", 266 | "inclusive": false 267 | } 268 | }, 269 | "operator": "OR", 270 | "right": "b" 271 | } 272 | }`, 273 | want: OR( 274 | "a", 275 | OR( 276 | Rang("c", "*", "foo", false), 277 | "b", 278 | ), 279 | ), 280 | }, 281 | "every_operator_combined": { 282 | input: `{ 283 | "left": { 284 | "left": { 285 | "left": "a", 286 | "operator": "RANGE", 287 | "right": { 288 | "min": 1, 289 | "max": "*", 290 | "inclusive": true 291 | } 292 | }, 293 | "operator": "AND", 294 | "right": { 295 | "left": { 296 | "left": { 297 | "left": "b", 298 | "operator": "LIKE", 299 | "right": "/foo?ar.*/" 300 | }, 301 | "operator": "NOT" 302 | }, 303 | "operator": "BOOST" 304 | } 305 | }, 306 | "operator": "OR", 307 | "right": { 308 | "left": { 309 | "left": { 310 | "left": "c", 311 | "operator": "RANGE", 312 | "right": { 313 | "min": "*", 314 | "max": "foo", 315 | "inclusive": false 316 | } 317 | }, 318 | "operator": "MUST" 319 | }, 320 | "operator": "OR", 321 | "right": { 322 | "left": { 323 | "left": { 324 | "left": "d", 325 | "operator": "EQUALS", 326 | "right": { 327 | "left": "bar", 328 | "operator": "FUZZY", 329 | "distance": 3 330 | } 331 | }, 332 | "operator": "NOT" 333 | }, 334 | "operator": "MUST_NOT" 335 | } 336 | } 337 | }`, 338 | want: OR( 339 | AND( 340 | Rang("a", 1, "*", true), 341 | BOOST(NOT(LIKE("b", REGEXP("/foo?ar.*/")))), 342 | ), 343 | OR( 344 | MUST(Rang("c", "*", "foo", false)), 345 | MUSTNOT(NOT(Eq("d", FUZZY("bar", 3)))), 346 | ), 347 | ), 348 | }, 349 | } 350 | 351 | for name, tc := range tcs { 352 | t.Run(name, func(t *testing.T) { 353 | got := &Expression{} 354 | err := json.Unmarshal([]byte(tc.input), got) 355 | if err != nil { 356 | t.Fatalf("expected no error during unmarshal but 
got [%s]", err) 357 | } 358 | 359 | if !reflect.DeepEqual(tc.want, got) { 360 | t.Fatalf(errTemplate, "parsed expression doesn't match", tc.want, got) 361 | } 362 | 363 | gotSerialized, err := json.Marshal(got) 364 | if err != nil { 365 | t.Fatalf("expected no error during marshal but got [%s]", err) 366 | } 367 | 368 | if !jsonEqual(string(gotSerialized), tc.input) { 369 | t.Fatalf( 370 | jsonErrTemplate, 371 | "serialized expressions don't match", 372 | stripWhitespace(tc.input), 373 | stripWhitespace(string(gotSerialized)), 374 | ) 375 | } 376 | }) 377 | } 378 | } 379 | 380 | func jsonEqual(got string, want string) bool { 381 | return stripWhitespace(got) == stripWhitespace(want) 382 | } 383 | 384 | func stripWhitespace(in string) string { 385 | return strings.Join(strings.Fields(in), "") 386 | } 387 | -------------------------------------------------------------------------------- /pkg/driver/renderfn.go: -------------------------------------------------------------------------------- 1 | package driver 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | "unicode/utf8" 8 | 9 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 10 | ) 11 | 12 | // RenderFN is a rendering function. 
// It takes the left and right side of the operator serialized to a string
// and serializes the entire expression
type RenderFN func(left, right string) (string, error)

// literal renders a bare literal value. It rejects values that cannot appear
// in a well formed filter string: invalid utf8 sequences and embedded null bytes.
func literal(left, right string) (string, error) {
	if !utf8.ValidString(left) {
		return "", fmt.Errorf("literal contains invalid utf8: %q", left)
	}
	if strings.ContainsRune(left, 0) {
		return "", fmt.Errorf("literal contains null byte: %q", left)
	}

	return left, nil
}

// equals renders an equality comparison.
func equals(left, right string) (string, error) {
	return fmt.Sprintf("%s = %s", left, right), nil
}

// noop passes the left side through untouched.
func noop(left, right string) (string, error) {
	return left, nil
}

// like renders a LIKE match. A right side delimited by '/' (just inside its
// surrounding quotes) is treated as a regular expression; otherwise lucene
// wildcards are translated to their SQL equivalents.
func like(left, right string) (string, error) {
	// right arrives quoted, so the regexp delimiters sit at index 1 and len-2
	if len(right) >= 4 && right[1] == '/' && right[len(right)-2] == '/' {
		return fmt.Sprintf("%s ~ %s", left, right), nil
	}

	right = strings.ReplaceAll(right, "*", "%")
	right = strings.ReplaceAll(right, "?", "_")
	return fmt.Sprintf("%s SIMILAR TO %s", left, right), nil
}

// likeParam renders a LIKE match for a parameterized query. The placeholder is
// left as-is; the actual parameter value decides regexp vs SIMILAR TO.
func likeParam(left, right string, params []any) (string, error) {
	if len(params) == 1 {
		// FIX: guard the type assertion — a non-string parameter previously
		// panicked here; it is simply not a regexp.
		if pright, ok := params[0].(string); ok &&
			len(pright) >= 4 && pright[0] == '/' && pright[len(pright)-1] == '/' {
			return fmt.Sprintf("%s ~ %s", left, right), nil
		}
	}

	return fmt.Sprintf("%s SIMILAR TO %s", left, right), nil
}

// inFn renders a membership check against a list.
func inFn(left, right string) (string, error) {
	return fmt.Sprintf("%s IN %s", left, right), nil
}

// list wraps the rendered elements in parentheses.
func list(left, right string) (string, error) {
	return fmt.Sprintf("(%s)", left), nil
}

// greater renders a ">" comparison.
func greater(left, right string) (string, error) {
	return fmt.Sprintf("%s > %s", left, right), nil
}

// less renders a "<" comparison.
func less(left, right string) (string, error) {
	return fmt.Sprintf("%s < %s", left, right), nil
}

// greaterEq renders a ">=" comparison.
func greaterEq(left, right string) (string, error) {
	return fmt.Sprintf("%s >= %s", left, right), nil
}
right), nil 74 | } 75 | 76 | func lessEq(left, right string) (string, error) { 77 | return fmt.Sprintf("%s <= %s", left, right), nil 78 | } 79 | 80 | // rang is more complicated than the others because it has to handle inclusive and exclusive ranges, 81 | // number and string ranges, and ranges that only have one bound 82 | func rang(left, right string) (string, error) { 83 | inclusive := true 84 | if right[0] == '(' && right[len(right)-1] == ')' { 85 | inclusive = false 86 | } 87 | 88 | stripped := right[1 : len(right)-1] 89 | rangeSlice := strings.Split(stripped, ",") 90 | 91 | if len(rangeSlice) != 2 { 92 | return "", fmt.Errorf("the BETWEEN operator needs a two item list in the right hand side, have %s", right) 93 | } 94 | 95 | rawMin := strings.Trim(rangeSlice[0], " ") 96 | rawMax := strings.Trim(rangeSlice[1], " ") 97 | 98 | iMin, iMax, err := toInts(rawMin, rawMax) 99 | if err == nil { 100 | if rawMin == "'*'" { 101 | if inclusive { 102 | return fmt.Sprintf("%s <= %d", left, iMax), nil 103 | } 104 | return fmt.Sprintf("%s < %d", left, iMax), nil 105 | } 106 | 107 | if rawMax == "'*'" { 108 | if inclusive { 109 | return fmt.Sprintf("%s >= %d", left, iMin), nil 110 | } 111 | return fmt.Sprintf("%s > %d", left, iMin), nil 112 | } 113 | 114 | if inclusive { 115 | return fmt.Sprintf("%s >= %d AND %s <= %d", 116 | left, 117 | iMin, 118 | left, 119 | iMax, 120 | ), 121 | nil 122 | } 123 | 124 | return fmt.Sprintf("%s > %d AND %s < %d", 125 | left, 126 | iMin, 127 | left, 128 | iMax, 129 | ), 130 | nil 131 | } 132 | 133 | fMin, fMax, err := toFloats(rawMin, rawMax) 134 | if err == nil { 135 | if rawMin == "'*'" { 136 | if inclusive { 137 | return fmt.Sprintf("%s <= %.2f", left, fMax), nil 138 | } 139 | return fmt.Sprintf("%s < %.2f", left, fMax), nil 140 | } 141 | 142 | if rawMax == "'*'" { 143 | if inclusive { 144 | return fmt.Sprintf("%s >= %.2f", left, fMin), nil 145 | } 146 | return fmt.Sprintf("%s > %.2f", left, fMin), nil 147 | } 148 | 149 | if inclusive { 150 
| return fmt.Sprintf("%s >= %.2f AND %s <= %.2f", 151 | left, 152 | fMin, 153 | left, 154 | fMax, 155 | ), 156 | nil 157 | } 158 | 159 | return fmt.Sprintf("%s > %.2f AND %s < %.2f", 160 | left, 161 | fMin, 162 | left, 163 | fMax, 164 | ), 165 | nil 166 | } 167 | 168 | return fmt.Sprintf(`%s BETWEEN %s AND %s`, 169 | left, 170 | strings.Trim(rangeSlice[0], " "), 171 | strings.Trim(rangeSlice[1], " "), 172 | ), 173 | nil 174 | } 175 | 176 | func rangParam(left, right string, params []any) (string, error) { 177 | inclusive := true 178 | if right[0] == '(' && right[len(right)-1] == ')' { 179 | inclusive = false 180 | } 181 | 182 | stripped := right[1 : len(right)-1] 183 | rangeSlice := strings.Split(stripped, ",") 184 | 185 | if len(rangeSlice) != 2 { 186 | return "", fmt.Errorf("the BETWEEN operator needs a two item list in the right hand side, have %s", right) 187 | } 188 | 189 | rawMin := strings.Trim(rangeSlice[0], " ") 190 | rawMax := strings.Trim(rangeSlice[1], " ") 191 | 192 | // if we have a parameterized input then we need to check the type 193 | if rawMin == "?" || rawMax == "?" 
{ 194 | switch params[0].(type) { 195 | case int, float64, float32: 196 | if rawMin == "'*'" { 197 | if inclusive { 198 | return fmt.Sprintf("%s <= %s", left, rawMax), nil 199 | } 200 | return fmt.Sprintf("%s < %s", left, rawMax), nil 201 | } 202 | 203 | if rawMax == "'*'" { 204 | if inclusive { 205 | return fmt.Sprintf("%s >= %s", left, rawMin), nil 206 | } 207 | return fmt.Sprintf("%s > %s", left, rawMin), nil 208 | } 209 | 210 | if inclusive { 211 | return fmt.Sprintf("%s >= %s AND %s <= %s", 212 | left, 213 | rawMin, 214 | left, 215 | rawMax, 216 | ), 217 | nil 218 | } 219 | 220 | return fmt.Sprintf("%s > %s AND %s < %s", 221 | left, 222 | rawMin, 223 | left, 224 | rawMax, 225 | ), 226 | nil 227 | default: 228 | return fmt.Sprintf(`%s BETWEEN %s AND %s`, 229 | left, 230 | strings.Trim(rangeSlice[0], " "), 231 | strings.Trim(rangeSlice[1], " "), 232 | ), 233 | nil 234 | } 235 | 236 | } 237 | 238 | iMin, iMax, err := toInts(rawMin, rawMax) 239 | if err == nil { 240 | if rawMin == "'*'" { 241 | if inclusive { 242 | return fmt.Sprintf("%s <= %d", left, iMax), nil 243 | } 244 | return fmt.Sprintf("%s < %d", left, iMax), nil 245 | } 246 | 247 | if rawMax == "'*'" { 248 | if inclusive { 249 | return fmt.Sprintf("%s >= %d", left, iMin), nil 250 | } 251 | return fmt.Sprintf("%s > %d", left, iMin), nil 252 | } 253 | 254 | if inclusive { 255 | return fmt.Sprintf("%s >= %d AND %s <= %d", 256 | left, 257 | iMin, 258 | left, 259 | iMax, 260 | ), 261 | nil 262 | } 263 | 264 | return fmt.Sprintf("%s > %d AND %s < %d", 265 | left, 266 | iMin, 267 | left, 268 | iMax, 269 | ), 270 | nil 271 | } 272 | 273 | fMin, fMax, err := toFloats(rawMin, rawMax) 274 | if err == nil { 275 | if rawMin == "'*'" { 276 | if inclusive { 277 | return fmt.Sprintf("%s <= %.2f", left, fMax), nil 278 | } 279 | return fmt.Sprintf("%s < %.2f", left, fMax), nil 280 | } 281 | 282 | if rawMax == "'*'" { 283 | if inclusive { 284 | return fmt.Sprintf("%s >= %.2f", left, fMin), nil 285 | } 286 | return 
fmt.Sprintf("%s > %.2f", left, fMin), nil 287 | } 288 | 289 | if inclusive { 290 | return fmt.Sprintf("%s >= %.2f AND %s <= %.2f", 291 | left, 292 | fMin, 293 | left, 294 | fMax, 295 | ), 296 | nil 297 | } 298 | 299 | return fmt.Sprintf("%s > %.2f AND %s < %.2f", 300 | left, 301 | fMin, 302 | left, 303 | fMax, 304 | ), 305 | nil 306 | } 307 | 308 | return fmt.Sprintf(`%s BETWEEN %s AND %s`, 309 | left, 310 | strings.Trim(rangeSlice[0], " "), 311 | strings.Trim(rangeSlice[1], " "), 312 | ), 313 | nil 314 | } 315 | 316 | func basicCompound(op expr.Operator) RenderFN { 317 | return func(left, right string) (string, error) { 318 | return fmt.Sprintf("%s %s %s", left, op, right), nil 319 | } 320 | } 321 | 322 | func basicWrap(op expr.Operator) RenderFN { 323 | return func(left, right string) (string, error) { 324 | return fmt.Sprintf("%s(%s)", op, left), nil 325 | } 326 | } 327 | 328 | func toInts(rawMin, rawMax string) (iMin, iMax int, err error) { 329 | iMin, err = strconv.Atoi(rawMin) 330 | if rawMin != "'*'" && err != nil { 331 | return 0, 0, err 332 | } 333 | 334 | iMax, err = strconv.Atoi(rawMax) 335 | if rawMax != "'*'" && err != nil { 336 | return 0, 0, err 337 | } 338 | 339 | return iMin, iMax, nil 340 | } 341 | 342 | func toFloats(rawMin, rawMax string) (fMin, fMax float64, err error) { 343 | fMin, err = strconv.ParseFloat(rawMin, 64) 344 | if rawMin != "*" && err != nil { 345 | return 0, 0, err 346 | } 347 | 348 | fMax, err = strconv.ParseFloat(rawMax, 64) 349 | if rawMax != "*" && err != nil { 350 | return 0, 0, err 351 | } 352 | 353 | return fMin, fMax, nil 354 | } 355 | -------------------------------------------------------------------------------- /pkg/lucene/expr/validator.go: -------------------------------------------------------------------------------- 1 | package expr 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "reflect" 7 | ) 8 | 9 | type validator = func(*Expression) (err error) 10 | 11 | var validators = map[Operator]validator{ 12 | Equals: 
validateEquals, 13 | And: validateAnd, 14 | Or: validateOr, 15 | Not: validateNot, 16 | Range: validateRange, 17 | Must: validateMust, 18 | MustNot: validateMustNot, 19 | Boost: validateBoost, 20 | Fuzzy: validateFuzzy, 21 | Literal: validateLiteral, 22 | Wild: validateWild, 23 | Regexp: validateRegexp, 24 | Greater: validateCompare, 25 | Less: validateCompare, 26 | GreaterEq: validateCompare, 27 | LessEq: validateCompare, 28 | Like: validateLike, 29 | In: validateIn, 30 | List: validateList, 31 | } 32 | 33 | func validateEquals(e *Expression) (err error) { 34 | if e == nil { 35 | return nil 36 | } 37 | 38 | if e.Op != Equals { 39 | return errors.New("EQUALS validation error: must have equals operator") 40 | } 41 | 42 | if !isLiteralExpr(e.Left) { 43 | return errors.New("EQUALS validation: left value must be a literal expression") 44 | } 45 | 46 | return nil 47 | } 48 | 49 | func validateCompare(e *Expression) (err error) { 50 | if e == nil { 51 | return nil 52 | } 53 | 54 | if e.Op != Greater && e.Op != Less && e.Op != GreaterEq && e.Op != LessEq { 55 | return errors.New("COMPARE validation error: must have comparison operator operator") 56 | } 57 | 58 | if !isLiteralExpr(e.Left) { 59 | return errors.New("COMPARE validation: left value must be a literal expression") 60 | } 61 | 62 | return nil 63 | } 64 | 65 | func validateAnd(e *Expression) (err error) { 66 | if e == nil { 67 | return nil 68 | } 69 | 70 | if e.Left == nil { 71 | return errors.New("AND validation: left value must not be nil") 72 | } 73 | 74 | if e.Right == nil { 75 | return errors.New("AND validation: right value must not be nil") 76 | } 77 | 78 | return nil 79 | } 80 | 81 | func validateOr(e *Expression) (err error) { 82 | if e == nil { 83 | return nil 84 | } 85 | 86 | if e.Left == nil { 87 | return errors.New("OR validation: left value must not be nil") 88 | } 89 | 90 | if e.Right == nil { 91 | return errors.New("OR validation: right value must not be nil") 92 | } 93 | 94 | return nil 95 | } 96 
| 97 | func validateNot(e *Expression) (err error) { 98 | if e == nil { 99 | return nil 100 | } 101 | 102 | if e.Left == nil { 103 | return errors.New("NOT validation: sub expression must not be nil") 104 | } 105 | 106 | if e.Right != nil { 107 | return errors.New("NOT validation: must not have two sub expressions") 108 | } 109 | 110 | return nil 111 | } 112 | 113 | func validateRange(e *Expression) (err error) { 114 | if e == nil { 115 | return nil 116 | } 117 | 118 | if e.Left == nil { 119 | return errors.New("RANGE validation: term value must not be nil") 120 | } 121 | 122 | if e.Right == nil { 123 | return errors.New("RANGE validation: boundary value must not be nil") 124 | } 125 | 126 | if !isLiteralExpr(e.Left) { 127 | return errors.New("RANGE validation: term value must be a literal") 128 | } 129 | 130 | boundary, isBoundary := e.Right.(*RangeBoundary) 131 | if !isBoundary { 132 | return fmt.Errorf("RANGE validation: invalid range boundary - incorrect type [%s]", reflect.TypeOf(e.Right)) 133 | } 134 | 135 | if boundary == nil { 136 | return errors.New("RANGE validation: range boundary must not be nil") 137 | } 138 | 139 | if boundary.Min == nil { 140 | return errors.New("RANGE validation: range boundary must have a minimum") 141 | } 142 | 143 | if boundary.Max == nil { 144 | return errors.New("RANGE validation: range boundary must have a maximum") 145 | } 146 | 147 | return nil 148 | } 149 | 150 | func validateMust(e *Expression) (err error) { 151 | if e == nil { 152 | return nil 153 | } 154 | 155 | if e.Left == nil { 156 | return errors.New("MUST validation: sub expression must not be nil") 157 | } 158 | 159 | if e.Right != nil { 160 | return errors.New("MUST validation: must not have two sub expressions") 161 | } 162 | 163 | return nil 164 | } 165 | 166 | func validateMustNot(e *Expression) (err error) { 167 | if e == nil { 168 | return nil 169 | } 170 | 171 | if e.Left == nil { 172 | return errors.New("MUST_NOT validation: sub expression must not be nil") 
173 | } 174 | 175 | if e.Right != nil { 176 | return errors.New("MUST_NOT validation: must not have two sub expressions") 177 | } 178 | 179 | return nil 180 | } 181 | 182 | func validateBoost(e *Expression) (err error) { 183 | if e == nil { 184 | return nil 185 | } 186 | 187 | if e.Left == nil { 188 | return errors.New("BOOST validation: sub expression must not be nil") 189 | } 190 | 191 | if e.Right != nil { 192 | return errors.New("BOOST validation: must not have two sub expressions") 193 | } 194 | 195 | return nil 196 | } 197 | 198 | func validateFuzzy(e *Expression) (err error) { 199 | if e == nil { 200 | return nil 201 | } 202 | 203 | if e.Left == nil { 204 | return errors.New("FUZZY validation: sub expression must not be nil") 205 | } 206 | 207 | if e.Right != nil { 208 | return errors.New("FUZZY validation: must not have two sub expressions") 209 | } 210 | 211 | return nil 212 | } 213 | 214 | func validateLiteral(e *Expression) (err error) { 215 | if e == nil { 216 | return nil 217 | } 218 | 219 | if e.Left == nil { 220 | return errors.New("LITERAL validation: value must not be nil") 221 | } 222 | 223 | if e.Right != nil { 224 | return errors.New("LITERAL validation: must not have two values") 225 | } 226 | 227 | if !isLiteral(e.Left) { 228 | return fmt.Errorf("LITERAL validation: value must be a literal, not %s", reflect.TypeOf(e.Left)) 229 | } 230 | 231 | return nil 232 | } 233 | 234 | func validateWild(e *Expression) (err error) { 235 | if e == nil { 236 | return nil 237 | } 238 | 239 | if e.Left == nil { 240 | return errors.New("WILDCARD validation: value must not be nil") 241 | } 242 | 243 | if e.Right != nil { 244 | return errors.New("WILDCARD validation: must not have two values") 245 | } 246 | 247 | if !isLiteral(e.Left) { 248 | return fmt.Errorf("WILDCARD validation: value must be a literal, not %s", reflect.TypeOf(e.Left)) 249 | } 250 | 251 | return nil 252 | } 253 | 254 | func validateRegexp(e *Expression) (err error) { 255 | if e == nil { 256 | 
return nil 257 | } 258 | 259 | if e.Left == nil { 260 | return errors.New("REGEXP validation: value must not be nil") 261 | } 262 | 263 | if e.Right != nil { 264 | return errors.New("REGEXP validation: must not have two values") 265 | } 266 | 267 | if !isLiteral(e.Left) { 268 | return fmt.Errorf("REGEXP validation: value must be a literal, not %s", reflect.TypeOf(e.Left)) 269 | } 270 | 271 | return nil 272 | } 273 | 274 | func validateLike(e *Expression) (err error) { 275 | if e == nil { 276 | return nil 277 | } 278 | 279 | if e.Left == nil { 280 | return errors.New("LIKE validation: column must not be nil") 281 | } 282 | 283 | if !isLiteralExpr(e.Left) { 284 | return fmt.Errorf("LIKE validation: value must be a literal, not %s", reflect.TypeOf(e.Left)) 285 | } 286 | 287 | if e.Right == nil { 288 | return errors.New("LIKE validation: must have two values") 289 | } 290 | 291 | right, ok := e.Right.(*Expression) 292 | if !ok { 293 | return fmt.Errorf("LIKE validation: right side must be an expression, not %s", reflect.TypeOf(e.Right)) 294 | } 295 | 296 | if right.Op != Wild && right.Op != Regexp { 297 | return fmt.Errorf("LIKE validation: right side must be a wildcard or regexp, not %s", right.Op) 298 | } 299 | 300 | return nil 301 | } 302 | 303 | func validateIn(e *Expression) (err error) { 304 | if e == nil { 305 | return nil 306 | } 307 | 308 | if e.Left == nil { 309 | return errors.New("IN validation: column must not be nil") 310 | } 311 | 312 | if !isLiteralExpr(e.Left) { 313 | return fmt.Errorf("IN validation: value must be a literal, not %s", reflect.TypeOf(e.Left)) 314 | } 315 | 316 | if e.Right == nil { 317 | return errors.New("IN validation: must have two values") 318 | } 319 | 320 | right, ok := e.Right.(*Expression) 321 | if !ok { 322 | return fmt.Errorf("IN validation: right side must be an expression, not %s", reflect.TypeOf(e.Right)) 323 | } 324 | 325 | if right.Op != List { 326 | return fmt.Errorf("IN validation: right side must be a list, not %s", 
right.Op) 327 | } 328 | 329 | return nil 330 | } 331 | 332 | func validateList(e *Expression) (err error) { 333 | if e == nil { 334 | return nil 335 | } 336 | 337 | if e.Left == nil { 338 | return errors.New("LIST validation: value must not be nil") 339 | } 340 | 341 | if e.Right != nil { 342 | return errors.New("LIST validation: must not have two values") 343 | } 344 | 345 | if !isListOfLiteralExprs(e.Left) { 346 | return fmt.Errorf("LIST validation: value must be a list of literals, not %s", reflect.TypeOf(e.Left)) 347 | } 348 | 349 | return nil 350 | } 351 | 352 | func isListOfLiteralExprs(in any) bool { 353 | e, isList := in.([]*Expression) 354 | if !isList { 355 | return false 356 | } 357 | for _, v := range e { 358 | if !isLiteralExpr(v) { 359 | return false 360 | } 361 | } 362 | return true 363 | } 364 | 365 | func isLiteralExpr(in any) bool { 366 | e, isExpr := in.(*Expression) 367 | return isExpr && (e.Op == Literal || e.Op == Wild || e.Op == Regexp) && isLiteral(e.Left) 368 | } 369 | 370 | func isLiteral(in any) bool { 371 | return isString(in) || isNum(in) || isBool(in) || isColumn(in) 372 | } 373 | 374 | func isColumn(in any) bool { 375 | _, is := in.(Column) 376 | return is 377 | } 378 | 379 | func isString(in any) bool { 380 | _, is := in.(string) 381 | return is 382 | } 383 | 384 | func isNum(in any) bool { 385 | return isInt(in) || isFloat(in) 386 | } 387 | 388 | func isBool(in any) bool { 389 | _, is := in.(bool) 390 | return is 391 | } 392 | 393 | func isInt(in any) bool { 394 | switch in.(type) { 395 | case int, int32, int64, uint, uint8, uint16, uint32, uint64: 396 | return true 397 | default: 398 | return false 399 | } 400 | } 401 | 402 | func isFloat(in any) bool { 403 | switch in.(type) { 404 | case float32, float64: 405 | return true 406 | default: 407 | return false 408 | } 409 | } 410 | -------------------------------------------------------------------------------- /internal/lex/lex.go: 
// ----------------------------------------------------------------------------
// internal/lex/lex.go (package lex; imports: fmt, strings, unicode, unicode/utf8)
//
// A hand-rolled state-machine lexer that turns a lucene query string into a
// stream of tokens for the grammar parser.
// ----------------------------------------------------------------------------

const eof = -1

// Token is a parsed token from the input buffer sent to the lexer
type Token struct {
	Typ TokType // the type of the item
	pos int     // the position of the item in the string
	Val string  // the value of the item
}

// String is a string representation of a lex item
func (i Token) String() string {
	switch {
	case i.Typ == TErr:
		return i.Val
	case len(i.Val) > 10:
		return fmt.Sprintf("%.10q...", i.Val)
	}
	return fmt.Sprintf("%q", i.Val)
}

// precedence : > ) > + > - > ~ > ^ > NOT > AND > OR > (

// TokType is an enum of token types that can be parsed by the lexer. Order matters here for non terminals
// with a lower number meaning a higher precedence.
type TokType int

// types of tokens that can be parsed
const (
	// terminal characters
	TErr TokType = iota
	TLiteral
	TQuoted
	TRegexp

	// precedence of operators. Order matters here. This might need to be abstracted
	// to a grammar specific precedence but for now it is fine here.
	TEqual
	TGreater
	TLess
	TColon
	TPlus
	TMinus
	TTilde
	TCarrot
	TNot
	TAnd
	TOr
	TRParen
	TLParen

	// operators that do not have a set precedence because we specifically handle them
	// due to ambiguities in the grammar
	TLCurly
	TRCurly
	TTO
	TLSquare
	TRSquare

	// start and end operators
	TEOF
	TStart
)

// symbols maps single-rune operators to their token types.
var symbols = map[rune]TokType{
	'(': TLParen,
	')': TRParen,
	'[': TLSquare,
	']': TRSquare,
	'{': TLCurly,
	'}': TRCurly,
	':': TColon,
	'+': TPlus,
	'=': TEqual,
	'>': TGreater,
	'~': TTilde,
	'^': TCarrot,
	'<': TLess,
	// minus is not included because we have to special case it for negative numbers
	// '-': tMINUS,
}

// tokStrings provides debug names for token types.
var tokStrings = map[TokType]string{
	TErr:     "tERR",
	TLiteral: "tLITERAL",
	TQuoted:  "tQUOTED",
	TRegexp:  "tREGEXP",
	TEqual:   "tEQUAL",
	TLParen:  "tLPAREN",
	TRParen:  "tRPAREN",
	TAnd:     "tAND",
	TOr:      "tOR",
	TNot:     "tNOT",
	TLSquare: "tLSQUARE",
	TRSquare: "tRSQUARE",
	TLCurly:  "tLCURLY",
	TRCurly:  "tRCURLY",
	TTO:      "tTO",
	TColon:   "tCOLON",
	TPlus:    "tPLUS",
	TMinus:   "tMINUS",
	TGreater: "tGREATER",
	TLess:    "tLESS",
	TTilde:   "tTILDE",
	TCarrot:  "tCARROT",
	TEOF:     "tEOF",
	TStart:   "tSTART",
}

func (tt TokType) String() string {
	return tokStrings[tt]
}

// terminalTokens contains a map of terminal tokens.
// Uses empty struct value to conserve memory.
var terminalTokens = map[TokType]struct{}{
	TErr:     {},
	TLiteral: {},
	TQuoted:  {},
	TRegexp:  {},
	TEOF:     {},
}

// IsTerminal checks whether a specific token is a terminal token meaning
// it can't be matched in the grammar.
func IsTerminal(tok Token) bool {
	_, terminal := terminalTokens[tok.Typ]

	return terminal
}

// HasLessPrecedence checks if a current token has lower precedence than the next.
// There is a specific ordering in the iota (lower numbers = higher precedence) indicating
// whether the operator has more precedence or not.
func HasLessPrecedence(current Token, next Token) bool {
	// left associative. If we see another of the same type don't add onto the pile.
	// right associative would return true here.
	if current.Typ == next.Typ {
		return false
	}

	// lower numbers mean higher precedence
	return current.Typ > next.Typ
}

type tokenStateFn func(*Lexer) tokenStateFn

// Lexer is a lexer that will parse an input string into tokens for consumption by a
// grammar parser.
type Lexer struct {
	input string // the input to parse

	pos      int   // the position of the cursor
	start    int   // the start of the current token
	currItem Token // the current item being worked on
	atEOF    bool  // whether we have finished parsing the string or not
}

// Lex creates a lexer for an input string
func Lex(input string) *Lexer {
	return &Lexer{
		input: input,
		pos:   0,
		start: 0,
	}
}

// Next parses and returns just the next token in the input.
func (l *Lexer) Next() Token {
	// default to returning EOF
	l.currItem = Token{
		Typ: TEOF,
		pos: l.pos,
		Val: "EOF",
	}

	// run the state machine until we have a token
	for state := lexSpace; state != nil; {
		state = state(l)
	}

	return l.currItem
}

// Peek looks at the next token but does not impact the lexer state.
// Note this is intentionally a value receiver so any state change is
// discarded and does not take effect on the caller's lexer.
func (l Lexer) Peek() Token {
	if l.currItem.Typ == TEOF {
		return l.currItem
	}

	return l.Next()
}

// lexSpace is the first state that we always start with: it skips leading whitespace.
func lexSpace(l *Lexer) tokenStateFn {
	for {
		switch l.next() {
		case eof:
			return nil
		case ' ', '\t', '\r', '\n':
			continue
		default:
			// transition to being in a value
			l.backup()
			return lexVal
		}
	}
}

// lexVal dispatches to the correct sub-state based on the first rune of the value.
func lexVal(l *Lexer) tokenStateFn {
	l.start = l.pos
	switch r := l.next(); {
	case isAlphaNumeric(r) || isWildcard(r) || isEscape(r):
		l.backup()
		return lexWord
	case isSymbol(r):
		return l.emit(symbols[r])
	// special case minus sign since it can be a negative number or a minus
	case r == '-':
		if !unicode.IsDigit(l.peek()) {
			return l.emit(TMinus)
		}
		l.backup()
		return lexWord

	case r == '"' || r == '\'':
		l.backup()
		return lexPhrase
	case r == '/':
		l.backup()
		return lexRegexp
	default:
		l.errorf("error parsing token [%s]", string(r))
	}
	return nil
}

// lexPhrase consumes a quoted phrase, honoring backslash escapes.
func lexPhrase(l *Lexer) tokenStateFn {
	open := l.next()

	for {
		switch r := l.next(); {
		case isEscape(r):
			// FIX: consume the escaped rune so an escaped quote does not
			// terminate the phrase early (matches lexRegexp and lexWord,
			// which already skip the character after an escape)
			l.next()
		case isAlphaNumeric(r) || isWildcard(r):
			// do nothing
		case r == ' ' || r == '\t' || r == '\r' || r == '\n':
			// do nothing
		case r == open:
			return l.emit(TQuoted)
		case r == eof:
			return l.errorf("unterminated quote")
		}
	}
}

// lexRegexp consumes a regexp delimited by its opening rune, honoring escapes.
func lexRegexp(l *Lexer) tokenStateFn {
	// theoretically allow us to use anything to specify a regexp
	open := l.next()

	for {
		switch r := l.next(); {
		case isAlphaNumeric(r) || isWildcard(r):
			// do nothing
		case isEscape(r):
			l.next() // just ignore the next character
		case r == ' ' || r == '\t' || r == '\r' || r == '\n':
			// do nothing
		case r == open:
			return l.emit(TRegexp)
		case r == eof:
			return l.errorf("unterminated regexp")
		}
	}
}

// lexWord consumes a bare word and classifies the reserved keywords (AND/OR/NOT/TO).
func lexWord(l *Lexer) tokenStateFn {
loop:
	for {
		switch r := l.next(); {
		case isAlphaNumeric(r) || isWildcard(r) || r == '.' || r == '-':
			// do nothing
		case isEscape(r):
			l.next() // just ignore the next character
		default:
			l.backup()
			break loop
		}
	}

	switch strings.ToUpper(l.currWord()) {
	case "AND":
		return l.emit(TAnd)
	case "OR":
		return l.emit(TOr)
	case "NOT":
		return l.emit(TNot)
	case "TO":
		return l.emit(TTO)
	}
	return l.emit(TLiteral)
}

// currWord returns the input slice for the token currently being built.
func (l *Lexer) currWord() string {
	return l.input[l.start:l.pos]
}

// toTok returns the item at the current input point with the specified type
// and advances the input.
func (l *Lexer) toTok(t TokType) Token {
	i := Token{
		Typ: t,
		pos: l.start,
		Val: l.input[l.start:l.pos],
	}
	// update the lexer's start for the next token to be the current position
	l.start = l.pos
	return i
}

// emit passes the trailing text as an item back to the parser.
func (l *Lexer) emit(t TokType) tokenStateFn {
	l.currItem = l.toTok(t)
	return nil
}

// next moves one rune forward in the input string and returns the consumed rune
func (l *Lexer) next() rune {
	if l.pos >= len(l.input) {
		l.atEOF = true
		return eof
	}
	r, width := utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += width
	return r
}

// peek returns but does not consume the next rune in the input.
func (l *Lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// backup steps back one rune.
func (l *Lexer) backup() {
	if !l.atEOF && l.pos > 0 {
		_, width := utf8.DecodeLastRuneInString(l.input[:l.pos])
		l.pos -= width
	}
}

// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextToken.
func (l *Lexer) errorf(format string, args ...any) tokenStateFn {
	l.currItem = Token{
		Typ: TErr,
		pos: l.start,
		Val: fmt.Sprintf(format, args...),
	}
	l.start = 0
	l.pos = 0
	l.input = l.input[:0]
	return nil
}

// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
func isAlphaNumeric(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}

// isWildcard reports whether r is a lucene wildcard character.
func isWildcard(r rune) bool {
	return r == '*' || r == '?'
}

// isSpace reports whether r is a space character.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t' || r == '\r' || r == '\n'
}

// isEscape reports whether r is the escape character.
func isEscape(r rune) bool {
	return r == '\\'
}

// isSymbol reports whether the rune is one of the reserved symbols.
func isSymbol(r rune) bool {
	_, found := symbols[r]
	return found
}

/*
The repository dump continues with the LICENSE file, whose opening lines fell
inside this span and are preserved here verbatim:

LICENSE: Apache License, Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
*/
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /pkg/lucene/reduce/reduce.go: -------------------------------------------------------------------------------- 1 | package reduce 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | 7 | "github.com/grindlemire/go-lucene/internal/lex" 8 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 9 | ) 10 | 11 | // Reduce will reduce the elems and nonTerminals stacks using the available reducers and return 12 | // those slices modified to contain the reduced expressions. The elems will contain the reduced 13 | // expression and the nonTerminals will contain the modified stack of nonTerminals yet to be reduced. 
14 | func Reduce(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 15 | for _, reducer := range reducers { 16 | elems, nonTerminals, reduced := reducer(elems, nonTerminals, defaultField) 17 | if reduced { 18 | return elems, nonTerminals, true 19 | } 20 | } 21 | return elems, nonTerminals, false 22 | } 23 | 24 | type reducer func(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) 25 | 26 | // reducers are the reducers that will be executed during the grammar parsing 27 | var reducers = []reducer{ 28 | and, 29 | or, 30 | fuzzy, 31 | boost, 32 | equal, 33 | compare, 34 | compareEq, 35 | not, 36 | sub, 37 | must, 38 | mustNot, 39 | rangeop, 40 | } 41 | 42 | func equal(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 43 | if len(elems) != 3 { 44 | return elems, nonTerminals, false 45 | } 46 | 47 | // ensure the middle token is an equals 48 | tok, ok := elems[1].(lex.Token) 49 | if !ok || (tok.Typ != lex.TEqual && tok.Typ != lex.TColon) { 50 | return elems, nonTerminals, false 51 | } 52 | 53 | // make sure the left is a literal and right is an expression 54 | term, ok := elems[0].(*expr.Expression) 55 | if !ok { 56 | return elems, nonTerminals, false 57 | } 58 | value, ok := elems[2].(*expr.Expression) 59 | if !ok { 60 | return elems, nonTerminals, false 61 | } 62 | 63 | if literals, ok := isChainedOrLiterals(value); ok && len(literals) > 1 { 64 | elems = []any{ 65 | expr.IN( 66 | term, 67 | expr.LIST(literals), 68 | ), 69 | } 70 | } else { 71 | elems = []any{ 72 | expr.Eq( 73 | term, 74 | value, 75 | ), 76 | } 77 | } 78 | // we consumed one terminal, the = 79 | return elems, drop(nonTerminals, 1), true 80 | } 81 | 82 | func isChainedOrLiterals(in *expr.Expression) (out []*expr.Expression, ok bool) { 83 | if in == nil { 84 | return out, false 85 | } 86 | 87 | if in.Op == expr.Literal { 88 | return []*expr.Expression{in}, true 89 | } 90 | 91 | if in.Op == 
expr.Or { 92 | left, ok := in.Left.(*expr.Expression) 93 | if !ok { 94 | return out, false 95 | } 96 | right, ok := in.Right.(*expr.Expression) 97 | if !ok { 98 | return out, false 99 | } 100 | 101 | l, isLLiterals := isChainedOrLiterals(left) 102 | r, isRLiterals := isChainedOrLiterals(right) 103 | return append(l, r...), isLLiterals && isRLiterals 104 | } 105 | 106 | return out, false 107 | } 108 | 109 | func compare(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 110 | if len(elems) != 4 { 111 | return elems, nonTerminals, false 112 | } 113 | 114 | // ensure our middle tokens start with a colon 115 | tok, ok := elems[1].(lex.Token) 116 | if !ok || (tok.Typ != lex.TColon) { 117 | return elems, nonTerminals, false 118 | } 119 | 120 | // ensure the colon is followed by a > or < 121 | tokCmp, ok := elems[2].(lex.Token) 122 | if !ok || (tokCmp.Typ != lex.TGreater && tokCmp.Typ != lex.TLess) { 123 | return elems, nonTerminals, false 124 | } 125 | 126 | // make sure the left is a literal and right is an expression 127 | term, ok := elems[0].(*expr.Expression) 128 | if !ok { 129 | return elems, nonTerminals, false 130 | } 131 | value, ok := elems[3].(*expr.Expression) 132 | if !ok { 133 | return elems, nonTerminals, false 134 | } 135 | 136 | if tokCmp.Typ == lex.TGreater { 137 | elems = []any{ 138 | expr.GREATER( 139 | term, 140 | value, 141 | ), 142 | } 143 | } else { 144 | elems = []any{ 145 | expr.LESS( 146 | term, 147 | value, 148 | ), 149 | } 150 | } 151 | 152 | return elems, drop(nonTerminals, 2), true 153 | } 154 | 155 | func compareEq(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 156 | if len(elems) != 5 { 157 | return elems, nonTerminals, false 158 | } 159 | 160 | // ensure our middle tokens start with a colon 161 | tok, ok := elems[1].(lex.Token) 162 | if !ok || (tok.Typ != lex.TColon) { 163 | return elems, nonTerminals, false 164 | } 165 | 166 | // ensure the colon is followed 
by a > or < 167 | tokCmp, ok := elems[2].(lex.Token) 168 | if !ok || (tokCmp.Typ != lex.TGreater && tokCmp.Typ != lex.TLess) { 169 | return elems, nonTerminals, false 170 | } 171 | 172 | // ensure the middle tokens are followed by an = 173 | tokEp, ok := elems[3].(lex.Token) 174 | if !ok || (tokEp.Typ != lex.TEqual) { 175 | return elems, nonTerminals, false 176 | } 177 | 178 | // make sure the left is a literal and right is an expression 179 | term, ok := elems[0].(*expr.Expression) 180 | if !ok { 181 | return elems, nonTerminals, false 182 | } 183 | value, ok := elems[4].(*expr.Expression) 184 | if !ok { 185 | return elems, nonTerminals, false 186 | } 187 | 188 | if tokCmp.Typ == lex.TGreater { 189 | elems = []any{ 190 | expr.GREATEREQ( 191 | term, 192 | value, 193 | ), 194 | } 195 | } else { 196 | elems = []any{ 197 | expr.LESSEQ( 198 | term, 199 | value, 200 | ), 201 | } 202 | } 203 | 204 | return elems, drop(nonTerminals, 3), true 205 | 206 | } 207 | 208 | func and(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 209 | // if we don't have 3 items in the buffer it's not an AND clause 210 | if len(elems) != 3 { 211 | return elems, nonTerminals, false 212 | } 213 | 214 | // if the middle token is not an AND token do nothing 215 | operatorToken, ok := elems[1].(lex.Token) 216 | if !ok || operatorToken.Typ != lex.TAnd { 217 | return elems, nonTerminals, false 218 | } 219 | 220 | // make sure the left and right clauses are expressions 221 | left, ok := elems[0].(*expr.Expression) 222 | if !ok { 223 | return elems, nonTerminals, false 224 | } 225 | right, ok := elems[2].(*expr.Expression) 226 | if !ok { 227 | return elems, nonTerminals, false 228 | } 229 | 230 | // we have a valid AND clause. 
Replace it in the stack 231 | elems = []any{ 232 | expr.AND( 233 | wrapLiteral(left, defaultField), 234 | wrapLiteral(right, defaultField), 235 | ), 236 | } 237 | // we consumed one terminal, the AND 238 | return elems, drop(nonTerminals, 1), true 239 | } 240 | 241 | func or(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 242 | // if we don't have 3 items in the buffer it's not an OR clause 243 | if len(elems) != 3 { 244 | return elems, nonTerminals, false 245 | } 246 | 247 | // if the middle token is not an OR token do nothing 248 | operatorToken, ok := elems[1].(lex.Token) 249 | if !ok || operatorToken.Typ != lex.TOr { 250 | return elems, nonTerminals, false 251 | } 252 | 253 | // make sure the left and right clauses are expressions 254 | left, ok := elems[0].(*expr.Expression) 255 | if !ok { 256 | return elems, nonTerminals, false 257 | } 258 | right, ok := elems[2].(*expr.Expression) 259 | if !ok { 260 | return elems, nonTerminals, false 261 | } 262 | 263 | // we have a valid OR clause. 
Replace it in the stack 264 | elems = []any{ 265 | expr.OR( 266 | wrapLiteral(left, defaultField), 267 | wrapLiteral(right, defaultField), 268 | ), 269 | } 270 | // we consumed one terminal, the OR 271 | return elems, drop(nonTerminals, 1), true 272 | } 273 | // not reduces a NOT terminal followed by an expression into a negated expression. 274 | func not(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 275 | if len(elems) < 2 { 276 | return elems, nonTerminals, false 277 | } 278 | 279 | // if the second to last token is not the NOT operator do nothing 280 | operatorToken, ok := elems[len(elems)-2].(lex.Token) 281 | if !ok || operatorToken.Typ != lex.TNot { 282 | return elems, nonTerminals, false 283 | } 284 | 285 | // make sure the thing to be negated is already a parsed expression 286 | negated, ok := elems[len(elems)-1].(*expr.Expression) 287 | if !ok { 288 | return elems, nonTerminals, false 289 | } 290 | 291 | elems = elems[:len(elems)-2] 292 | elems = append(elems, 293 | expr.NOT( 294 | wrapLiteral(negated, defaultField), 295 | ), 296 | ) 297 | // we consumed one terminal, the NOT 298 | return elems, drop(nonTerminals, 1), true 299 | } 300 | // sub collapses a parenthesized ( E ) group down to its inner expression. 301 | func sub(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 302 | // all the internal terms should have reduced by the time we hit this reducer 303 | if len(elems) != 3 { 304 | return elems, nonTerminals, false 305 | } 306 | 307 | open, ok := elems[0].(lex.Token) 308 | if !ok || open.Typ != lex.TLParen { 309 | return elems, nonTerminals, false 310 | } 311 | 312 | closed, ok := elems[len(elems)-1].(lex.Token) 313 | if !ok || closed.Typ != lex.TRParen { 314 | return elems, nonTerminals, false 315 | } 316 | 317 | // we consumed two terminals, the ( and ) 318 | return []any{elems[1]}, drop(nonTerminals, 2), true 319 | } 320 | // must reduces a leading + terminal into a MUST expression. 321 | func must(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 322 | if len(elems) != 2 { 323 | return elems, nonTerminals, false 324 | } 325 | 326 | must, ok := 
elems[0].(lex.Token) 327 | if !ok || must.Typ != lex.TPlus { 328 | return elems, nonTerminals, false 329 | } 330 | 331 | rest, ok := elems[1].(*expr.Expression) 332 | if !ok { 333 | return elems, nonTerminals, false 334 | } 335 | 336 | // we consumed 1 terminal, the + 337 | return []any{expr.MUST(rest)}, drop(nonTerminals, 1), true 338 | } 339 | 340 | func mustNot(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 341 | if len(elems) != 2 { 342 | return elems, nonTerminals, false 343 | } 344 | 345 | must, ok := elems[0].(lex.Token) 346 | if !ok || must.Typ != lex.TMinus { 347 | return elems, nonTerminals, false 348 | } 349 | 350 | rest, ok := elems[1].(*expr.Expression) 351 | if !ok { 352 | return elems, nonTerminals, false 353 | } 354 | // we consumed one terminal, the - 355 | return []any{expr.MUSTNOT(rest)}, drop(nonTerminals, 1), true 356 | } 357 | 358 | func fuzzy(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 359 | if len(elems) < 2 { 360 | return elems, nonTerminals, false 361 | } 362 | 363 | tilde, ok := elems[1].(lex.Token) 364 | if !ok || tilde.Typ != lex.TTilde { 365 | return elems, nonTerminals, false 366 | } 367 | 368 | rest, ok := elems[0].(*expr.Expression) 369 | if !ok { 370 | return elems, nonTerminals, false 371 | } 372 | 373 | // If we have exactly 2 elements, use implicit distance of 1 374 | if len(elems) == 2 { 375 | return []any{expr.FUZZY(rest, 1)}, drop(nonTerminals, 1), true 376 | } 377 | 378 | // We have 3+ elements. 
Check if elems[2] is a valid numeric distance 379 | if distance, ok := elems[2].(*expr.Expression); ok { 380 | if idistance, err := strconv.Atoi(distance.String()); err == nil { 381 | return []any{expr.FUZZY(rest, idistance)}, drop(nonTerminals, 1), true 382 | } 383 | // elems[2] is an Expression but not a valid numeric distance 384 | // This means we have [expr, ~, non-numeric-expr] which should be reduced 385 | // to [FUZZY(expr, 1), non-numeric-expr] so the parser can inject an implicit AND 386 | result := append([]any{expr.FUZZY(rest, 1)}, elems[2:]...) 387 | return result, drop(nonTerminals, 1), true 388 | } 389 | 390 | // elems[2] is NOT an Expression (might be a Token or something else) 391 | // This means we have [expr, ~, token/other] - reduce just [expr, ~] with implicit distance 392 | // The token/other will be handled in the next reduce cycle 393 | result := append([]any{expr.FUZZY(rest, 1)}, elems[2:]...) 394 | return result, drop(nonTerminals, 1), true 395 | } 396 | 397 | func boost(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 398 | if len(elems) < 2 { 399 | return elems, nonTerminals, false 400 | } 401 | 402 | carrot, ok := elems[1].(lex.Token) 403 | if !ok || carrot.Typ != lex.TCarrot { 404 | return elems, nonTerminals, false 405 | } 406 | 407 | rest, ok := elems[0].(*expr.Expression) 408 | if !ok { 409 | return elems, nonTerminals, false 410 | } 411 | 412 | if len(elems) == 2 { 413 | return []any{expr.BOOST(rest, 1.0)}, drop(nonTerminals, 1), true 414 | } 415 | 416 | // We have 3+ elements. 
Check if elems[2] is a valid numeric power 417 | if power, ok := elems[2].(*expr.Expression); ok { 418 | if fpower, err := toPositiveFloat(power.String()); err == nil { 419 | // Valid power - reduce all 3 elements 420 | return []any{expr.BOOST(rest, fpower)}, drop(nonTerminals, 1), true 421 | } 422 | } 423 | 424 | // elems[2] is NOT a valid power - reduce just [expr, ^] with implicit power 425 | // Return the remaining elements to stay on the stack for further processing 426 | result := append([]any{expr.BOOST(rest, 1.0)}, elems[2:]...) 427 | return result, drop(nonTerminals, 1), true 428 | } 429 | 430 | func rangeop(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 431 | // we need a term, :, [, begin, TO, end, ] to have a range operator which is 7 elems 432 | if len(elems) != 7 { 433 | return elems, nonTerminals, false 434 | } 435 | 436 | colon, ok := elems[1].(lex.Token) 437 | if !ok || colon.Typ != lex.TColon { 438 | return elems, nonTerminals, false 439 | } 440 | 441 | open, ok := elems[2].(lex.Token) 442 | if !ok || (open.Typ != lex.TLSquare && open.Typ != lex.TLCurly) { 443 | return elems, nonTerminals, false 444 | } 445 | 446 | closed, ok := elems[6].(lex.Token) 447 | if !ok || (closed.Typ != lex.TRSquare && closed.Typ != lex.TRCurly) { 448 | return elems, nonTerminals, false 449 | } 450 | 451 | to, ok := elems[4].(lex.Token) 452 | if !ok || to.Typ != lex.TTO { 453 | return elems, nonTerminals, false 454 | } 455 | 456 | term, ok := elems[0].(*expr.Expression) 457 | if !ok { 458 | return elems, nonTerminals, false 459 | } 460 | 461 | start, ok := elems[3].(*expr.Expression) 462 | if !ok { 463 | return elems, nonTerminals, false 464 | } 465 | 466 | end, ok := elems[5].(*expr.Expression) 467 | if !ok { 468 | return elems, nonTerminals, false 469 | } 470 | 471 | // we consumed four terminals, the :, [, TO, and ] 472 | return []any{expr.Rang( 473 | term, start, end, (open.Typ == lex.TLSquare && closed.Typ == lex.TRSquare), 
474 | )}, drop(nonTerminals, 4), true 475 | } 476 | 477 | func drop[T any](stack []T, i int) []T { 478 | return stack[:len(stack)-i] 479 | } 480 | 481 | func toPositiveFloat(in string) (f float64, err error) { 482 | i, err := strconv.Atoi(in) 483 | if err == nil && i > 0 { 484 | return float64(i), nil 485 | } 486 | 487 | pf, err := strconv.ParseFloat(in, 64) 488 | if err == nil && pf > 0 { 489 | return float64(pf), nil 490 | } 491 | 492 | return f, fmt.Errorf("[%v] is not a positive float", in) 493 | } 494 | 495 | // wrapLiteral will wrap a literal expression in an equals expression for a defaultField. 496 | // we need this because we want to support lucene expressions like a:b AND "c" which needs a default 497 | // field to compare "c" against to be valid. 498 | func wrapLiteral(lit *expr.Expression, field string) *expr.Expression { 499 | if lit.Op == expr.Literal && field != "" { 500 | return expr.Eq(expr.Column(field), lit) 501 | } 502 | return lit 503 | } 504 | -------------------------------------------------------------------------------- /pkg/lucene/expr/expression.go: -------------------------------------------------------------------------------- 1 | package expr 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | // Lucene Grammar: 12 | // E -> 13 | // E:E 14 | // (E) 15 | // +E 16 | // -E 17 | // E~E 18 | // E^E 19 | // NOT E 20 | // E AND E 21 | // E OR E 22 | // id 23 | // [id TO id] 24 | 25 | // Added grammar to be compatible with elastic lucene 26 | // See https://www.elastic.co/guide/en/elasticsearch/reference/8.9/query-dsl-query-string-query.html#query-string-syntax 27 | // E:>E 28 | // E:>=E 29 | // E: 0 { 157 | return Expr(e, Boost, power[0]) 158 | } 159 | return Expr(e, Boost) 160 | } 161 | 162 | // FUZZY wraps an expression in a fuzzy 163 | func FUZZY(e any, distance ...int) *Expression { 164 | if len(distance) > 0 { 165 | return Expr(e, Fuzzy, distance[0]) 166 | } 167 | return Expr(e, Fuzzy) 
168 | } 169 | 170 | // IsExpr checks if the input is an expression 171 | func IsExpr(in any) bool { 172 | _, isExpr := in.(*Expression) 173 | return isExpr 174 | } 175 | 176 | // Validate validates the expression is correctly structured. 177 | func Validate(in any) (err error) { 178 | e, isExpr := in.(*Expression) 179 | if !isExpr { 180 | // if we don't have an expression we must be in a leaf node 181 | return nil 182 | } 183 | 184 | fn, found := validators[e.Op] 185 | if !found { 186 | return fmt.Errorf("unsupported operator %v", e.Op) 187 | } 188 | err = fn(e) 189 | if err != nil { 190 | return err 191 | } 192 | 193 | err = Validate(e.Left) 194 | if err != nil { 195 | return err 196 | } 197 | 198 | return Validate(e.Right) 199 | } 200 | 201 | // Column represents a column in sql. It will not be escaped by quotes in the sql rendering 202 | type Column string 203 | 204 | // GoString is a debug print for the column type 205 | func (c Column) GoString() string { 206 | return fmt.Sprintf("COLUMN(%s)", c) 207 | } 208 | 209 | // Expr creates a general new expression. The other public functions are just helpers that call this 210 | // function underneath. 
211 | func Expr(left any, op Operator, right ...any) *Expression { 212 | if isStringlike(left) && operatesOnColumn(op) { 213 | left = wrapInColumn(left) 214 | } 215 | 216 | if isLiteral(left) && op != Literal && op != Wild && op != Regexp { 217 | left = literalToExpr(left) 218 | } 219 | 220 | e := ptr(empty()) 221 | e.Left = left 222 | e.Op = op 223 | 224 | // support using a like operator with wildcards or regex 225 | if op == Equals && len(right) == 1 && shouldUseLikeOperator(right[0]) { 226 | e.Op = Like 227 | e.Right = right[0].(*Expression) 228 | return e 229 | } 230 | 231 | // support changing boost power 232 | if op == Boost { 233 | e.boostPower = 1.0 234 | if len(right) == 1 && isFloat(right[0]) { 235 | e.boostPower = right[0].(float64) 236 | } 237 | return e 238 | } 239 | 240 | // support changing fuzzy distance 241 | if op == Fuzzy { 242 | e.fuzzyDistance = 1 243 | if len(right) == 1 && isInt(right[0]) { 244 | e.fuzzyDistance = right[0].(int) 245 | } 246 | return e 247 | } 248 | 249 | // support passing a range with inclusivity 250 | if op == Range && len(right) == 3 && isBool(right[2]) { 251 | e.Right = &RangeBoundary{ 252 | Min: literalToExpr(right[0]), 253 | Max: literalToExpr(right[1]), 254 | Inclusive: right[2].(bool), 255 | } 256 | return e 257 | } 258 | 259 | // support passing a slice to an IN operator 260 | if op == In && len(right) > 0 { 261 | e.Right = right[0].(*Expression) 262 | return e 263 | } 264 | 265 | if op == List { 266 | // super gross but this is how go handles any types that are slices 267 | slice, isSlice := left.([]any)[0].([]*Expression) 268 | if isSlice { 269 | e.Left = slice 270 | return e 271 | } 272 | 273 | l := left.([]any) 274 | vals := []*Expression{} 275 | for _, v := range l { 276 | vals = append(vals, v.(*Expression)) 277 | } 278 | e.Left = vals 279 | return e 280 | } 281 | 282 | // if right is present and non nil then add it to the expression 283 | if len(right) >= 1 && right[0] != nil { 284 | if isLiteral(right[0]) { 
285 | right[0] = literalToExpr(right[0]) 286 | } 287 | 288 | e.Right = right[0] 289 | } 290 | 291 | return e 292 | } 293 | 294 | type jsonExpression struct { 295 | Left json.RawMessage `json:"left"` 296 | Operator string `json:"operator"` 297 | Right json.RawMessage `json:"right,omitempty"` 298 | 299 | RangeBoundary *RangeBoundary `json:"boundaries,omitempty"` 300 | FuzzyDistance *int `json:"distance,omitempty"` 301 | BoostPower *float64 `json:"power,omitempty"` 302 | } 303 | 304 | // MarshalJSON is a custom JSON serialization for the Expression 305 | func (e Expression) MarshalJSON() (out []byte, err error) { 306 | // if we are in a leaf node just marshal the value 307 | if e.Op == Literal || e.Op == Wild || e.Op == Regexp { 308 | return json.Marshal(e.Left) 309 | } 310 | 311 | leftRaw, err := json.Marshal(e.Left) 312 | if err != nil { 313 | return out, err 314 | } 315 | 316 | c := jsonExpression{ 317 | Left: leftRaw, 318 | Operator: toString[e.Op], 319 | } 320 | 321 | // this is dumb but we need it so our "null" is not event given. Otherwise the json serialization 322 | // will persist a null value. 323 | if e.Right != nil { 324 | rightRaw, err := json.Marshal(e.Right) 325 | if err != nil { 326 | return out, err 327 | } 328 | c.Right = rightRaw 329 | } 330 | 331 | if e.boostPower != 1.0 { 332 | c.BoostPower = &e.boostPower 333 | } 334 | 335 | if e.fuzzyDistance != 1 { 336 | c.FuzzyDistance = &e.fuzzyDistance 337 | } 338 | 339 | return json.Marshal(c) 340 | } 341 | 342 | // UnmarshalJSON is a custom JSON deserialization for the Expression 343 | func (e *Expression) UnmarshalJSON(data []byte) (err error) { 344 | // initalize our default values, e cannot be nil here. 
345 | *e = empty() 346 | // if this does not look like an object it must be a literal 347 | if !isJSONObject(json.RawMessage(data)) { 348 | Expr, err := unmarshalLiteral(json.RawMessage(data)) 349 | // this is required because apparently you can't swap pointers to your receiver mid method 350 | *e = *Expr 351 | return err 352 | } 353 | 354 | // unmarshal the current layer in the json first, then worry about 355 | // the left and right hand subobjects 356 | var c jsonExpression 357 | err = json.Unmarshal(data, &c) 358 | if err != nil { 359 | return err 360 | } 361 | 362 | // check if it is an array so we can parse it into literals 363 | if isArray(json.RawMessage(c.Left)) { 364 | var l []json.RawMessage 365 | err = json.Unmarshal(c.Left, &l) 366 | if err != nil { 367 | return err 368 | } 369 | 370 | exprs := []*Expression{} 371 | for _, v := range l { 372 | parsedExp, err := unmarshalLiteral(v) 373 | if err != nil { 374 | return err 375 | } 376 | exprs = append(exprs, parsedExp) 377 | } 378 | e.Left = exprs 379 | } else { 380 | e.Left = ptr(empty()) 381 | err = json.Unmarshal(c.Left, e.Left) 382 | if err != nil { 383 | return err 384 | } 385 | } 386 | 387 | e.Op = fromString[c.Operator] 388 | 389 | // if the left hand side is a string then it must be a column 390 | if isStringlike(e.Left) && operatesOnColumn(e.Op) { 391 | e.Left = wrapInColumn(e.Left) 392 | } 393 | 394 | if len(c.Right) > 0 && looksLikeRangeBoundary(c.Right) { 395 | var boundary RangeBoundary 396 | err = json.Unmarshal(c.Right, &boundary) 397 | if err != nil { 398 | return err 399 | } 400 | if !IsExpr(boundary.Min) { 401 | boundary.Min = literalToExpr(toIntIfNecessary(boundary.Min)) 402 | } 403 | 404 | if !IsExpr(boundary.Max) { 405 | boundary.Max = literalToExpr(toIntIfNecessary(boundary.Max)) 406 | } 407 | e.Right = &boundary 408 | } else if len(c.Right) > 0 { 409 | e.Right = ptr(empty()) 410 | err = json.Unmarshal(c.Right, e.Right) 411 | if err != nil { 412 | return err 413 | } 414 | } 415 | 416 
| if e.Op == Fuzzy { 417 | e.fuzzyDistance = 1 418 | if c.FuzzyDistance != nil { 419 | e.fuzzyDistance = *c.FuzzyDistance 420 | } 421 | } 422 | 423 | if e.Op == Boost { 424 | e.boostPower = 1.0 425 | if c.BoostPower != nil { 426 | e.boostPower = *c.BoostPower 427 | } 428 | } 429 | 430 | return nil 431 | } 432 | 433 | func unmarshalLiteral(in json.RawMessage) (e *Expression, err error) { 434 | e = ptr(empty()) 435 | 436 | // check if it is an int first because all ints can be parsed as floats 437 | i, err := strconv.Atoi(string(in)) 438 | if err == nil { 439 | return Lit(i), nil 440 | } 441 | 442 | // check if it is a float 443 | f, err := strconv.ParseFloat(string(in), 64) 444 | if err == nil { 445 | return Lit(f), nil 446 | } 447 | 448 | // we know it is some sort of string so decode it 449 | var s string 450 | err = json.Unmarshal(in, &s) 451 | if err != nil { 452 | return e, err 453 | } 454 | 455 | return literalToExpr(s), nil 456 | } 457 | 458 | func isArray(in json.RawMessage) bool { 459 | trimmed := bytes.TrimSpace(in) 460 | if len(trimmed) == 0 { 461 | return false 462 | } 463 | 464 | return trimmed[0] == '[' && trimmed[len(trimmed)-1] == ']' 465 | } 466 | 467 | // looksLikeRangeBoundary checks whether the marshalled json has the keys for a range boundary. 468 | // This is a hack but we need to know whether to unmarshal an expression or a range boundary. 469 | func looksLikeRangeBoundary(in json.RawMessage) bool { 470 | // strip all the whitespace out of the input 471 | s := strings.Join(strings.Fields(string(in)), "") 472 | 473 | return strings.Contains(s, "\"min\":") && 474 | strings.Contains(s, "\"max\":") && 475 | !strings.Contains(s, "\"left\":") 476 | } 477 | 478 | func literalToExpr(in any) *Expression { 479 | if IsExpr(in) { 480 | return in.(*Expression) 481 | } 482 | 483 | s, isStr := in.(string) 484 | if !isStr { 485 | return Lit(in) 486 | } 487 | 488 | // if it has leading and trailing /'s then it probably is a regex. 
489 | // Note this needs to be checked before the wildcard check as a regex 490 | // can contain * and ?. 491 | // TODO this should probably check for escaping 492 | if s[0] == '/' && s[len(s)-1] == '/' { 493 | return REGEXP(s) 494 | } 495 | 496 | // if it contains a * or ? then it probably is a wildcard expression 497 | // TODO this should probably check for escaping 498 | if strings.ContainsAny(s, "*?") { 499 | return WILD(s) 500 | } 501 | 502 | return Lit(s) 503 | } 504 | 505 | func isJSONObject(in json.RawMessage) bool { 506 | trimmed := bytes.TrimSpace(in) 507 | if len(trimmed) == 0 { 508 | return false 509 | } 510 | 511 | return trimmed[0] == '{' && trimmed[len(trimmed)-1] == '}' 512 | } 513 | 514 | // isStringLike checks if the input is a string or is a literal wrapping a string 515 | func isStringlike(in any) bool { 516 | _, isStr := in.(string) 517 | e, isExpr := in.(*Expression) 518 | if isExpr { 519 | _, isStrLiteralExpr := e.Left.(string) 520 | return isStrLiteralExpr 521 | } 522 | 523 | return isStr 524 | } 525 | 526 | // operatesOnColumn checks if an operator can be applied to a column (the left side of the operator). 527 | // Example: equal can be applied onto a column (e.g. myColumn = 'foo') but Boost (^) cannot. 528 | func operatesOnColumn(op Operator) bool { 529 | return op == Equals || 530 | op == Range || 531 | op == Greater || 532 | op == Less || 533 | op == GreaterEq || 534 | op == LessEq || 535 | op == In || 536 | op == Like 537 | } 538 | 539 | // wrapInColumn converts a string to a column and enforces column 540 | // invariants (e.g. 
if the column name contains a space then it must be quoted)
541 | func wrapInColumn(in any) (out *Expression) {
542 | 	s, isStr := in.(string)
543 | 	if isStr {
544 | 		return Lit(Column(s))
545 | 	}
546 | 
547 | 	// an expression whose left side is a string is treated as a column name too
548 | 	e, isExpr := in.(*Expression)
549 | 	if isExpr {
550 | 		s, isStr = e.Left.(string)
551 | 		if isStr {
552 | 			return Lit(Column(s))
553 | 		}
554 | 	}
555 | 	return e
556 | }
557 | // apparently the json unmarshal only parses float64 values so we check if the float64
558 | // is actually a whole number. If it is then make it an int
559 | func toIntIfNecessary(in any) (out any) {
560 | 	f, isFloat := in.(float64)
561 | 	if !isFloat {
562 | 		return in
563 | 	}
564 | 
565 | 	if f == float64(int(f)) {
566 | 		return int(f)
567 | 	}
568 | 
569 | 	return f
570 | }
571 | // empty returns an Expression carrying the default fuzzy distance (1) and boost power (1.0).
572 | func empty() Expression {
573 | 	return Expression{
574 | 		fuzzyDistance: 1,
575 | 		boostPower:    1.0,
576 | 	}
577 | }
578 | // ptr returns a pointer to a copy of its argument.
579 | func ptr[T any](in T) *T {
580 | 	return &in
581 | }
582 | // shouldUseLikeOperator reports whether the input is a wildcard or regexp expression.
583 | func shouldUseLikeOperator(in any) bool {
584 | 	expr, isExpr := in.(*Expression)
585 | 	if !isExpr {
586 | 		return false
587 | 	}
588 | 	return expr.Op == Wild || expr.Op == Regexp
589 | }
590 | 
--------------------------------------------------------------------------------
/parse_test.go:
--------------------------------------------------------------------------------
1 | package lucene
2 | 
3 | import (
4 | 	"encoding/json"
5 | 	"reflect"
6 | 	"testing"
7 | 
8 | 	"github.com/grindlemire/go-lucene/pkg/lucene/expr"
9 | )
10 | 
11 | const errTemplate = "%s:\n wanted %#v\n got %#v"
12 | 
13 | func TestParseLucene(t *testing.T) {
14 | 	type tc struct {
15 | 		input string
16 | 		want  *expr.Expression
17 | 	}
18 | 
19 | 	tcs := map[string]tc{
20 | 		"single_literal": {
21 | 			input: "a",
22 | 			want:  expr.Lit("a"),
23 | 		},
24 | 		"basic_equal": {
25 | 			input: "a:b",
26 | 			want:  expr.Eq("a", "b"),
27 | 		},
28 | 		"basic_equal_with_number": {
29 | 			input: "a:5",
30 | 			want:  expr.Eq("a", 5),
31 | 		},
32 | 		"basic_greater_with_number": {
33 | 			input: "a:>22",
34 | 			
want: expr.GREATER("a", 22), 35 | }, 36 | "basic_greater_eq_with_number": { 37 | input: "a:>=22", 38 | want: expr.GREATEREQ("a", 22), 39 | }, 40 | "basic_less_with_number": { 41 | input: "a:<22", 42 | want: expr.LESS("a", 22), 43 | }, 44 | "basic_less_eq_with_number": { 45 | input: "a:<=22", 46 | want: expr.LESSEQ("a", 22), 47 | }, 48 | "basic_greater_less_with_number": { 49 | input: "a:<22 AND b:>33", 50 | want: expr.AND(expr.LESS("a", 22), expr.GREATER("b", 33)), 51 | }, 52 | "basic_greater_less_eq_with_number": { 53 | input: "a:<=22 AND b:>=33", 54 | want: expr.AND(expr.LESSEQ("a", 22), expr.GREATEREQ("b", 33)), 55 | }, 56 | "basic_wild_equal_with_*": { 57 | input: "a:b*", 58 | want: expr.LIKE("a", "b*"), 59 | }, 60 | "basic_wild_equal_with_?": { 61 | input: "a:b?z", 62 | want: expr.LIKE("a", expr.WILD("b?z")), 63 | }, 64 | "basic_inclusive_range": { 65 | input: "a:[* TO 5]", 66 | want: expr.Rang("a", expr.WILD("*"), 5, true), 67 | }, 68 | "basic_exclusive_range": { 69 | input: "a:{* TO 5}", 70 | want: expr.Rang("a", expr.WILD("*"), 5, false), 71 | }, 72 | "range_over_strings": { 73 | input: "a:{foo TO bar}", 74 | want: expr.Rang("a", "foo", "bar", false), 75 | }, 76 | "basic_fuzzy": { 77 | input: "b AND a~", 78 | want: expr.AND("b", expr.FUZZY("a", 1)), 79 | }, 80 | "fuzzy_power": { 81 | input: "b AND a~10", 82 | want: expr.AND("b", expr.FUZZY("a", 10)), 83 | }, 84 | "basic_boost": { 85 | input: "b AND a^", 86 | want: expr.AND("b", expr.BOOST("a", 1.0)), 87 | }, 88 | "boost_power": { 89 | input: "b AND a^10", 90 | want: expr.AND("b", expr.BOOST("a", 10.0)), 91 | }, 92 | "regexp": { 93 | input: "a:/b [c]/", 94 | want: expr.Eq("a", expr.REGEXP("/b [c]/")), 95 | }, 96 | "regexp_with_keywords": { 97 | input: `a:/b "[c]/`, 98 | want: expr.Eq("a", expr.REGEXP(`/b "[c]/`)), 99 | }, 100 | "regexp_with_escaped_chars": { 101 | input: `url:/example.com\/foo\/bar\/.*/`, 102 | want: expr.Eq("url", expr.REGEXP(`/example.com\/foo\/bar\/.*/`)), 103 | }, 104 | 
"basic_default_AND": { 105 | input: "a b", 106 | want: expr.AND("a", "b"), 107 | }, 108 | "default_to_AND_with_subexpressions": { 109 | input: "a:b c:d", 110 | want: expr.AND( 111 | expr.Eq("a", "b"), 112 | expr.Eq("c", "d"), 113 | ), 114 | }, 115 | "basic_and": { 116 | input: "a AND b", 117 | want: expr.AND("a", "b"), 118 | }, 119 | "and_with_nesting": { 120 | input: "a:foo AND b:bar", 121 | want: expr.AND( 122 | expr.Eq("a", "foo"), 123 | expr.Eq("b", "bar"), 124 | ), 125 | }, 126 | "basic_or": { 127 | input: "a OR b", 128 | want: expr.OR( 129 | "a", 130 | "b", 131 | ), 132 | }, 133 | "range_operator_inclusive": { 134 | input: "a:[1 TO 5]", 135 | want: expr.Rang("a", 1, 5, true), 136 | }, 137 | "range_operator_inclusive_unbound": { 138 | input: `a:[* TO 200]`, 139 | want: expr.Rang("a", expr.WILD("*"), expr.Lit(200), true), 140 | }, 141 | "range_operator_exclusive": { 142 | input: `a:{"ab" TO "az"}`, 143 | want: expr.Rang("a", expr.Lit("ab"), expr.Lit("az"), false), 144 | }, 145 | "range_operator_exclusive_unbound": { 146 | input: `a:{2 TO *}`, 147 | want: expr.Rang("a", expr.Lit(2), expr.WILD("*"), false), 148 | }, 149 | "or_with_nesting": { 150 | input: "a:foo OR b:bar", 151 | want: expr.OR( 152 | expr.Eq("a", "foo"), 153 | expr.Eq("b", "bar"), 154 | ), 155 | }, 156 | "basic_not": { 157 | input: "NOT b", 158 | want: expr.NOT("b"), 159 | }, 160 | "nested_not": { 161 | input: "a:foo OR NOT b:bar", 162 | want: expr.OR( 163 | expr.Eq("a", "foo"), 164 | expr.NOT(expr.Eq("b", "bar")), 165 | ), 166 | }, 167 | "term_grouping": { 168 | input: "(a:foo OR b:bar) AND c:baz", 169 | want: expr.AND( 170 | expr.OR( 171 | expr.Eq("a", "foo"), 172 | expr.Eq("b", "bar"), 173 | ), 174 | expr.Eq("c", "baz"), 175 | ), 176 | }, 177 | "value_grouping": { 178 | input: "a:(foo OR baz OR bar)", 179 | want: expr.IN( 180 | "a", 181 | expr.LIST( 182 | expr.Lit("foo"), 183 | expr.Lit("baz"), 184 | expr.Lit("bar"), 185 | ), 186 | ), 187 | }, 188 | "basic_must": { 189 | input: "+a:b", 190 | 
want: expr.MUST( 191 | expr.Eq("a", "b"), 192 | ), 193 | }, 194 | "basic_must_not": { 195 | input: "-a:b", 196 | want: expr.MUSTNOT( 197 | expr.Eq("a", "b"), 198 | ), 199 | }, 200 | "basic_nested_must_not": { 201 | input: "d:e AND (-a:b AND +f:e)", 202 | want: expr.AND( 203 | expr.Eq("d", "e"), 204 | expr.AND( 205 | expr.MUSTNOT(expr.Eq("a", "b")), 206 | expr.MUST(expr.Eq("f", "e")), 207 | ), 208 | ), 209 | }, 210 | "basic_escaping": { 211 | input: `a:\(1\+1\)\:2`, 212 | want: expr.Eq("a", expr.Lit(`(1+1):2`)), 213 | }, 214 | "escaped_column_name": { 215 | input: `foo\ bar:b`, 216 | want: expr.Eq(`foo bar`, "b"), 217 | }, 218 | "boost_key_value": { 219 | input: "a:b^2 AND foo", 220 | want: expr.AND( 221 | expr.BOOST(expr.Eq("a", "b"), 2), 222 | "foo", 223 | ), 224 | }, 225 | "boost_literal": { 226 | input: "foo^4", 227 | want: expr.BOOST("foo", 4), 228 | }, 229 | "boost_literal_in_compound": { 230 | input: "a:b AND foo^4", 231 | want: expr.AND( 232 | expr.Eq("a", "b"), 233 | expr.BOOST("foo", 4), 234 | ), 235 | }, 236 | "boost_literal_leading": { 237 | input: "foo^4 AND a:b", 238 | want: expr.AND( 239 | expr.BOOST("foo", 4), 240 | expr.Eq("a", "b"), 241 | ), 242 | }, 243 | "boost_quoted_literal": { 244 | input: `"foo bar"^4 AND a:b`, 245 | want: expr.AND( 246 | expr.BOOST(expr.Lit("foo bar"), 4), 247 | expr.Eq("a", "b"), 248 | ), 249 | }, 250 | "boost_key_implicit_power_before_term": { 251 | input: "color:red^ k1:v1", 252 | want: expr.AND( 253 | expr.BOOST(expr.Eq("color", "red"), 1), 254 | expr.Eq("k1", "v1"), 255 | ), 256 | }, 257 | "boost_key_power_before_term": { 258 | input: "color:red^2 k1:v1", 259 | want: expr.AND( 260 | expr.BOOST(expr.Eq("color", "red"), 2), 261 | expr.Eq("k1", "v1"), 262 | ), 263 | }, 264 | "boost_sub_expression": { 265 | input: "(title:foo OR title:bar)^1.5 AND (body:foo OR body:bar)", 266 | want: expr.AND( 267 | expr.BOOST( 268 | expr.OR( 269 | expr.Eq("title", "foo"), 270 | expr.Eq("title", "bar"), 271 | ), 272 | 1.5), 273 | expr.OR( 
274 | expr.Eq("body", "foo"), 275 | expr.Eq("body", "bar"), 276 | ), 277 | ), 278 | }, 279 | "nested_sub_expressions_with_boost": { 280 | input: "((title:foo)^1.2 OR title:bar) AND (body:foo OR body:bar)", 281 | want: expr.AND( 282 | expr.OR( 283 | expr.BOOST(expr.Eq("title", "foo"), 1.2), 284 | expr.Eq("title", "bar"), 285 | ), 286 | expr.OR( 287 | expr.Eq("body", "foo"), 288 | expr.Eq("body", "bar"), 289 | ), 290 | ), 291 | }, 292 | "nested_sub_expressions": { 293 | input: "((title:foo OR title:bar) AND (body:foo OR body:bar)) OR k:v", 294 | want: expr.OR( 295 | expr.AND( 296 | expr.OR( 297 | expr.Eq("title", "foo"), 298 | expr.Eq("title", "bar"), 299 | ), 300 | 301 | expr.OR( 302 | expr.Eq("body", "foo"), 303 | expr.Eq("body", "bar"), 304 | ), 305 | ), 306 | expr.Eq("k", "v"), 307 | ), 308 | }, 309 | "fuzzy_key_value": { 310 | input: "a:b~2 AND foo", 311 | want: expr.AND( 312 | expr.FUZZY(expr.Eq("a", "b"), 2), 313 | "foo", 314 | ), 315 | }, 316 | "fuzzy_key_value_default": { 317 | input: "a:b~ AND foo", 318 | want: expr.AND( 319 | expr.FUZZY(expr.Eq("a", "b"), 1), 320 | "foo", 321 | ), 322 | }, 323 | "fuzzy_key_implicit_distance_before_term": { 324 | input: "color:red~ k1:v1", 325 | want: expr.AND( 326 | expr.FUZZY(expr.Eq("color", "red"), 1), 327 | expr.Eq("k1", "v1"), 328 | ), 329 | }, 330 | "fuzzy_key_distance_before_term": { 331 | input: "color:red~2 k1:v1", 332 | want: expr.AND( 333 | expr.FUZZY(expr.Eq("color", "red"), 2), 334 | expr.Eq("k1", "v1"), 335 | ), 336 | }, 337 | "fuzzy_literal": { 338 | input: "foo~4", 339 | want: expr.FUZZY("foo", 4), 340 | }, 341 | "fuzzy_literal_default": { 342 | input: "foo~", 343 | want: expr.FUZZY("foo", 1), 344 | }, 345 | "fuzzy_literal_in_compound": { 346 | input: "a:b AND foo~4", 347 | want: expr.AND( 348 | expr.Eq("a", "b"), 349 | expr.FUZZY("foo", 4), 350 | ), 351 | }, 352 | "fuzzy_literal_in_implicit_compound": { 353 | input: "a:b foo~4", 354 | want: expr.AND( 355 | expr.Eq("a", "b"), 356 | expr.FUZZY("foo", 4), 357 
| ), 358 | }, 359 | "fuzzy_literal_leading": { 360 | input: "foo~4 AND a:b", 361 | want: expr.AND( 362 | expr.FUZZY("foo", 4), 363 | expr.Eq("a", "b"), 364 | ), 365 | }, 366 | "fuzzy_literal_leading_in_implicit_compound": { 367 | input: "foo~4 AND a:b", 368 | want: expr.AND( 369 | expr.FUZZY("foo", 4), 370 | expr.Eq("a", "b"), 371 | ), 372 | }, 373 | "fuzzy_quoted_literal": { 374 | input: `"foo bar"~4 AND a:b`, 375 | want: expr.AND( 376 | expr.FUZZY(expr.Lit("foo bar"), 4), 377 | expr.Eq("a", "b"), 378 | ), 379 | }, 380 | "fuzzy_sub_expression": { 381 | input: "(title:foo OR title:bar)~2 AND (body:foo OR body:bar)", 382 | want: expr.AND( 383 | expr.FUZZY( 384 | expr.OR( 385 | expr.Eq("title", "foo"), 386 | expr.Eq("title", "bar"), 387 | ), 388 | 2), 389 | expr.OR( 390 | expr.Eq("body", "foo"), 391 | expr.Eq("body", "bar"), 392 | ), 393 | ), 394 | }, 395 | "nested_sub_expressions_with_fuzzy": { 396 | input: "((title:foo)~ OR title:bar) AND (body:foo OR body:bar)", 397 | want: expr.AND( 398 | expr.OR( 399 | expr.FUZZY(expr.Eq("title", "foo"), 1), 400 | expr.Eq("title", "bar"), 401 | ), 402 | 403 | expr.OR( 404 | expr.Eq("body", "foo"), 405 | expr.Eq("body", "bar"), 406 | ), 407 | ), 408 | }, 409 | "precedence_works": { 410 | input: "a:b AND c:d OR e:f OR h:i AND j:k", 411 | want: expr.OR( 412 | expr.OR( 413 | expr.AND( 414 | expr.Eq("a", "b"), 415 | expr.Eq("c", "d"), 416 | ), 417 | expr.Eq("e", "f")), 418 | expr.AND( 419 | expr.Eq("h", "i"), 420 | expr.Eq("j", "k"), 421 | ), 422 | ), 423 | }, 424 | "test_precedence_weaving": { 425 | input: "a OR b AND c OR d", 426 | want: expr.OR( 427 | expr.OR( 428 | "a", 429 | expr.AND("b", "c"), 430 | ), 431 | "d", 432 | ), 433 | }, 434 | "test_precedence_weaving_with_not": { 435 | input: "NOT a OR b AND NOT c OR d", 436 | want: expr.OR( 437 | expr.OR( 438 | expr.NOT("a"), 439 | expr.AND("b", expr.NOT("c")), 440 | ), 441 | "d", 442 | ), 443 | }, 444 | "test_equals_in_precedence": { 445 | input: "a:az OR b:bz AND NOT c:z OR d", 
446 | want: expr.OR( 447 | expr.OR( 448 | expr.Eq("a", "az"), 449 | expr.AND( 450 | expr.Eq("b", "bz"), 451 | expr.NOT( 452 | expr.Eq("c", "z"), 453 | ), 454 | ), 455 | ), 456 | "d", 457 | ), 458 | }, 459 | "test_parens_in_precedence": { 460 | input: "a AND (c OR d)", 461 | want: expr.AND( 462 | "a", 463 | expr.OR( 464 | "c", 465 | "d", 466 | ), 467 | ), 468 | }, 469 | "test_range_precedence_simple": { 470 | input: "c:[* to -1] OR d", 471 | want: expr.OR( 472 | expr.Rang("c", expr.WILD("*"), -1, true), 473 | "d", 474 | ), 475 | }, 476 | "test_range_precedence": { 477 | input: "a OR b AND c:[* to -1] OR d", 478 | want: expr.OR( 479 | expr.OR( 480 | "a", 481 | expr.AND( 482 | "b", 483 | expr.Rang("c", expr.WILD("*"), -1, true), 484 | ), 485 | ), 486 | "d", 487 | ), 488 | }, 489 | "test_full_precedence": { 490 | input: "a OR b AND c:[* to -1] OR d AND NOT +e:f", 491 | want: expr.OR( 492 | expr.OR( 493 | "a", 494 | expr.AND( 495 | "b", 496 | expr.Rang("c", expr.WILD("*"), -1, true), 497 | ), 498 | ), 499 | expr.AND( 500 | "d", 501 | expr.NOT( 502 | expr.MUST(expr.Eq("e", "f")), 503 | ), 504 | ), 505 | ), 506 | }, 507 | "test_full_precedence_with_suffixes": { 508 | input: "a OR b AND c OR d~ AND NOT +(e:f)^10", 509 | want: expr.OR( 510 | expr.OR( 511 | "a", 512 | expr.AND("b", "c"), 513 | ), 514 | expr.AND( 515 | expr.FUZZY("d", 1), 516 | expr.NOT( 517 | expr.BOOST( 518 | expr.MUST( 519 | expr.Eq("e", "f"), 520 | ), 521 | 10.0, 522 | ), 523 | ), 524 | ), 525 | ), 526 | }, 527 | "test_elastic_greater_than_precedence": { 528 | input: "a:>10 AND -b:<=-20", 529 | want: expr.AND( 530 | expr.GREATER("a", 10), 531 | expr.MUSTNOT( 532 | expr.LESSEQ("b", -20), 533 | ), 534 | ), 535 | }, 536 | } 537 | 538 | for name, tc := range tcs { 539 | t.Run(name, func(t *testing.T) { 540 | got, err := Parse(tc.input) 541 | if err != nil { 542 | t.Fatalf("wanted no error, got: %v", err) 543 | } 544 | if !reflect.DeepEqual(tc.want, got) { 545 | t.Fatalf(errTemplate, "parsed expression doesn't 
match", tc.want, got) 546 | } 547 | 548 | raw, err := json.Marshal(got) 549 | if err != nil { 550 | t.Fatalf("wanted no error marshalling to json, got: %s", err) 551 | } 552 | 553 | var gotSerialized expr.Expression 554 | err = json.Unmarshal(raw, &gotSerialized) 555 | if err != nil { 556 | t.Fatalf("wanted no error unmarshalling from json, got: %s", err) 557 | } 558 | 559 | if !reflect.DeepEqual(got, &gotSerialized) { 560 | // occasionally this test fails and the error message makes the test look like 561 | // the want and got are equivalent. This is almost always an unexported var is different 562 | // Using testify/require will show the error if it shows up 563 | // require.Equal(t, tc.want, gotSerialized) 564 | t.Fatalf(errTemplate, "roundtrip serialization is not stable", tc.want, gotSerialized) 565 | } 566 | }) 567 | } 568 | } 569 | 570 | func TestParseLuceneWithDefaultField(t *testing.T) { 571 | type tc struct { 572 | input string 573 | defaultField string 574 | want *expr.Expression 575 | } 576 | 577 | tcs := map[string]tc{ 578 | "single_literal": { 579 | input: "a", 580 | defaultField: "foo", 581 | want: expr.Eq("foo", "a"), 582 | }, 583 | "quoted_literal": { 584 | input: `"a"`, 585 | defaultField: "foo", 586 | want: expr.Eq("foo", "a"), 587 | }, 588 | "number_literal": { 589 | input: `7`, 590 | defaultField: "foo", 591 | want: expr.Eq("foo", 7), 592 | }, 593 | "multiple_literals": { 594 | input: "a b", 595 | defaultField: "foo", 596 | want: expr.AND(expr.Eq("foo", "a"), expr.Eq("foo", "b")), 597 | }, 598 | "basic_and": { 599 | input: "a AND b", 600 | defaultField: "foo", 601 | want: expr.AND(expr.Eq("foo", "a"), expr.Eq("foo", "b")), 602 | }, 603 | } 604 | 605 | for name, tc := range tcs { 606 | t.Run(name, func(t *testing.T) { 607 | got, err := Parse(tc.input, WithDefaultField(tc.defaultField)) 608 | if err != nil { 609 | t.Fatalf("wanted no error, got: %v", err) 610 | } 611 | if !reflect.DeepEqual(tc.want, got) { 612 | t.Fatalf(errTemplate, "parsed 
expression doesn't match", tc.want, got) 613 | } 614 | }) 615 | } 616 | } 617 | 618 | func TestParseFailure(t *testing.T) { 619 | type tc struct { 620 | input string 621 | } 622 | 623 | tcs := map[string]tc{ 624 | "unpaired_paren": { 625 | input: "(a AND b", 626 | }, 627 | "unbalanced_paren": { 628 | input: "(a AND b))", 629 | }, 630 | "unbalanced_nested_paren": { 631 | input: "(a AND (b AND c)", 632 | }, 633 | "equal_without_rhs": { 634 | input: "a = ", 635 | }, 636 | "equal_without_lhs": { 637 | input: "= b", 638 | }, 639 | "empty_parens_nil": { 640 | input: "() = ()", 641 | }, 642 | "and_without_rhs": { 643 | input: "a AND", 644 | }, 645 | "and_without_lhs": { 646 | input: "AND a", 647 | }, 648 | "or_without_rhs": { 649 | input: "a OR", 650 | }, 651 | "or_without_lhs": { 652 | input: "OR a", 653 | }, 654 | "not_without_subexpression_1": { 655 | input: "NOT", 656 | }, 657 | "not_without_subexpression_2": { 658 | input: "NOT()", 659 | }, 660 | "must_without_subexpression_1": { 661 | input: "+", 662 | }, 663 | "must_without_subexpression_2": { 664 | input: "+()", 665 | }, 666 | "mustnot_without_subexpression_1": { 667 | input: "-", 668 | }, 669 | "mustnot_without_subexpression_2": { 670 | input: "-()", 671 | }, 672 | "boost_without_subexpression_1": { 673 | input: "^2", 674 | }, 675 | "boost_without_subexpression_2": { 676 | input: "()^2", 677 | }, 678 | "boost_before_colon_nonterminal": { 679 | input: "color:red^:k1", 680 | }, 681 | "fuzzy_without_subexpression_1": { 682 | input: "~2", 683 | }, 684 | "fuzzy_without_subexpression_2": { 685 | input: "()~2", 686 | }, 687 | "fuzzy_without_subexpression_3": { 688 | input: "~", 689 | }, 690 | "fuzzy_without_subexpression_4": { 691 | input: "()~", 692 | }, 693 | "fuzzy_before_colon_nonterminal": { 694 | input: "color:red~:k1", 695 | }, 696 | "range_without_min": { 697 | input: "[ TO 5]", 698 | }, 699 | "range_without_max": { 700 | input: "[* TO ]", 701 | }, 702 | "range_with_invalid_min": { 703 | input: "[(a OR b) TO 
*]", 704 | }, 705 | "range_with_invalid_max": { 706 | input: "[* TO (a OR b)]", 707 | }, 708 | "nested_range_missing_max": { 709 | input: "(A:B AND C:(D OR E)) OR (NOT(+a:[* TO]))", 710 | }, 711 | "invalid_implicit": { 712 | input: "a: b:c", 713 | }, 714 | } 715 | 716 | for name, tc := range tcs { 717 | t.Run(name, func(t *testing.T) { 718 | _, err := Parse(tc.input) 719 | if err == nil { 720 | t.Fatalf("expected error but did not get one") 721 | } 722 | }) 723 | } 724 | } 725 | 726 | func FuzzParse(f *testing.F) { 727 | tcs := []string{ 728 | "A:B AND C:D", 729 | "+foo OR (NOT(B))", 730 | "A:bar", 731 | "NOT(b:c)", 732 | "z:[* TO 10]", 733 | "x:[10 TO *] AND NOT(y:[1 TO 5]", 734 | "(+a:b -c:d) OR (z:[1 TO *] NOT(foo))", 735 | `+bbq:"woo yay"`, 736 | `-bbq:"woo"`, 737 | `(a:b)^10`, 738 | `a:foo~`, 739 | } 740 | for _, tc := range tcs { 741 | f.Add(tc) 742 | } 743 | f.Fuzz(func(t *testing.T, in string) { 744 | _, _ = Parse(in) 745 | }) 746 | } 747 | -------------------------------------------------------------------------------- /postgresql_test.go: -------------------------------------------------------------------------------- 1 | package lucene 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func TestPostgresSQLEndToEnd(t *testing.T) { 9 | type tc struct { 10 | input string 11 | want string 12 | defaultField string 13 | err string 14 | } 15 | 16 | tcs := map[string]tc{ 17 | // "single_literal": { 18 | // input: "a", 19 | // want: `a`, 20 | // }, 21 | "basic_equal": { 22 | input: "a:b", 23 | want: `"a" = 'b'`, 24 | }, 25 | "basic_equal_with_number": { 26 | input: "a:5", 27 | want: `"a" = 5`, 28 | }, 29 | "basic_greater_with_number": { 30 | input: "a:>22", 31 | want: `"a" > 22`, 32 | }, 33 | "basic_greater_eq_with_number": { 34 | input: "a:>=22", 35 | want: `"a" >= 22`, 36 | }, 37 | "basic_less_with_number": { 38 | input: "a:<22", 39 | want: `"a" < 22`, 40 | }, 41 | "basic_less_eq_with_number": { 42 | input: "a:<=22", 43 | want: `"a" <= 22`, 44 | }, 45 | 
"basic_greater_less_with_number": { 46 | input: "a:<22 AND b:>33", 47 | want: `("a" < 22) AND ("b" > 33)`, 48 | }, 49 | "basic_greater_less_eq_with_number": { 50 | input: "a:<=22 AND b:>=33", 51 | want: `("a" <= 22) AND ("b" >= 33)`, 52 | }, 53 | "basic_wild_equal_with_*": { 54 | input: "a:b*", 55 | want: `"a" SIMILAR TO 'b%'`, 56 | }, 57 | "basic_wild_equal_with_?": { 58 | input: "a:b?z", 59 | want: `"a" SIMILAR TO 'b_z'`, 60 | }, 61 | "basic_inclusive_range": { 62 | input: "a:[* TO 5]", 63 | want: `"a" <= 5`, 64 | }, 65 | "basic_exclusive_range": { 66 | input: "a:{* TO 5}", 67 | want: `"a" < 5`, 68 | }, 69 | "range_over_strings": { 70 | input: "a:{foo TO bar}", 71 | want: `"a" BETWEEN 'foo' AND 'bar'`, 72 | }, 73 | "basic_fuzzy": { 74 | input: "b AND a~", 75 | err: "unable to render operator [FUZZY]", 76 | }, 77 | "fuzzy_power": { 78 | input: "b AND a~10", 79 | err: "unable to render operator [FUZZY]", 80 | }, 81 | "basic_boost": { 82 | input: "b AND a^", 83 | err: "unable to render operator [BOOST]", 84 | }, 85 | "boost_power": { 86 | input: "b AND a^10", 87 | err: "unable to render operator [BOOST]", 88 | }, 89 | "regexp": { 90 | input: "a:/b [c]/", 91 | want: `"a" ~ '/b [c]/'`, 92 | }, 93 | "regexp_with_keywords": { 94 | input: `a:/b "[c]/`, 95 | want: `"a" ~ '/b "[c]/'`, 96 | }, 97 | "regexp_with_escaped_chars": { 98 | input: `url:/example.com\/foo\/bar\/.*/`, 99 | want: `"url" ~ '/example.com\/foo\/bar\/.*/'`, 100 | }, 101 | "basic_default_AND": { 102 | input: "a b", 103 | want: `'a' AND 'b'`, 104 | }, 105 | "default_to_AND_with_subexpressions": { 106 | input: "a:b c:d", 107 | want: `("a" = 'b') AND ("c" = 'd')`, 108 | }, 109 | "basic_and": { 110 | input: "a AND b", 111 | want: `'a' AND 'b'`, 112 | }, 113 | "and_with_nesting": { 114 | input: "a:foo AND b:bar", 115 | want: `("a" = 'foo') AND ("b" = 'bar')`, 116 | }, 117 | "basic_or": { 118 | input: "a OR b", 119 | want: `'a' OR 'b'`, 120 | }, 121 | "or_with_nesting": { 122 | input: "a:foo OR b:bar", 123 | 
want: `("a" = 'foo') OR ("b" = 'bar')`, 124 | }, 125 | "range_operator_inclusive": { 126 | input: "a:[1 TO 5]", 127 | want: `"a" >= 1 AND "a" <= 5`, 128 | }, 129 | "range_operator_inclusive_unbound": { 130 | input: `a:[* TO 200]`, 131 | want: `"a" <= 200`, 132 | }, 133 | "range_operator_exclusive": { 134 | input: `a:{"ab" TO "az"}`, 135 | want: `"a" BETWEEN 'ab' AND 'az'`, 136 | }, 137 | "range_operator_exclusive_unbound": { 138 | input: `a:{2 TO *}`, 139 | want: `"a" > 2`, 140 | }, 141 | "basic_not": { 142 | input: "NOT b", 143 | want: `NOT('b')`, 144 | }, 145 | "nested_not": { 146 | input: "a:foo OR NOT b:bar", 147 | want: `("a" = 'foo') OR (NOT("b" = 'bar'))`, 148 | }, 149 | "term_grouping": { 150 | input: "(a:foo OR b:bar) AND c:baz", 151 | want: `(("a" = 'foo') OR ("b" = 'bar')) AND ("c" = 'baz')`, 152 | }, 153 | "value_grouping": { 154 | input: "a:(foo OR baz OR bar)", 155 | want: `"a" IN ('foo', 'baz', 'bar')`, 156 | }, 157 | "basic_must": { 158 | input: "+a:b", 159 | want: `"a" = 'b'`, 160 | }, 161 | "basic_must_not": { 162 | input: "-a:b", 163 | want: `NOT("a" = 'b')`, 164 | }, 165 | "basic_nested_must_not": { 166 | input: "d:e AND (-a:b AND +f:e)", 167 | want: `("d" = 'e') AND ((NOT("a" = 'b')) AND ("f" = 'e'))`, 168 | }, 169 | "basic_escaping": { 170 | input: `a:\(1\+1\)\:2`, 171 | want: `"a" = '(1+1):2'`, 172 | }, 173 | "escaped_column_name": { 174 | input: `foo\ bar:b`, 175 | want: `"foo bar" = 'b'`, 176 | }, 177 | "boost_key_value": { 178 | input: "a:b^2 AND foo", 179 | err: "unable to render operator [BOOST]", 180 | }, 181 | "nested_sub_expressions": { 182 | input: "((title:foo OR title:bar) AND (body:foo OR body:bar)) OR k:v", 183 | want: `((("title" = 'foo') OR ("title" = 'bar')) AND (("body" = 'foo') OR ("body" = 'bar'))) OR ("k" = 'v')`, 184 | }, 185 | "fuzzy_key_value": { 186 | input: "a:b~2 AND foo", 187 | err: "unable to render operator [FUZZY]", 188 | }, 189 | "precedence_works": { 190 | input: "a:b AND c:d OR e:f OR h:i AND j:k", 191 | want: 
`((("a" = 'b') AND ("c" = 'd')) OR ("e" = 'f')) OR (("h" = 'i') AND ("j" = 'k'))`, 192 | }, 193 | "test_precedence_weaving": { 194 | input: "a OR b AND c OR d", 195 | want: `('a' OR ('b' AND 'c')) OR 'd'`, 196 | }, 197 | "test_precedence_weaving_with_not": { 198 | input: "NOT a OR b AND NOT c OR d", 199 | want: `((NOT('a')) OR ('b' AND (NOT('c')))) OR 'd'`, 200 | }, 201 | "test_equals_in_precedence": { 202 | input: "a:az OR b:bz AND NOT c:z OR d", 203 | want: `(("a" = 'az') OR (("b" = 'bz') AND (NOT("c" = 'z')))) OR 'd'`, 204 | }, 205 | "test_parens_in_precedence": { 206 | input: "a AND (c OR d)", 207 | want: `'a' AND ('c' OR 'd')`, 208 | }, 209 | "test_range_precedence_simple": { 210 | input: "c:[* to -1] OR d", 211 | want: `("c" <= -1) OR 'd'`, 212 | }, 213 | "test_range_precedence": { 214 | input: "a OR b AND c:[* to -1] OR d", 215 | want: `('a' OR ('b' AND ("c" <= -1))) OR 'd'`, 216 | }, 217 | "test_full_precedence": { 218 | input: "a OR b AND c:[* to -1] OR d AND NOT +e:f", 219 | want: `('a' OR ('b' AND ("c" <= -1))) OR ('d' AND (NOT("e" = 'f')))`, 220 | }, 221 | "test_elastic_greater_than_precedence": { 222 | input: "a:>10 AND -b:<=-20", 223 | want: `("a" > 10) AND (NOT("b" <= -20))`, 224 | }, 225 | "escape_quotes": { 226 | input: "a:'b'", 227 | want: `"a" = '''b'''`, 228 | }, 229 | "name_starts_with_number": { 230 | input: "1a:b", 231 | want: `"1a" = 'b'`, 232 | }, 233 | "default_field_and": { 234 | input: `title:"The Right Way" AND go`, 235 | want: `("title" = 'The Right Way') AND ("default" = 'go')`, 236 | defaultField: "default", 237 | }, 238 | "default_field_or": { 239 | input: `title:"The Right Way" OR go`, 240 | want: `("title" = 'The Right Way') OR ("default" = 'go')`, 241 | defaultField: "default", 242 | }, 243 | "default_field_not": { 244 | input: `title:"The Right Way" AND NOT(go)`, 245 | want: `("title" = 'The Right Way') AND (NOT("default" = 'go'))`, 246 | defaultField: "default", 247 | }, 248 | "asterisk_in_literal_are_regular_expression": { 249 
| input: `foo:*`, 250 | want: `"foo" SIMILAR TO '%'`, 251 | }, 252 | } 253 | 254 | for name, tc := range tcs { 255 | t.Run(name, func(t *testing.T) { 256 | got, err := ToPostgres(tc.input, WithDefaultField(tc.defaultField)) 257 | if err != nil { 258 | // if we got an expect error then we are fine 259 | if tc.err != "" && strings.Contains(err.Error(), tc.err) { 260 | return 261 | } 262 | t.Fatalf("unexpected error rendering expression: %v", err) 263 | } 264 | 265 | if tc.err != "" { 266 | t.Fatalf("\nexpected error [%s]\ngot: %s", tc.err, got) 267 | } 268 | 269 | if got != tc.want { 270 | expr, err := Parse(tc.input) 271 | if err != nil { 272 | t.Fatalf("unable to parse expression: %v", err) 273 | } 274 | t.Fatalf("\nwant %s\ngot %s\nparsed expression: %#v\n", tc.want, got, expr) 275 | } 276 | }) 277 | } 278 | } 279 | 280 | func TestPostgresParameterizedSQLEndToEnd(t *testing.T) { 281 | type tc struct { 282 | input string 283 | wantStr string 284 | wantParams []any 285 | defaultField string 286 | err string 287 | } 288 | 289 | tcs := map[string]tc{ 290 | // "single_literal": { 291 | // input: "a", 292 | // want: `a`, 293 | // }, 294 | "basic_equal": { 295 | input: "a:b", 296 | wantStr: `"a" = $1`, 297 | wantParams: []any{"b"}, 298 | }, 299 | "basic_equal_with_number": { 300 | input: "a:5", 301 | wantStr: `"a" = $1`, 302 | wantParams: []any{5}, 303 | }, 304 | "basic_greater_with_number": { 305 | input: "a:>22", 306 | wantStr: `"a" > $1`, 307 | wantParams: []any{22}, 308 | }, 309 | "basic_greater_eq_with_number": { 310 | input: "a:>=22", 311 | wantStr: `"a" >= $1`, 312 | wantParams: []any{22}, 313 | }, 314 | "basic_less_with_number": { 315 | input: "a:<22", 316 | wantStr: `"a" < $1`, 317 | wantParams: []any{22}, 318 | }, 319 | "basic_less_eq_with_number": { 320 | input: "a:<=22", 321 | wantStr: `"a" <= $1`, 322 | wantParams: []any{22}, 323 | }, 324 | "basic_greater_less_with_number": { 325 | input: "a:<22 AND b:>33", 326 | wantStr: `("a" < $1) AND ("b" > $2)`, 327 | 
wantParams: []any{22, 33}, 328 | }, 329 | "basic_greater_less_eq_with_number": { 330 | input: "a:<=22 AND b:>=33", 331 | wantStr: `("a" <= $1) AND ("b" >= $2)`, 332 | wantParams: []any{22, 33}, 333 | }, 334 | "basic_wild_equal_with_*": { 335 | input: "a:b*", 336 | wantStr: `"a" SIMILAR TO $1`, 337 | wantParams: []any{"b%"}, 338 | }, 339 | "basic_wild_equal_with_?": { 340 | input: "a:b?z", 341 | wantStr: `"a" SIMILAR TO $1`, 342 | wantParams: []any{"b_z"}, 343 | }, 344 | "basic_inclusive_range": { 345 | input: "a:[* TO 5]", 346 | wantStr: `"a" <= $1`, 347 | wantParams: []any{5}, 348 | }, 349 | "basic_exclusive_range": { 350 | input: "a:{* TO 5}", 351 | wantStr: `"a" < $1`, 352 | wantParams: []any{5}, 353 | }, 354 | "range_over_strings": { 355 | input: "a:{foo TO bar}", 356 | wantStr: `"a" BETWEEN $1 AND $2`, 357 | wantParams: []any{"foo", "bar"}, 358 | }, 359 | "basic_fuzzy": { 360 | input: "b AND a~", 361 | err: "unable to render operator [FUZZY]", 362 | }, 363 | "fuzzy_power": { 364 | input: "b AND a~10", 365 | err: "unable to render operator [FUZZY]", 366 | }, 367 | "basic_boost": { 368 | input: "b AND a^", 369 | err: "unable to render operator [BOOST]", 370 | }, 371 | "boost_power": { 372 | input: "b AND a^10", 373 | err: "unable to render operator [BOOST]", 374 | }, 375 | "regexp": { 376 | input: "a:/b [c]/", 377 | wantStr: `"a" ~ $1`, 378 | wantParams: []any{"/b [c]/"}, 379 | }, 380 | "regexp_with_keywords": { 381 | input: `a:/b "[c]/`, 382 | wantStr: `"a" ~ $1`, 383 | wantParams: []any{`/b "[c]/`}, 384 | }, 385 | "regexp_with_escaped_chars": { 386 | input: `url:/example.com\/foo\/bar\/.*/`, 387 | wantStr: `"url" ~ $1`, 388 | wantParams: []any{`/example.com\/foo\/bar\/.*/`}, 389 | }, 390 | "basic_default_AND": { 391 | input: "a b", 392 | wantStr: `$1 AND $2`, 393 | wantParams: []any{"a", "b"}, 394 | }, 395 | "default_to_AND_with_subexpressions": { 396 | input: "a:b c:d", 397 | wantStr: `("a" = $1) AND ("c" = $2)`, 398 | wantParams: []any{"b", "d"}, 399 | }, 
400 | "basic_and": { 401 | input: "a AND b", 402 | wantStr: `$1 AND $2`, 403 | wantParams: []any{"a", "b"}, 404 | }, 405 | "and_with_nesting": { 406 | input: "a:foo AND b:bar", 407 | wantStr: `("a" = $1) AND ("b" = $2)`, 408 | wantParams: []any{"foo", "bar"}, 409 | }, 410 | "basic_or": { 411 | input: "a OR b", 412 | wantStr: `$1 OR $2`, 413 | wantParams: []any{"a", "b"}, 414 | }, 415 | "or_with_nesting": { 416 | input: "a:foo OR b:bar", 417 | wantStr: `("a" = $1) OR ("b" = $2)`, 418 | wantParams: []any{"foo", "bar"}, 419 | }, 420 | "range_operator_inclusive": { 421 | input: "a:[1 TO 5]", 422 | wantStr: `"a" >= $1 AND "a" <= $2`, 423 | wantParams: []any{1, 5}, 424 | }, 425 | "range_operator_inclusive_unbound": { 426 | input: `a:[* TO 200]`, 427 | wantStr: `"a" <= $1`, 428 | wantParams: []any{200}, 429 | }, 430 | "range_operator_exclusive": { 431 | input: `a:{"ab" TO "az"}`, 432 | wantStr: `"a" BETWEEN $1 AND $2`, 433 | wantParams: []any{"ab", "az"}, 434 | }, 435 | "range_operator_exclusive_unbound": { 436 | input: `a:{2 TO *}`, 437 | wantStr: `"a" > $1`, 438 | wantParams: []any{2}, 439 | }, 440 | "basic_not": { 441 | input: "NOT b", 442 | wantStr: `NOT($1)`, 443 | wantParams: []any{"b"}, 444 | }, 445 | "nested_not": { 446 | input: "a:foo OR NOT b:bar", 447 | wantStr: `("a" = $1) OR (NOT("b" = $2))`, 448 | wantParams: []any{"foo", "bar"}, 449 | }, 450 | "term_grouping": { 451 | input: "(a:foo OR b:bar) AND c:baz", 452 | wantStr: `(("a" = $1) OR ("b" = $2)) AND ("c" = $3)`, 453 | wantParams: []any{"foo", "bar", "baz"}, 454 | }, 455 | "value_grouping": { 456 | input: "a:(foo OR baz OR bar)", 457 | wantStr: `"a" IN ($1, $2, $3)`, 458 | wantParams: []any{"foo", "baz", "bar"}, 459 | }, 460 | "basic_must": { 461 | input: "+a:b", 462 | wantStr: `"a" = $1`, 463 | wantParams: []any{"b"}, 464 | }, 465 | "basic_must_not": { 466 | input: "-a:b", 467 | wantStr: `NOT("a" = $1)`, 468 | wantParams: []any{"b"}, 469 | }, 470 | "basic_nested_must_not": { 471 | input: "d:e AND (-a:b AND 
+f:e)", 472 | wantStr: `("d" = $1) AND ((NOT("a" = $2)) AND ("f" = $3))`, 473 | wantParams: []any{"e", "b", "e"}, 474 | }, 475 | "basic_escaping": { 476 | input: `a:\(1\+1\)\:2`, 477 | wantStr: `"a" = $1`, 478 | wantParams: []any{"(1+1):2"}, 479 | }, 480 | "escaped_column_name": { 481 | input: `foo\ bar:b`, 482 | wantStr: `"foo bar" = $1`, 483 | wantParams: []any{"b"}, 484 | }, 485 | "boost_key_value": { 486 | input: "a:b^2 AND foo", 487 | err: "unable to render operator [BOOST]", 488 | }, 489 | "nested_sub_expressions": { 490 | input: "((title:foo OR title:bar) AND (body:foo OR body:bar)) OR k:v", 491 | wantStr: `((("title" = $1) OR ("title" = $2)) AND (("body" = $3) OR ("body" = $4))) OR ("k" = $5)`, 492 | wantParams: []any{"foo", "bar", "foo", "bar", "v"}, 493 | }, 494 | "fuzzy_key_value": { 495 | input: "a:b~2 AND foo", 496 | err: "unable to render operator [FUZZY]", 497 | }, 498 | "precedence_works": { 499 | input: "a:b AND c:d OR e:f OR h:i AND j:k", 500 | wantStr: `((("a" = $1) AND ("c" = $2)) OR ("e" = $3)) OR (("h" = $4) AND ("j" = $5))`, 501 | wantParams: []any{"b", "d", "f", "i", "k"}, 502 | }, 503 | "test_precedence_weaving": { 504 | input: "a OR b AND c OR d", 505 | wantStr: `($1 OR ($2 AND $3)) OR $4`, 506 | wantParams: []any{"a", "b", "c", "d"}, 507 | }, 508 | "test_precedence_weaving_with_not": { 509 | input: "NOT a OR b AND NOT c OR d", 510 | wantStr: `((NOT($1)) OR ($2 AND (NOT($3)))) OR $4`, 511 | wantParams: []any{"a", "b", "c", "d"}, 512 | }, 513 | "test_equals_in_precedence": { 514 | input: "a:az OR b:bz AND NOT c:z OR d", 515 | wantStr: `(("a" = $1) OR (("b" = $2) AND (NOT("c" = $3)))) OR $4`, 516 | wantParams: []any{"az", "bz", "z", "d"}, 517 | }, 518 | "test_parens_in_precedence": { 519 | input: "a AND (c OR d)", 520 | wantStr: `$1 AND ($2 OR $3)`, 521 | wantParams: []any{"a", "c", "d"}, 522 | }, 523 | "test_range_precedence_simple": { 524 | input: "c:[* to -1] OR d", 525 | wantStr: `("c" <= $1) OR $2`, 526 | wantParams: []any{-1, "d"}, 527 
| }, 528 | "test_range_precedence": { 529 | input: "a OR b AND c:[* to -1] OR d", 530 | wantStr: `($1 OR ($2 AND ("c" <= $3))) OR $4`, 531 | wantParams: []any{"a", "b", -1, "d"}, 532 | }, 533 | "test_full_precedence": { 534 | input: "a OR b AND c:[* to -1] OR d AND NOT +e:f", 535 | wantStr: `($1 OR ($2 AND ("c" <= $3))) OR ($4 AND (NOT("e" = $5)))`, 536 | wantParams: []any{"a", "b", -1, "d", "f"}, 537 | }, 538 | "test_elastic_greater_than_precedence": { 539 | input: "a:>10 AND -b:<=-20", 540 | wantStr: `("a" > $1) AND (NOT("b" <= $2))`, 541 | wantParams: []any{10, -20}, 542 | }, 543 | "escape_quotes": { 544 | input: "a:'b'", 545 | wantStr: `"a" = $1`, 546 | wantParams: []any{"'b'"}, 547 | }, 548 | "name_starts_with_number": { 549 | input: "1a:b", 550 | wantStr: `"1a" = $1`, 551 | wantParams: []any{"b"}, 552 | }, 553 | "default_field_and": { 554 | input: `title:"The Right Way" AND go`, 555 | wantStr: `("title" = $1) AND ("default" = $2)`, 556 | wantParams: []any{"The Right Way", "go"}, 557 | defaultField: "default", 558 | }, 559 | "default_field_or": { 560 | input: `title:"The Right Way" OR go`, 561 | wantStr: `("title" = $1) OR ("default" = $2)`, 562 | wantParams: []any{"The Right Way", "go"}, 563 | defaultField: "default", 564 | }, 565 | "default_field_not": { 566 | input: `title:"The Right Way" AND NOT(go)`, 567 | wantStr: `("title" = $1) AND (NOT("default" = $2))`, 568 | wantParams: []any{"The Right Way", "go"}, 569 | defaultField: "default", 570 | }, 571 | "default_bare_field": { 572 | input: `this is an example`, 573 | wantStr: `((("default" = $1) AND ("default" = $2)) AND ("default" = $3)) AND ("default" = $4)`, 574 | wantParams: []any{"this", "is", "an", "example"}, 575 | defaultField: "default", 576 | }, 577 | "default_single_literal": { 578 | input: `a`, 579 | wantStr: `"default" = $1`, 580 | wantParams: []any{"a"}, 581 | defaultField: "default", 582 | }, 583 | "question_marks_in_literal_are_regular_expression": { 584 | input: `foo:abc?`, 585 | wantStr: 
`"foo" SIMILAR TO $1`, 586 | wantParams: []any{"abc_"}, 587 | }, 588 | "start asterisk_in_literal_are_regular_expression": { 589 | input: `foo:*`, 590 | wantStr: `"foo" SIMILAR TO $1`, 591 | wantParams: []any{"%"}, 592 | }, 593 | } 594 | 595 | for name, tc := range tcs { 596 | t.Run(name, func(t *testing.T) { 597 | gotStr, gotParams, err := ToParameterizedPostgres(tc.input, WithDefaultField(tc.defaultField)) 598 | if err != nil { 599 | // if we got an expect error then we are fine 600 | if tc.err != "" && strings.Contains(err.Error(), tc.err) { 601 | return 602 | } 603 | t.Fatalf("unexpected error rendering expression: %v", err) 604 | } 605 | 606 | if tc.err != "" { 607 | t.Fatalf("\nexpected error [%s]\ngot: %s", tc.err, gotStr) 608 | } 609 | 610 | if gotStr != tc.wantStr { 611 | expr, err := Parse(tc.input) 612 | if err != nil { 613 | t.Fatalf("unable to parse expression: %v", err) 614 | } 615 | t.Fatalf("\nwant %s\ngot %s\nparsed expression: %#v\n", tc.wantStr, gotStr, expr) 616 | } 617 | 618 | if len(gotParams) != len(tc.wantParams) { 619 | t.Fatalf("expected %d params(%v), got %d (%v)", len(tc.wantParams), tc.wantParams, len(gotParams), gotParams) 620 | } 621 | 622 | for i := range gotParams { 623 | if gotParams[i] != tc.wantParams[i] { 624 | t.Fatalf("expected param %d to be %v, got %v", i, tc.wantParams[i], gotParams[i]) 625 | } 626 | } 627 | }) 628 | } 629 | } 630 | --------------------------------------------------------------------------------