├── go.sum ├── go.work ├── go.mod ├── .gitignore ├── fuzz ├── README.md ├── go.mod ├── fuzz_test.go └── go.sum ├── .github ├── workflows │ ├── bump-release.yml │ └── create-release.yml └── bump.yml ├── render.go ├── release-process.md ├── cmd └── main.go ├── pkg ├── driver │ ├── postgresql.go │ ├── postgresql_test.go │ ├── base.go │ └── renderfn.go └── lucene │ ├── expr │ ├── operator.go │ ├── renderer.go │ ├── expression_test.go │ ├── validator.go │ └── expression.go │ └── reduce │ └── reduce.go ├── README.md ├── internal └── lex │ ├── lext_test.go │ └── lex.go ├── parse.go ├── LICENSE ├── parse_test.go └── postgresql_test.go /go.sum: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /go.work: -------------------------------------------------------------------------------- 1 | go 1.22 2 | 3 | use ( 4 | . 5 | ./fuzz 6 | ) 7 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/grindlemire/go-lucene 2 | 3 | go 1.22 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # vendor/ 15 | .vscode 16 | .cursor -------------------------------------------------------------------------------- /fuzz/README.md: -------------------------------------------------------------------------------- 1 | # What is this package? 2 | 3 | This package contains all the necessary code to fuzz test go-lucene. 
However it requires a few imports 4 | to do so and uses pg_query to validate the produced queries. Moving it to this directory allows the top level 5 | mod file to remain clean of dependencies while still allowing for the fuzz testing. -------------------------------------------------------------------------------- /fuzz/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/grindlemire/go-lucene/fuzz 2 | 3 | go 1.22 4 | 5 | require ( 6 | github.com/grindlemire/go-lucene v0.0.14 7 | github.com/pganalyze/pg_query_go/v4 v4.2.3 8 | ) 9 | 10 | require ( 11 | github.com/golang/protobuf v1.4.2 // indirect 12 | google.golang.org/protobuf v1.23.0 // indirect 13 | ) 14 | 15 | // Always just use the local version of go-lucene 16 | replace github.com/grindlemire/go-lucene => ../ 17 | -------------------------------------------------------------------------------- /.github/workflows/bump-release.yml: -------------------------------------------------------------------------------- 1 | name: Bump Release 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: checkout repo 13 | uses: actions/checkout@v3 14 | 15 | - name: setup go 16 | uses: actions/setup-go@v3 17 | with: 18 | go-version: 'stable' 19 | 20 | - name: test 21 | run: go test -v ./... 22 | 23 | - name: fuzz test 24 | run: go test -v ./fuzz 25 | 26 | - uses: grindlemire/bump-release-action@master 27 | with: 28 | config_path: '.github/bump.yml' 29 | -------------------------------------------------------------------------------- /render.go: -------------------------------------------------------------------------------- 1 | package lucene 2 | 3 | import "github.com/grindlemire/go-lucene/pkg/driver" 4 | 5 | var ( 6 | postgres = driver.NewPostgresDriver() 7 | ) 8 | 9 | // ToPostgres is a wrapper that will render the lucene expression string as a postgres sql filter string. 
10 | func ToPostgres(in string, opts ...Opt) (string, error) { 11 | e, err := Parse(in, opts...) 12 | if err != nil { 13 | return "", err 14 | } 15 | 16 | return postgres.Render(e) 17 | } 18 | 19 | // ToParameterizedPostgres is a wrapper that will render the lucene expression string as a postgres sql filter string with parameters. 20 | // The returned string will contain placeholders for the parameters that can be passed directly to a Query statement. 21 | func ToParameterizedPostgres(in string, opts ...Opt) (s string, params []any, err error) { 22 | e, err := Parse(in, opts...) 23 | if err != nil { 24 | return "", nil, err 25 | } 26 | 27 | return postgres.RenderParam(e) 28 | } 29 | -------------------------------------------------------------------------------- /release-process.md: -------------------------------------------------------------------------------- 1 | # Release Process 2 | 3 | ### Note this might be out of date, I have to figure out what is going on here 4 | 5 | ## Rules for release branches: 6 | 7 | - If you are releasing a new major version you need to branch off of master into a branch `release-branch.v#` (example `release-branch.v2` for a 2.x release) 8 | - If you are releasing a minor or patch update to an existing major release make sure to merge master into the release branch 9 | 10 | ## Rules for tagging and publishing the release 11 | 12 | When you are ready to publish the release make sure you... 13 | 14 | 1. Merge your changes into the correct release branch. 15 | 2. Check out the release branch locally (example: `git pull origin release-branch.v3`) 16 | 3. Create a new tag for the specific release version you will publish (example: `git tag v3.0.1`) 17 | 4. Push the tag up to github (example: `git push origin v3.0.1`) 18 | 5. 
Check that the github action successfully finished and created a release 19 | -------------------------------------------------------------------------------- /cmd/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "os" 7 | 8 | "github.com/grindlemire/go-lucene" 9 | "github.com/grindlemire/go-lucene/pkg/driver" 10 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 11 | ) 12 | 13 | func main() { 14 | if len(os.Args) < 2 { 15 | fmt.Printf("Please provide a lucene query\n") 16 | os.Exit(1) 17 | } 18 | 19 | e, err := lucene.Parse(os.Args[1]) 20 | if err != nil { 21 | fmt.Printf("Error parsing: %s\n", err) 22 | os.Exit(1) 23 | } 24 | 25 | fmt.Printf("Parsed input: %s\n", e) 26 | fmt.Printf("Verbose input: %#v\n", e) 27 | 28 | s, err := json.MarshalIndent(e, "", " ") 29 | if err != nil { 30 | fmt.Printf("Error marshalling to json: %s\n", err) 31 | os.Exit(1) 32 | } 33 | 34 | fmt.Printf("\n%s\n", s) 35 | 36 | var e1 expr.Expression 37 | err = json.Unmarshal(s, &e1) 38 | if err != nil { 39 | fmt.Printf("Error unmarshalling from json: %s\n", err) 40 | os.Exit(1) 41 | } 42 | 43 | sq, err := driver.NewPostgresDriver().Render(e) 44 | if err != nil { 45 | fmt.Printf("Error rendering sql: %s\n", err) 46 | os.Exit(1) 47 | } 48 | 49 | fmt.Printf("Reparsed input: %v\n", e1) 50 | fmt.Printf("Verbose input: %#v\n", e1) 51 | fmt.Printf("SQL output: %s\n", sq) 52 | } 53 | -------------------------------------------------------------------------------- /pkg/driver/postgresql.go: -------------------------------------------------------------------------------- 1 | package driver 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 8 | ) 9 | 10 | // PostgresDriver transforms a parsed lucene expression to a postgres sql filter. 
11 | type PostgresDriver struct { 12 | Base 13 | } 14 | 15 | // NewPostgresDriver creates a new driver that will output postgres filter strings from parsed lucene expressions. 16 | func NewPostgresDriver() PostgresDriver { 17 | fns := map[expr.Operator]RenderFN{ 18 | expr.Literal: literal, 19 | } 20 | 21 | for op, sharedFN := range Shared { 22 | _, found := fns[op] 23 | if !found { 24 | fns[op] = sharedFN 25 | } 26 | } 27 | 28 | return PostgresDriver{ 29 | Base{ 30 | RenderFNs: fns, 31 | }, 32 | } 33 | } 34 | 35 | // RenderParam will render the expression into a parameterized query using PostgreSQL's $N placeholder format. 36 | // The returned string will contain $1, $2, $3, etc. placeholders and the params will contain the values 37 | // that should be passed to the query. 38 | func (p PostgresDriver) RenderParam(e *expr.Expression) (s string, params []any, err error) { 39 | // First, use the base implementation to get the result with ? placeholders 40 | str, params, err := p.Base.RenderParam(e) 41 | if err != nil { 42 | return s, params, err 43 | } 44 | 45 | // Then convert ? placeholders to $N format 46 | paramIndex := 1 47 | result := strings.Builder{} 48 | i := 0 49 | for i < len(str) { 50 | if str[i] == '?' 
{ 51 | result.WriteString(fmt.Sprintf("$%d", paramIndex)) 52 | paramIndex++ 53 | } else { 54 | result.WriteByte(str[i]) 55 | } 56 | i++ 57 | } 58 | 59 | return result.String(), params, nil 60 | } 61 | -------------------------------------------------------------------------------- /.github/workflows/create-release.yml: -------------------------------------------------------------------------------- 1 | name: Create Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: checkout repo 13 | uses: actions/checkout@v3 14 | 15 | - name: setup go 16 | uses: actions/setup-go@v3 17 | with: 18 | go-version: 'stable' 19 | 20 | - name: set tagname 21 | id: tag 22 | run: echo "version=$(echo $GITHUB_REF | cut -d / -f 3)" >> $GITHUB_OUTPUT 23 | 24 | - name: set major version name 25 | id: major_version 26 | run: echo "version=$(echo $GITHUB_REF | cut -d / -f 3 | cut -d . -f 1)" >> $GITHUB_OUTPUT 27 | 28 | - name: test 29 | run: go test -v ./... 
30 | 31 | - name: fuzz test 32 | run: go test -v ./fuzz 33 | 34 | - name: release 35 | uses: softprops/action-gh-release@v1 36 | with: 37 | tag_name: ${{ steps.tag.outputs.version }} 38 | 39 | - name: create release branch 40 | uses: peterjgrainger/action-create-branch@v2.2.0 41 | env: 42 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 43 | with: 44 | branch: release-branch.${{ steps.major_version.outputs.version }} 45 | 46 | - name: merge to release branch 47 | uses: tukasz/direct-merge-action@v2.0.2 48 | with: 49 | source-branch: main 50 | target-branch: release-branch.${{ steps.major_version.outputs.version }} 51 | commit-message: Automatic merge from main for release ${{ steps.tag.outputs.version }} 52 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 53 | -------------------------------------------------------------------------------- /pkg/lucene/expr/operator.go: -------------------------------------------------------------------------------- 1 | package expr 2 | 3 | // Operator is an enum over the different valid lucene operations 4 | type Operator int 5 | 6 | // operations that can be used 7 | // To add a new operator, do the following: 8 | // 1. Add it to the iota here 9 | // 2. Add it to the string maps below 10 | // 3. Add a render function for it at least in base, perhaps in all the drivers as well 11 | // 4. Update the json parsing and tests to support the new operator 12 | // 5. 
Add tests in parse_test and expression_test 13 | const ( 14 | Undefined Operator = iota 15 | And 16 | Or 17 | Equals 18 | Like 19 | Not 20 | Range 21 | Must 22 | MustNot 23 | Boost 24 | Fuzzy 25 | Literal 26 | Wild 27 | Regexp 28 | Greater 29 | Less 30 | GreaterEq 31 | LessEq 32 | In 33 | List 34 | ) 35 | 36 | // String renders the operator as a string 37 | func (o Operator) String() string { 38 | return toString[o] 39 | } 40 | 41 | var fromString = map[string]Operator{ 42 | "AND": And, 43 | "OR": Or, 44 | "EQUALS": Equals, 45 | "LIKE": Like, 46 | "NOT": Not, 47 | "RANGE": Range, 48 | "MUST": Must, 49 | "MUST_NOT": MustNot, 50 | "BOOST": Boost, 51 | "FUZZY": Fuzzy, 52 | "LITERAL": Literal, 53 | "WILD": Wild, 54 | "REGEXP": Regexp, 55 | "GREATER": Greater, 56 | "LESS": Less, 57 | "GREATER_EQ": GreaterEq, 58 | "LESS_EQ": LessEq, 59 | "IN": In, 60 | "LIST": List, 61 | } 62 | 63 | var toString = map[Operator]string{ 64 | And: "AND", 65 | Or: "OR", 66 | Equals: "EQUALS", 67 | Like: "LIKE", 68 | Not: "NOT", 69 | Range: "RANGE", 70 | Must: "MUST", 71 | MustNot: "MUST_NOT", 72 | Boost: "BOOST", 73 | Fuzzy: "FUZZY", 74 | Literal: "LITERAL", 75 | Wild: "WILD", 76 | Regexp: "REGEXP", 77 | Greater: "GREATER", 78 | Less: "LESS", 79 | GreaterEq: "GREATER_EQ", 80 | LessEq: "LESS_EQ", 81 | In: "IN", 82 | List: "LIST", 83 | } 84 | -------------------------------------------------------------------------------- /.github/bump.yml: -------------------------------------------------------------------------------- 1 | release: 2 | title-prefix: 'v' 3 | initial-version: '0.0.1' 4 | tag-prefix: 'v' 5 | commit-note-replacers: 6 | - replace-prefix: 'breaking: ' 7 | new-prefix: '' 8 | - replace-prefix: 'feature: ' 9 | new-prefix: '' 10 | - replace-prefix: 'change: ' 11 | new-prefix: '' 12 | - replace-prefix: 'fix: ' 13 | new-prefix: '' 14 | - replace-prefix: 'document: ' 15 | new-prefix: '' 16 | - replace-prefix: 'dependency: ' 17 | new-prefix: '' 18 | branch: 19 | base-branch: main 20 | 
version-branch-prefix: 'v' 21 | bump-version-commit-prefix: 'v' 22 | categories: 23 | - title: 'Breaking Changes!' 24 | labels: 25 | - 'BreakingChange' 26 | commits: 27 | - 'breaking:' 28 | changes-prefix: ':warning: ' 29 | 30 | - title: 'Changes' 31 | labels: 32 | - 'Feature' 33 | commits: 34 | - 'feature:' 35 | changes-prefix: ':gift: ' 36 | 37 | - title: 'Changes' 38 | labels: 39 | - Maintenance 40 | commits: 41 | - 'change:' 42 | changes-prefix: ':hammer: ' 43 | 44 | - title: 'Bug Fixes' 45 | labels: 46 | - 'Bug' 47 | commits: 48 | - 'fix:' 49 | changes-prefix: ':ambulance: ' 50 | 51 | - title: 'Changes' 52 | labels: 53 | - 'Documentation' 54 | commits: 55 | - 'document:' 56 | changes-prefix: ':blue_book: ' 57 | 58 | - title: 'Dependency Updates' 59 | labels: 60 | - 'Dependencies' 61 | skip-label: 'Development' 62 | commits: 63 | - 'dependency:' 64 | changes-prefix: ':green_book: ' 65 | bump: 66 | default: 'patch' 67 | major: 68 | labels: 69 | - 'BreakingChange' 70 | commits: 71 | - 'breaking:' 72 | minor: 73 | labels: 74 | - 'Feature' 75 | commits: 76 | - 'feature:' 77 | -------------------------------------------------------------------------------- /fuzz/fuzz_test.go: -------------------------------------------------------------------------------- 1 | package fuzz 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/grindlemire/go-lucene" 8 | "github.com/grindlemire/go-lucene/pkg/driver" 9 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 10 | pg_query "github.com/pganalyze/pg_query_go/v4" 11 | ) 12 | 13 | func FuzzPostgresDriver(f *testing.F) { 14 | tcs := []string{ 15 | "A:B AND C:D", 16 | "+foo OR (NOT(B))", 17 | "A:bar", 18 | "NOT(b:c)", 19 | "z:[* TO 10]", 20 | "x:[10 TO *] AND NOT(y:[1 TO 5]", 21 | "(+a:b -c:d) OR (z:[1 TO *] NOT(foo))", 22 | `+bbq:"woo yay"`, 23 | `-bbq:"woo"`, 24 | `(a:b)^10`, 25 | `a:foo~`, 26 | } 27 | for _, tc := range tcs { 28 | f.Add(tc) 29 | } 30 | 31 | f.Fuzz(func(t *testing.T, in string) { 32 | e, err := 
lucene.Parse(in) 33 | if err != nil { 34 | // Ignore invalid expressions. 35 | return 36 | } 37 | 38 | validateRender(t, e) 39 | 40 | // Test the default field option. 41 | e, err = lucene.Parse(in, lucene.WithDefaultField("default")) 42 | if err != nil { 43 | // Ignore invalid expressions. 44 | return 45 | } 46 | 47 | validateRender(t, e) 48 | }) 49 | } 50 | 51 | func validateRender(t *testing.T, e *expr.Expression) { 52 | f, err := driver.NewPostgresDriver().Render(e) 53 | if err != nil { 54 | // Ignore errors that are expected. 55 | if strings.Contains(err.Error(), "unable to render operator") || 56 | strings.Contains(err.Error(), "literal contains invalid utf8") || 57 | strings.Contains(err.Error(), "literal contains null byte") || 58 | strings.Contains(err.Error(), "column name contains a double quote") || 59 | strings.Contains(err.Error(), "column name is empty") || 60 | strings.Contains(err.Error(), "the BETWEEN operator needs a two item list in the right hand side") { 61 | return 62 | } 63 | 64 | t.Fatal(err) 65 | } 66 | 67 | j, err := pg_query.ParseToJSON("SELECT * FROM test WHERE a = b AND (" + f + ")") 68 | if err != nil { 69 | t.Fatal(err) 70 | } 71 | 72 | if strings.Contains(j, "CommentStmt") { 73 | t.Fatal("CommentStmt found") 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /fuzz/go.sum: -------------------------------------------------------------------------------- 1 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= 2 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= 3 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= 4 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= 5 | github.com/golang/protobuf v1.4.0/go.mod 
h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= 6 | github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0= 7 | github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 8 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 9 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 10 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 11 | github.com/google/go-cmp v0.5.1 h1:JFrFEBb2xKufg6XkJsJr+WbKb4FQlURi5RUcBveYu9k= 12 | github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 13 | github.com/grindlemire/go-lucene v0.0.14 h1:sYR1mxzvWlaQ1GTYbV6EFgswIyPQMweR2dVmUjV/pc8= 14 | github.com/grindlemire/go-lucene v0.0.14/go.mod h1:pJrIjVA07GCtlDuWTodRGcLOAiQDyWZfhHQY8DBT4jk= 15 | github.com/pganalyze/pg_query_go/v4 v4.2.3 h1:cNLqyiVMasV7YGWyYV+fkXyHp32gDfXVNCqoHztEGNk= 16 | github.com/pganalyze/pg_query_go/v4 v4.2.3/go.mod h1:aEkDNOXNM5j0YGzaAapwJ7LB3dLNj+bvbWcLv1hOVqA= 17 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 18 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 19 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= 20 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= 21 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= 22 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= 23 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= 24 | google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= 25 | 
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 26 | -------------------------------------------------------------------------------- /pkg/driver/postgresql_test.go: -------------------------------------------------------------------------------- 1 | package driver 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 7 | ) 8 | 9 | const errTemplate = "%s:\n wanted %s\n got %s" 10 | 11 | func TestSQLDriver(t *testing.T) { 12 | type tc struct { 13 | input *expr.Expression 14 | want string 15 | } 16 | 17 | tcs := map[string]tc{ 18 | "simple_equals": { 19 | input: expr.Eq("a", 5), 20 | want: `"a" = 5`, 21 | }, 22 | "simple_and": { 23 | input: expr.AND(expr.Eq("a", 5), expr.Eq("b", "foo")), 24 | want: `("a" = 5) AND ("b" = 'foo')`, 25 | }, 26 | "simple_or": { 27 | input: expr.OR(expr.Eq("a", 5), expr.Eq("b", "foo")), 28 | want: `("a" = 5) OR ("b" = 'foo')`, 29 | }, 30 | "simple_not": { 31 | input: expr.NOT(expr.Eq("a", 1)), 32 | want: `NOT("a" = 1)`, 33 | }, 34 | "simple_like": { 35 | input: expr.LIKE("a", "%(b|d)%"), 36 | want: `"a" SIMILAR TO '%(b|d)%'`, 37 | }, 38 | "string_range": { 39 | input: expr.Rang("a", "foo", "bar", true), 40 | want: `"a" BETWEEN 'foo' AND 'bar'`, 41 | }, 42 | "mixed_number_range": { 43 | input: expr.Rang("a", 1.1, 10, true), 44 | want: `"a" >= 1.10 AND "a" <= 10.00`, 45 | }, 46 | "mixed_number_range_exlusive": { 47 | input: expr.Rang("a", 1, 10.1, false), 48 | want: `"a" > 1.00 AND "a" < 10.10`, 49 | }, 50 | "int_range": { 51 | input: expr.Rang("a", 1, 10, true), 52 | want: `"a" >= 1 AND "a" <= 10`, 53 | }, 54 | "int_range_exlusive": { 55 | input: expr.Rang("a", 1, 10, false), 56 | want: `"a" > 1 AND "a" < 10`, 57 | }, 58 | "float_range": { 59 | input: expr.Rang("a", 1.0, 10.0, true), 60 | want: `"a" >= 1 AND "a" <= 10`, 61 | }, 62 | "float_range_exlusive": { 63 | input: expr.Rang("a", 1.0, 10.0, false), 64 | want: `"a" > 1 AND "a" < 10`, 65 | }, 66 | "lt_range": 
{ 67 | input: expr.Rang("a", "*", 10, false), 68 | want: `"a" < 10`, 69 | }, 70 | "lte_range": { 71 | input: expr.Rang("a", "*", 10, true), 72 | want: `"a" <= 10`, 73 | }, 74 | "gt_range": { 75 | input: expr.Rang("a", 1, "*", false), 76 | want: `"a" > 1`, 77 | }, 78 | "gte_range": { 79 | input: expr.Rang("a", 1, "*", true), 80 | want: `"a" >= 1`, 81 | }, 82 | "lt": { 83 | input: expr.LESS("a", 10), 84 | want: `"a" < 10`, 85 | }, 86 | "lte": { 87 | input: expr.LESSEQ("a", 10), 88 | want: `"a" <= 10`, 89 | }, 90 | "gt": { 91 | input: expr.GREATER("a", 10), 92 | want: `"a" > 10`, 93 | }, 94 | "gte": { 95 | input: expr.GREATEREQ("a", 10), 96 | want: `"a" >= 10`, 97 | }, 98 | "must_ignored": { 99 | input: expr.MUST(expr.Eq("a", 1)), 100 | want: `"a" = 1`, 101 | }, 102 | "nested_filter": { 103 | input: expr.Expr( 104 | expr.Expr( 105 | expr.Expr( 106 | "a", 107 | expr.Equals, 108 | "foo", 109 | ), 110 | expr.Or, 111 | expr.Expr( 112 | "b", 113 | expr.Equals, 114 | expr.REGEXP("/b*ar/"), 115 | ), 116 | ), 117 | expr.And, 118 | expr.Expr( 119 | expr.Rang("c", "aaa", "*", false), 120 | expr.Not, 121 | ), 122 | ), 123 | want: `(("a" = 'foo') OR ("b" ~ '/b*ar/')) AND (NOT("c" BETWEEN 'aaa' AND '*'))`, 124 | }, 125 | "space_in_fieldname": { 126 | input: expr.Eq("a b", 1), 127 | want: `"a b" = 1`, 128 | }, 129 | "equals_in_equals": { 130 | input: expr.Eq("a", expr.Eq("b", 1)), 131 | want: `"a" = ("b" = 1)`, 132 | }, 133 | "regexp": { 134 | input: expr.REGEXP("/b*ar/"), 135 | want: `'/b*ar/'`, 136 | }, 137 | } 138 | 139 | for name, tc := range tcs { 140 | t.Run(name, func(t *testing.T) { 141 | got, err := NewPostgresDriver().Render(tc.input) 142 | if err != nil { 143 | t.Fatalf("got an unexpected error when rendering: %v", err) 144 | } 145 | 146 | if tc.want != got { 147 | t.Fatalf(errTemplate, "generated sql does not match", tc.want, got) 148 | } 149 | }) 150 | } 151 | } 152 | -------------------------------------------------------------------------------- 
/pkg/lucene/expr/renderer.go: -------------------------------------------------------------------------------- 1 | package expr 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | type renderer func(e *Expression, verbose bool) string 9 | 10 | var renderers = map[Operator]renderer{ 11 | Equals: renderEquals, 12 | And: renderBasic, 13 | Or: renderBasic, 14 | Not: renderWrapper, 15 | Range: renderRange, 16 | Must: renderMust, 17 | MustNot: renderMustNot, 18 | Boost: renderBoost, 19 | Fuzzy: renderFuzzy, 20 | Literal: renderLiteral, 21 | Wild: renderLiteral, 22 | Regexp: renderLiteral, 23 | Greater: renderBasic, 24 | Less: renderBasic, 25 | GreaterEq: renderBasic, 26 | LessEq: renderBasic, 27 | Like: renderBasic, 28 | In: renderBasic, 29 | List: renderList, 30 | } 31 | 32 | func renderEquals(e *Expression, verbose bool) string { 33 | if verbose { 34 | return fmt.Sprintf("%#v:%#v", e.Left, e.Right) 35 | } 36 | return fmt.Sprintf("%s:%s", e.Left, e.Right) 37 | } 38 | 39 | func renderBasic(e *Expression, verbose bool) string { 40 | if verbose { 41 | return fmt.Sprintf("(%#v) %s (%#v)", e.Left, toString[e.Op], e.Right) 42 | } 43 | return fmt.Sprintf("%s %s %s", e.Left, toString[e.Op], e.Right) 44 | } 45 | 46 | func renderWrapper(e *Expression, verbose bool) string { 47 | if verbose { 48 | return fmt.Sprintf("%s(%#v)", toString[e.Op], e.Left) 49 | } 50 | return fmt.Sprintf("%s(%s)", toString[e.Op], e.Left) 51 | } 52 | 53 | func renderMustNot(e *Expression, verbose bool) string { 54 | if verbose { 55 | return fmt.Sprintf("%s(%#v)", toString[e.Op], e.Left) 56 | } 57 | return fmt.Sprintf("-%s", e.Left) 58 | } 59 | 60 | func renderMust(e *Expression, verbose bool) string { 61 | if verbose { 62 | return fmt.Sprintf("%s(%#v)", toString[e.Op], e.Left) 63 | } 64 | return fmt.Sprintf("+%s", e.Left) 65 | } 66 | 67 | func renderBoost(e *Expression, verbose bool) string { 68 | if verbose { 69 | if e.boostPower > 1 { 70 | return fmt.Sprintf("%s(%#v^%.1f)", toString[e.Op], e.Left, 
e.boostPower) 71 | } 72 | 73 | return fmt.Sprintf("%s(%#v)", toString[e.Op], e.Left) 74 | } 75 | 76 | if e.boostPower > 1 { 77 | return fmt.Sprintf("%s^%.1f", e.Left, e.boostPower) 78 | } 79 | 80 | return fmt.Sprintf("%s^", e.Left) 81 | } 82 | 83 | func renderFuzzy(e *Expression, verbose bool) string { 84 | if verbose { 85 | if e.fuzzyDistance > 1 { 86 | return fmt.Sprintf("%s(%#v~%d)", toString[e.Op], e.Left, e.fuzzyDistance) 87 | } 88 | 89 | return fmt.Sprintf("%s(%#v)", toString[e.Op], e.Left) 90 | } 91 | 92 | if e.fuzzyDistance > 1 { 93 | return fmt.Sprintf("%s~%d", e.Left, e.fuzzyDistance) 94 | } 95 | 96 | return fmt.Sprintf("%s~", e.Left) 97 | } 98 | 99 | func renderRange(e *Expression, verbose bool) string { 100 | boundary := e.Right.(*RangeBoundary) 101 | if verbose { 102 | if boundary.Inclusive { 103 | return fmt.Sprintf("%#v:[%#v TO %#v]", e.Left, boundary.Min, boundary.Max) 104 | } 105 | 106 | return fmt.Sprintf("%#v:{%#v TO %#v}", e.Left, boundary.Min, boundary.Max) 107 | } 108 | if boundary.Inclusive { 109 | return fmt.Sprintf("%s:[%s TO %s]", e.Left, boundary.Min, boundary.Max) 110 | } 111 | 112 | return fmt.Sprintf("%s:{%s TO %s}", e.Left, boundary.Min, boundary.Max) 113 | } 114 | 115 | func renderList(e *Expression, verbose bool) string { 116 | vals := e.Left.([]*Expression) 117 | strs := []string{} 118 | for _, v := range vals { 119 | if verbose { 120 | strs = append(strs, fmt.Sprintf("%#v", v.Left)) 121 | continue 122 | } 123 | strs = append(strs, fmt.Sprintf("%s", v.Left)) 124 | } 125 | 126 | if verbose { 127 | return fmt.Sprintf("LIST(%s)", strings.Join(strs, ", ")) 128 | } 129 | 130 | return fmt.Sprintf("(%s)", strings.Join(strs, ", ")) 131 | } 132 | 133 | func renderLiteral(e *Expression, verbose bool) string { 134 | if verbose { 135 | return fmt.Sprintf("%s(%#v)", toString[e.Op], e.Left) 136 | } 137 | 138 | s, isStr := e.Left.(string) 139 | if isStr && strings.ContainsAny(s, " ") { 140 | return fmt.Sprintf(`"%s"`, s) 141 | } 142 | 143 | 
return fmt.Sprintf("%v", e.Left) 144 | } 145 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-lucene 2 | 3 | [![Go Reference](https://pkg.go.dev/badge/github.com/grindlemire/go-lucene.svg)](https://pkg.go.dev/github.com/grindlemire/go-lucene) 4 | 5 | A zero-dependency Lucene query parser for Go that converts Lucene syntax into SQL queries. 6 | 7 | ## Features 8 | 9 | - Full Lucene syntax support (compatible with [Apache Lucene 9.4.2](https://lucene.apache.org/core/9_4_2/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package.description)) 10 | - SQL injection safe with parameterized queries 11 | - Zero dependencies 12 | - Extensible with custom SQL drivers 13 | - PostgreSQL support out of the box 14 | 15 | ## Installation 16 | 17 | ```bash 18 | go get github.com/grindlemire/go-lucene 19 | ``` 20 | 21 | ## Basic Usage 22 | 23 | ```go 24 | query := `name:"John Doe" AND age:[25 TO 35]` 25 | filter, err := lucene.ToPostgres(query) 26 | // Result: (("name" = 'John Doe') AND ("age" >= 25 AND "age" <= 35)) 27 | ``` 28 | 29 | 30 | ## API Methods 31 | 32 | ### Direct SQL Generation 33 | ```go 34 | filter, err := lucene.ToPostgres(query) 35 | ``` 36 | 37 | ### Parameterized Queries (Recommended) 38 | ```go 39 | filter, params, err := lucene.ToParameterizedPostgres(query) 40 | db.Query(sql, params...) 
41 | ``` 42 | 43 | ### Default Fields 44 | ```go 45 | filter, err := lucene.ToPostgres("red OR green", lucene.WithDefaultField("color")) 46 | // Result: ("color" = 'red') OR ("color" = 'green') 47 | ``` 48 | 49 | ## Lucene to SQL Operator Mapping 50 | 51 | | Lucene Query | SQL Output | Description | 52 | |--------------|------------|-------------| 53 | | `field:value` | `"field" = 'value'` | Exact match | 54 | | `field:"phrase with spaces"` | `"field" = 'phrase with spaces'` | Quoted phrase | 55 | | `field1:value1 AND field2:value2` | `("field1" = 'value1') AND ("field2" = 'value2')` | Boolean AND | 56 | | `field1:value1 OR field2:value2` | `("field1" = 'value1') OR ("field2" = 'value2')` | Boolean OR | 57 | | `NOT field:value` | `NOT("field" = 'value')` | Boolean NOT | 58 | | `+field:value` | `"field" = 'value'` | Required (equivalent to no operator) | 59 | | `-field:value` | `NOT("field" = 'value')` | Prohibited (equivalent to NOT) | 60 | | `field:[min TO max]` | `"field" >= min AND "field" <= max` | Inclusive range | 61 | | `field:{min TO max}` | `"field" BETWEEN 'min' AND 'max'` (strings) or `"field" > min AND "field" < max` (numbers) | Exclusive range | 62 | | `field:[min TO *]` | `"field" >= min` | Open-ended range (min to infinity) | 63 | | `field:[* TO max]` | `"field" <= max` | Open-ended range (negative infinity to max) | 64 | | `field:*` | `"field" SIMILAR TO '%'` | Wildcard match (matches anything) | 65 | | `field:pattern*` | `"field" SIMILAR TO 'pattern%'` | Wildcard suffix | 66 | | `field:pattern?` | `"field" SIMILAR TO 'pattern_'` | Single character wildcard | 67 | | `field:/regex/` | `"field" ~ '/regex/'` | Regular expression match | 68 | | `(field1:value1 OR field2:value2) AND field3:value3` | `(("field1" = 'value1') OR ("field2" = 'value2')) AND ("field3" = 'value3')` | Grouping | 69 | 70 | ## Examples 71 | 72 | ### Complex Query 73 | ```go 74 | query := `name:"John Doe" AND age:[25 TO 35] AND NOT status:inactive` 75 | // SQL: (("name" = 'John 
Doe') AND ("age" >= 25 AND "age" <= 35)) AND (NOT("status" = 'inactive')) 76 | ``` 77 | 78 | ### Parameterized Output 79 | ```go 80 | filter, params, err := lucene.ToParameterizedPostgres(`color:red AND type:"gala"`) 81 | // SQL: ("color" = $1) AND ("type" = $2) 82 | // Params: ["red", "gala"] 83 | ``` 84 | 85 | ### Wildcard Queries 86 | ```go 87 | filter, err := lucene.ToPostgres(`name:John* AND email:*@example.com`) 88 | // SQL: ("name" SIMILAR TO 'John%') AND ("email" SIMILAR TO '%@example.com') 89 | ``` 90 | 91 | ### Regular Expression Queries 92 | ```go 93 | filter, err := lucene.ToPostgres(`url:/example\.com\/.*\/`) 94 | // SQL: "url" ~ '/example\.com\/.*\/' 95 | ``` 96 | 97 | ## Custom SQL Drivers 98 | 99 | Extend the library for different SQL dialects by creating custom drivers: 100 | 101 | ```go 102 | import ( 103 | "github.com/grindlemire/go-lucene/pkg/driver" 104 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 105 | ) 106 | 107 | type MySQLDriver struct { 108 | driver.Base 109 | } 110 | 111 | func NewMySQLDriver() MySQLDriver { 112 | fns := map[expr.Operator]driver.RenderFN{ 113 | expr.Equals: func(left, right string) (string, error) { 114 | return fmt.Sprintf("`%s` = %s", left, right), nil 115 | }, 116 | } 117 | 118 | // Use shared functions for other operators 119 | for op, sharedFN := range driver.Shared { 120 | if _, exists := fns[op]; !exists { 121 | fns[op] = sharedFN 122 | } 123 | } 124 | 125 | return MySQLDriver{Base: driver.Base{RenderFNs: fns}} 126 | } 127 | 128 | // Usage 129 | mysqlDriver := NewMySQLDriver() 130 | expr, _ := lucene.Parse(`color:red`) 131 | filter, _ := mysqlDriver.Render(expr) 132 | // Result: `color` = 'red' 133 | ``` 134 | -------------------------------------------------------------------------------- /internal/lex/lext_test.go: -------------------------------------------------------------------------------- 1 | package lex 2 | 3 | import ( 4 | "reflect" 5 | "strings" 6 | "testing" 7 | ) 8 | 9 | const errTemplate = 
"%s:\n wanted %v\n got %v" 10 | 11 | func TestLex(t *testing.T) { 12 | type tc struct { 13 | in string 14 | expected []Token 15 | } 16 | tcs := map[string]tc{ 17 | "empty_returns_eof": { 18 | in: "", 19 | expected: []Token{tok(TEOF, "EOF")}, 20 | }, 21 | "negatives": { 22 | in: "-1", 23 | expected: []Token{tok(TLiteral, "-1")}, 24 | }, 25 | "negatives_mixed_with_minus": { 26 | in: "a:-1 AND -b:c", 27 | expected: []Token{ 28 | tok(TLiteral, "a"), 29 | tok(TColon, ":"), 30 | tok(TLiteral, "-1"), 31 | tok(TAnd, "AND"), 32 | tok(TMinus, "-"), 33 | tok(TLiteral, "b"), 34 | tok(TColon, ":"), 35 | tok(TLiteral, "c"), 36 | }, 37 | }, 38 | "negatives_in_elastic_comparison": { 39 | in: "a:<-10 AND -b:>=20", 40 | expected: []Token{ 41 | tok(TLiteral, "a"), 42 | tok(TColon, ":"), 43 | tok(TLess, "<"), 44 | tok(TLiteral, "-10"), 45 | tok(TAnd, "AND"), 46 | tok(TMinus, "-"), 47 | tok(TLiteral, "b"), 48 | tok(TColon, ":"), 49 | tok(TGreater, ">"), 50 | tok(TEqual, "="), 51 | tok(TLiteral, "20"), 52 | }, 53 | }, 54 | "literals": { 55 | in: "abc", 56 | expected: []Token{tok(TLiteral, "abc")}, 57 | }, 58 | "spaces_ignored": { 59 | in: "ab c", 60 | expected: []Token{ 61 | tok(TLiteral, "ab"), 62 | tok(TLiteral, "c"), 63 | }, 64 | }, 65 | "quotes_single_token": { 66 | in: `"abc"`, 67 | expected: []Token{ 68 | tok(TQuoted, "\"abc\""), 69 | }, 70 | }, 71 | "single_quotes_single_token": { 72 | in: `'abc'`, 73 | expected: []Token{ 74 | tok(TQuoted, "'abc'"), 75 | }, 76 | }, 77 | "quotes_single_token_with_spaces": { 78 | in: `"ab c"`, 79 | expected: []Token{ 80 | tok(TQuoted, "\"ab c\""), 81 | }, 82 | }, 83 | "single_quotes_single_token_with_spaces": { 84 | in: `'ab c'`, 85 | expected: []Token{ 86 | tok(TQuoted, "'ab c'"), 87 | }, 88 | }, 89 | "parens_tokenized": { 90 | in: `(ABC)`, 91 | expected: []Token{ 92 | tok(TLParen, "("), 93 | tok(TLiteral, "ABC"), 94 | tok(TRParen, ")"), 95 | }, 96 | }, 97 | "equals_operator_tokenized_in_stream": { 98 | in: `a = b`, 99 | expected: []Token{ 100 | 
tok(TLiteral, "a"), 101 | tok(TEqual, "="), 102 | tok(TLiteral, "b"), 103 | }, 104 | }, 105 | "equals_operator_lucene_tokenized_in_stream": { 106 | in: `a:b`, 107 | expected: []Token{ 108 | tok(TLiteral, "a"), 109 | tok(TColon, ":"), 110 | tok(TLiteral, "b"), 111 | }, 112 | }, 113 | "and_boolean_tokenized": { 114 | in: `a AND b`, 115 | expected: []Token{ 116 | tok(TLiteral, "a"), 117 | tok(TAnd, "AND"), 118 | tok(TLiteral, "b"), 119 | }, 120 | }, 121 | "or_boolean_tokenized": { 122 | in: `a OR b`, 123 | expected: []Token{ 124 | tok(TLiteral, "a"), 125 | tok(TOr, "OR"), 126 | tok(TLiteral, "b"), 127 | }, 128 | }, 129 | "not_boolean_tokenized": { 130 | in: `NOT a`, 131 | expected: []Token{ 132 | tok(TNot, "NOT"), 133 | tok(TLiteral, "a"), 134 | }, 135 | }, 136 | "to_tokenized": { 137 | in: `a TO b`, 138 | expected: []Token{ 139 | tok(TLiteral, "a"), 140 | tok(TTO, "TO"), 141 | tok(TLiteral, "b"), 142 | }, 143 | }, 144 | "regexp_tokenized": { 145 | in: `/a[b]*/`, 146 | expected: []Token{ 147 | tok(TRegexp, "/a[b]*/"), 148 | }, 149 | }, 150 | "regexp_tokenized_with_escaped_chars": { 151 | in: `/.*example.com\/article\/.*/`, 152 | expected: []Token{ 153 | tok(TRegexp, `/.*example.com\/article\/.*/`), 154 | }, 155 | }, 156 | "symbols_tokenized": { 157 | in: `()[]{}:+-=><`, 158 | expected: []Token{ 159 | tok(TLParen, "("), 160 | tok(TRParen, ")"), 161 | tok(TLSquare, "["), 162 | tok(TRSquare, "]"), 163 | tok(TLCurly, "{"), 164 | tok(TRCurly, "}"), 165 | tok(TColon, ":"), 166 | tok(TPlus, "+"), 167 | tok(TMinus, "-"), 168 | tok(TEqual, "="), 169 | tok(TGreater, ">"), 170 | tok(TLess, "<"), 171 | }, 172 | }, 173 | "token_boost": { 174 | in: "a:b^2 foo^4", 175 | expected: []Token{ 176 | tok(TLiteral, "a"), 177 | tok(TColon, ":"), 178 | tok(TLiteral, "b"), 179 | tok(TCarrot, "^"), 180 | tok(TLiteral, "2"), 181 | tok(TLiteral, "foo"), 182 | tok(TCarrot, "^"), 183 | tok(TLiteral, "4"), 184 | }, 185 | }, 186 | "token_boost_floats": { 187 | in: "a:b^2.1 foo^4.40", 188 | expected: 
[]Token{ 189 | tok(TLiteral, "a"), 190 | tok(TColon, ":"), 191 | tok(TLiteral, "b"), 192 | tok(TCarrot, "^"), 193 | tok(TLiteral, "2.1"), 194 | tok(TLiteral, "foo"), 195 | tok(TCarrot, "^"), 196 | tok(TLiteral, "4.40"), 197 | }, 198 | }, 199 | "entire_stream_tokenized": { 200 | in: `(+k1:v1 AND -k2:v2) OR k3:"foo bar"^2 OR k4:a*~10`, 201 | expected: []Token{ 202 | tok(TLParen, "("), 203 | tok(TPlus, "+"), 204 | tok(TLiteral, "k1"), 205 | tok(TColon, ":"), 206 | tok(TLiteral, "v1"), 207 | tok(TAnd, "AND"), 208 | tok(TMinus, "-"), 209 | tok(TLiteral, "k2"), 210 | tok(TColon, ":"), 211 | tok(TLiteral, "v2"), 212 | tok(TRParen, ")"), 213 | tok(TOr, "OR"), 214 | tok(TLiteral, "k3"), 215 | tok(TColon, ":"), 216 | tok(TQuoted, "\"foo bar\""), 217 | tok(TCarrot, "^"), 218 | tok(TLiteral, "2"), 219 | tok(TOr, "OR"), 220 | tok(TLiteral, "k4"), 221 | tok(TColon, ":"), 222 | tok(TLiteral, "a*"), 223 | tok(TTilde, "~"), 224 | tok(TLiteral, "10"), 225 | }, 226 | }, 227 | "escape_sequence_tokenized": { 228 | in: `\(1\+1\)\:2`, 229 | expected: []Token{ 230 | tok(TLiteral, `\(1\+1\)\:2`), 231 | }, 232 | }, 233 | "quoted_sequence_tokensized": { 234 | in: `"foo bar":"works well"`, 235 | expected: []Token{ 236 | tok(TQuoted, "\"foo bar\""), 237 | tok(TColon, ":"), 238 | tok(TQuoted, "\"works well\""), 239 | }, 240 | }, 241 | } 242 | 243 | for name, tc := range tcs { 244 | t.Run(name, func(t *testing.T) { 245 | tokens := consumeAll(tc.in) 246 | tc.expected = finalizeExpected(tc.in, tc.expected) 247 | if !reflect.DeepEqual(tc.expected, tokens) { 248 | t.Fatalf(errTemplate, "token streams don't match", tc.expected, tokens) 249 | } 250 | }) 251 | } 252 | } 253 | 254 | func finalizeExpected(in string, tokens []Token) (out []Token) { 255 | // if we are testing just the EOF return early and don't do anything 256 | if tokens[0].Typ == TEOF { 257 | return tokens 258 | } 259 | 260 | offset := 0 261 | for idx, token := range tokens { 262 | sliced := in[offset:] 263 | 264 | // if its an error 
then we don't have any offset to calculate 265 | if token.Typ == TErr { 266 | tokens[idx].pos = offset 267 | continue 268 | } 269 | 270 | // calculate the position of the new token in the string 271 | tokens[idx].pos = strings.Index(sliced, token.Val) + offset 272 | 273 | // handle the whitespace that pops up so we keep the offset in sync 274 | whitespaceOffset := movePastWhitespace(sliced) 275 | offset += len(token.Val) + whitespaceOffset 276 | } 277 | 278 | // if we didn't end in an error, add in an EOF token at the end 279 | if tokens[len(tokens)-1].Typ != TErr { 280 | tokens = append(tokens, Token{TEOF, len(in), "EOF"}) 281 | } 282 | return tokens 283 | } 284 | 285 | func movePastWhitespace(in string) (count int) { 286 | for _, c := range in { 287 | if !isSpace(c) { 288 | return count 289 | } 290 | count++ 291 | } 292 | return count 293 | } 294 | 295 | func consumeAll(in string) (toks []Token) { 296 | l := Lex(in) 297 | for { 298 | tok := l.Next() 299 | toks = append(toks, tok) 300 | if tok.Typ == TEOF || tok.Typ == TErr { 301 | return toks 302 | } 303 | } 304 | } 305 | 306 | func tok(typ TokType, val string) Token { 307 | return Token{ 308 | Typ: typ, 309 | // there is intentionally no pos set because we are doing it in generate 310 | Val: val, 311 | } 312 | } 313 | -------------------------------------------------------------------------------- /pkg/driver/base.go: -------------------------------------------------------------------------------- 1 | package driver 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 8 | ) 9 | 10 | // Shared is the shared set of render functions that can be used as a base and overriden 11 | // for each flavor of sql 12 | var Shared = map[expr.Operator]RenderFN{ 13 | expr.Literal: literal, 14 | expr.And: basicCompound(expr.And), 15 | expr.Or: basicCompound(expr.Or), 16 | expr.Not: basicWrap(expr.Not), 17 | expr.Equals: equals, 18 | expr.Range: rang, 19 | expr.Must: noop, // must 
doesn't really translate to sql 20 | expr.MustNot: basicWrap(expr.Not), // must not is really just a negation 21 | // expr.Fuzzy: unsupported, 22 | // expr.Boost: unsupported, 23 | expr.Wild: literal, 24 | expr.Regexp: literal, 25 | expr.Like: like, 26 | expr.Greater: greater, 27 | expr.GreaterEq: greaterEq, 28 | expr.Less: less, 29 | expr.LessEq: lessEq, 30 | expr.In: inFn, 31 | expr.List: list, 32 | } 33 | 34 | // Base is the base driver that is embedded in each driver 35 | type Base struct { 36 | RenderFNs map[expr.Operator]RenderFN 37 | } 38 | 39 | // RenderParam will render the expression into a parameterized query. The returned string will contain placeholders 40 | // and the params will contain the values that should be passed to the query. 41 | func (b Base) RenderParam(e *expr.Expression) (s string, params []any, err error) { 42 | if e == nil { 43 | return "", params, nil 44 | } 45 | 46 | left, lparams, err := b.serializeParams(e.Left) 47 | if err != nil { 48 | return s, params, err 49 | } 50 | 51 | right, rparams, err := b.serializeParams(e.Right) 52 | if err != nil { 53 | return s, params, err 54 | } 55 | 56 | // edge case for a standalone wildcard on a like operator. 57 | // Convert to a regular expression that matches anything 58 | if right == "'*'" && e.Op == expr.Like { 59 | right = "?" 60 | rparams = []any{"%"} 61 | } 62 | 63 | // if we are in a regular expression we need to convert the * to % and ? to _ 64 | if e.Op == expr.Like && len(rparams) > 0 { 65 | rval := rparams[0].(string) 66 | // keep the regexp intact if it is a // regexp 67 | if len(rval) < 4 || rval[0] != '/' || rval[len(rval)-1] != '/' { 68 | rval = strings.ReplaceAll(rval, "*", "%") 69 | rval = strings.ReplaceAll(rval, "?", "_") 70 | rparams[0] = rval 71 | } 72 | } 73 | 74 | params = append(lparams, rparams...) 
75 | 76 | if e.Op != expr.Range && 77 | e.Op != expr.Not && 78 | e.Op != expr.List && 79 | e.Op != expr.In && 80 | e.Op != expr.Literal && 81 | e.Op != expr.Must && 82 | e.Op != expr.MustNot { 83 | if !b.isSimple(e.Left) { 84 | left = "(" + left + ")" 85 | } 86 | if !b.isSimple(e.Right) { 87 | right = "(" + right + ")" 88 | } 89 | } 90 | 91 | // if we have a like operator then we need to use the likeParam function instead of the default 92 | // since we are replacing all the * with % and ? with _ 93 | if e.Op == expr.Like { 94 | str, err := likeParam(left, right, rparams) 95 | return str, params, err 96 | } 97 | 98 | // if we have a range operator then we need to use the rangParam function instead of the default 99 | // since we need to be able to infer the param types that are injected 100 | if e.Op == expr.Range { 101 | str, err := rangParam(left, right, rparams) 102 | return str, params, err 103 | } 104 | 105 | fn, ok := b.RenderFNs[e.Op] 106 | if !ok { 107 | return s, params, fmt.Errorf("unable to render operator [%s]", e.Op) 108 | } 109 | 110 | str, err := fn(left, right) 111 | return str, params, err 112 | } 113 | 114 | // Render will render the expression based on the renderFNs provided by the driver. 
115 | func (b Base) Render(e *expr.Expression) (s string, err error) { 116 | if e == nil { 117 | return "", nil 118 | } 119 | 120 | left, err := b.serialize(e.Left) 121 | if err != nil { 122 | return s, err 123 | } 124 | 125 | right, err := b.serialize(e.Right) 126 | if err != nil { 127 | return s, err 128 | } 129 | 130 | if e.Op != expr.Range && 131 | e.Op != expr.Not && 132 | e.Op != expr.List && 133 | e.Op != expr.In && 134 | e.Op != expr.Literal && 135 | e.Op != expr.Must && 136 | e.Op != expr.MustNot { 137 | if !b.isSimple(e.Left) { 138 | left = "(" + left + ")" 139 | } 140 | if !b.isSimple(e.Right) { 141 | right = "(" + right + ")" 142 | } 143 | } 144 | 145 | fn, ok := b.RenderFNs[e.Op] 146 | if !ok { 147 | return s, fmt.Errorf("unable to render operator [%s]", e.Op) 148 | } 149 | 150 | return fn(left, right) 151 | } 152 | 153 | func (b Base) isSimple(in any) bool { 154 | switch v := in.(type) { 155 | case *expr.Expression: 156 | return v.Op == expr.Undefined || v.Op == expr.Literal || v.Op == expr.Regexp || v.Op == expr.Wild 157 | case expr.Column: 158 | return true 159 | case nil: 160 | return true 161 | case string, int, float64: 162 | return true 163 | default: 164 | return false 165 | } 166 | } 167 | 168 | func (b Base) serialize(in any) (s string, err error) { 169 | if in == nil { 170 | return "", nil 171 | } 172 | 173 | switch v := in.(type) { 174 | case *expr.Expression: 175 | return b.Render(v) 176 | case []*expr.Expression: 177 | strs := []string{} 178 | for _, e := range v { 179 | s, err = b.Render(e) 180 | if err != nil { 181 | return s, err 182 | } 183 | strs = append(strs, s) 184 | } 185 | return strings.Join(strs, ", "), nil 186 | case *expr.RangeBoundary: 187 | min, err := b.serialize(v.Min) 188 | if err != nil { 189 | return "", err 190 | } 191 | max, err := b.serialize(v.Max) 192 | if err != nil { 193 | return "", err 194 | } 195 | 196 | if v.Inclusive { 197 | return fmt.Sprintf("[%s, %s]", min, max), nil 198 | } 199 | return 
fmt.Sprintf("(%s, %s)", min, max), nil 200 | 201 | case expr.Column: 202 | if len(v) == 0 { 203 | return "", fmt.Errorf("column name is empty") 204 | } 205 | if strings.ContainsRune(string(v), '"') { 206 | return "", fmt.Errorf("column name contains a double quote: %q", v) 207 | } 208 | // Always escape column names with double quotes, 209 | // otherwise we need to know the reserved words 210 | // which might change in the future. 211 | return fmt.Sprintf(`"%s"`, string(v)), nil 212 | case string: 213 | // escape single quotes with double single quotes 214 | return fmt.Sprintf("'%s'", strings.ReplaceAll(v, "'", "''")), nil 215 | default: 216 | return fmt.Sprintf("%v", v), nil 217 | } 218 | } 219 | 220 | func (b Base) serializeParams(in any) (s string, params []any, err error) { 221 | if in == nil { 222 | return "", params, nil 223 | } 224 | 225 | switch v := in.(type) { 226 | case *expr.Expression: 227 | return b.RenderParam(v) 228 | case []*expr.Expression: 229 | strs := []string{} 230 | for _, e := range v { 231 | s, eparams, err := b.RenderParam(e) 232 | if err != nil { 233 | return s, params, err 234 | } 235 | strs = append(strs, s) 236 | params = append(params, eparams...) 237 | } 238 | return strings.Join(strs, ", "), params, nil 239 | case *expr.RangeBoundary: 240 | min, minParams, err := b.serializeParams(v.Min) 241 | if err != nil { 242 | return "", params, err 243 | } 244 | max, maxParams, err := b.serializeParams(v.Max) 245 | if err != nil { 246 | return "", params, err 247 | } 248 | params = append(minParams, maxParams...) 
249 | 250 | if v.Inclusive { 251 | return fmt.Sprintf("[%s, %s]", min, max), params, nil 252 | } 253 | return fmt.Sprintf("(%s, %s)", min, max), params, nil 254 | 255 | case expr.Column: 256 | if len(v) == 0 { 257 | return "", params, fmt.Errorf("column name is empty") 258 | } 259 | if strings.ContainsRune(string(v), '"') { 260 | return "", params, fmt.Errorf("column name contains a double quote: %q", v) 261 | } 262 | // Always escape column names with double quotes, 263 | // otherwise we need to know the reserved words 264 | // which might change in the future. 265 | return fmt.Sprintf(`"%s"`, string(v)), params, nil 266 | case string: 267 | // if we have a '*' then we don't want to insert a param since 268 | // it can be used either in a regexp or a range operator. 269 | if v == "*" { 270 | return "'*'", params, nil 271 | } 272 | 273 | // escape single quotes with double single quotes 274 | return "?", []any{v}, nil 275 | default: 276 | return "?", []any{v}, nil 277 | } 278 | } 279 | -------------------------------------------------------------------------------- /parse.go: -------------------------------------------------------------------------------- 1 | package lucene 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "strconv" 7 | "strings" 8 | 9 | "github.com/grindlemire/go-lucene/internal/lex" 10 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 11 | "github.com/grindlemire/go-lucene/pkg/lucene/reduce" 12 | ) 13 | 14 | type Opt func(*parser) 15 | 16 | // WithDefaultField sets the default field to equate literals to. 17 | // For example a:b AND "c" will be parsed as a:b AND myfield:"c" 18 | func WithDefaultField(field string) Opt { 19 | return func(p *parser) { 20 | p.defaultField = field 21 | } 22 | } 23 | 24 | // Parse will parse a lucene expression string using a buffer and the shift reduce algorithm. The returned expression 25 | // is an AST that can be rendered to a variety of different formats. 
26 | func Parse(input string, opts ...Opt) (e *expr.Expression, err error) { 27 | p := &parser{ 28 | lex: lex.Lex(input), 29 | stack: []any{}, 30 | nonTerminals: []lex.Token{{Typ: lex.TStart}}, 31 | } 32 | 33 | for _, opt := range opts { 34 | opt(p) 35 | } 36 | 37 | ex, err := p.parse() 38 | if err != nil { 39 | return e, err 40 | } 41 | 42 | err = expr.Validate(ex) 43 | if err != nil { 44 | return e, err 45 | } 46 | 47 | return ex, nil 48 | } 49 | 50 | type parser struct { 51 | lex *lex.Lexer 52 | stack []any 53 | nonTerminals []lex.Token 54 | 55 | defaultField string 56 | } 57 | 58 | func (p *parser) parse() (e *expr.Expression, err error) { 59 | for { 60 | next := p.lex.Peek() 61 | if p.shouldAccept(next) { 62 | if len(p.stack) != 1 { 63 | return e, fmt.Errorf("multiple expressions left after parsing: %v", p.stack) 64 | } 65 | final, ok := p.stack[0].(*expr.Expression) 66 | if !ok { 67 | return e, fmt.Errorf( 68 | "final parse didn't return an expression: %s [type: %s]", 69 | p.stack[0], 70 | reflect.TypeOf(final), 71 | ) 72 | } 73 | 74 | // edge case for a single literal in the expression and a default field specified 75 | if final.Op == expr.Literal && p.defaultField != "" { 76 | final = expr.Expr(p.defaultField, expr.Equals, final.Left) 77 | } 78 | 79 | return final, nil 80 | } 81 | 82 | if p.shouldShift(next) { 83 | tok := p.shift() 84 | if lex.IsTerminal(tok) { 85 | // if we have a terminal parse it and put it on the stack 86 | lit, err := parseLiteral(tok) 87 | if err != nil { 88 | return e, err 89 | } 90 | 91 | // we should always check if the current top of the stack is another token 92 | // if it isn't then we have an implicit AND we need to inject. 93 | if len(p.stack) > 0 { 94 | _, isTopToken := p.stack[len(p.stack)-1].(lex.Token) 95 | if !isTopToken { 96 | implAnd := lex.Token{Typ: lex.TAnd, Val: "AND"} 97 | // act as if we just saw an AND and check if we need to reduce the 98 | // current token stack first. 
99 | if !p.shouldShift(implAnd) { 100 | err = p.reduce() 101 | if err != nil { 102 | return e, err 103 | } 104 | } 105 | 106 | // if we have a literal as the previous parsed thing then 107 | // we must be in an implicit AND and should reduce 108 | p.stack = append(p.stack, implAnd) 109 | p.nonTerminals = append(p.nonTerminals, implAnd) 110 | } 111 | } 112 | 113 | p.stack = append(p.stack, lit) 114 | continue 115 | } 116 | // otherwise just push the token on the stack 117 | p.stack = append(p.stack, tok) 118 | p.nonTerminals = append(p.nonTerminals, tok) 119 | continue 120 | } 121 | 122 | err = p.reduce() 123 | if err != nil { 124 | return e, err 125 | } 126 | } 127 | } 128 | 129 | func (p *parser) shift() (tok lex.Token) { 130 | return p.lex.Next() 131 | } 132 | 133 | // shouldShift determines if the parser should shift or not. This might end up in the grammar specific 134 | // packages and implemented for each grammar this parser supports but for now it can live at the top level. 135 | func (p *parser) shouldShift(next lex.Token) bool { 136 | if next.Typ == lex.TEOF { 137 | return false 138 | } 139 | 140 | if next.Typ == lex.TErr { 141 | return false 142 | } 143 | 144 | curr := p.nonTerminals[len(p.nonTerminals)-1] 145 | 146 | // if we have a terminal symbol then we always want to shift since it won't be 147 | // matched by any rule 148 | if lex.IsTerminal(next) { 149 | return true 150 | } 151 | 152 | // if we have an open grouping or the next one is we want to always shift 153 | if anyOpenBracket(curr, next) { 154 | return true 155 | } 156 | 157 | // we need the closing bracket to reduce the range subexpression so shift that on 158 | // if we see it 159 | if endingRangeSubExpr(next) { 160 | return true 161 | } 162 | 163 | // if we are ever attempting to move past a subexpr we need to parse it before moving on 164 | if anyClosingBracket(curr) { 165 | return false 166 | } 167 | 168 | // shift if our current token has less precedence than the next token 169 | return 
lex.HasLessPrecedence(curr, next) 170 | } 171 | 172 | func anyOpenBracket(curr, next lex.Token) bool { 173 | return curr.Typ == lex.TLSquare || 174 | next.Typ == lex.TLSquare || 175 | curr.Typ == lex.TLCurly || 176 | next.Typ == lex.TLCurly || 177 | curr.Typ == lex.TLParen || 178 | next.Typ == lex.TLParen 179 | } 180 | 181 | func anyClosingBracket(curr lex.Token) bool { 182 | return curr.Typ == lex.TRParen || 183 | curr.Typ == lex.TRSquare || 184 | curr.Typ == lex.TRCurly 185 | } 186 | 187 | func endingRangeSubExpr(next lex.Token) bool { 188 | return next.Typ == lex.TRSquare || next.Typ == lex.TRCurly 189 | } 190 | 191 | func (p *parser) shouldAccept(next lex.Token) bool { 192 | return len(p.stack) == 1 && 193 | next.Typ == lex.TEOF 194 | } 195 | 196 | func (p *parser) reduce() (err error) { 197 | top := []any{} 198 | for { 199 | if len(p.stack) == 0 { 200 | return fmt.Errorf("error parsing, no items left to reduce, current state: %v", top) 201 | } 202 | 203 | // pull the top off the stack 204 | s := p.stack[len(p.stack)-1] 205 | p.stack = p.stack[:len(p.stack)-1] 206 | 207 | // keep the original ordering when building up our subslice 208 | top = append([]any{s}, top...) 
209 | 210 | // try to reduce with all our reducers 211 | var reduced bool 212 | top, p.nonTerminals, reduced = reduce.Reduce(top, p.nonTerminals, p.defaultField) 213 | 214 | // if we consumed some non terminals during the reduce it means we successfully reduced 215 | if reduced { 216 | // If the reducer returned multiple elements and the first two are both expressions, 217 | // we need to inject an implicit AND between them (this happens when fuzzy/boost 218 | // does a partial reduction like [FUZZY(...), other-expr]) 219 | if len(top) >= 2 { 220 | _, isFirstExpr := top[0].(*expr.Expression) 221 | _, isSecondExpr := top[1].(*expr.Expression) 222 | if isFirstExpr && isSecondExpr { 223 | // Insert AND between the two expressions: [expr1, expr2] -> [expr1, AND, expr2] 224 | implAnd := lex.Token{Typ: lex.TAnd, Val: "AND"} 225 | newTop := append([]any{top[0]}, implAnd) 226 | newTop = append(newTop, top[1:]...) 227 | top = newTop 228 | p.nonTerminals = append(p.nonTerminals, implAnd) 229 | } 230 | } 231 | 232 | // if we successfully reduced re-add it to the top of the stack and return 233 | p.stack = append(p.stack, top...) 
234 | return nil 235 | } 236 | } 237 | } 238 | 239 | func parseLiteral(token lex.Token) (e any, err error) { 240 | // if it is a quote then remove escape 241 | if token.Typ == lex.TQuoted { 242 | return expr.Lit(strings.ReplaceAll(token.Val, "\"", "")), nil 243 | } 244 | 245 | // if it is a regexp then parse it 246 | if token.Typ == lex.TRegexp { 247 | return expr.REGEXP(token.Val), nil 248 | } 249 | 250 | // attempt to parse it as an integer 251 | ival, err := strconv.Atoi(token.Val) 252 | if err == nil { 253 | return expr.Lit(ival), nil 254 | } 255 | 256 | // attempt to parse it as a float 257 | fval, err := strconv.ParseFloat(token.Val, 64) 258 | if err == nil { 259 | return expr.Lit(fval), nil 260 | } 261 | 262 | // if it contains unescaped wildcards then it is a wildcard string 263 | if strings.ContainsAny(token.Val, "*?") { 264 | return expr.WILD(token.Val), nil 265 | } 266 | 267 | // if it contains an escape string then strip it out now 268 | if strings.Contains(token.Val, `\`) { 269 | return expr.Lit(strings.ReplaceAll(token.Val, `\`, "")), nil 270 | } 271 | 272 | return expr.Lit(token.Val), nil 273 | } 274 | -------------------------------------------------------------------------------- /pkg/lucene/expr/expression_test.go: -------------------------------------------------------------------------------- 1 | package expr 2 | 3 | import ( 4 | "encoding/json" 5 | "reflect" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | const ( 11 | errTemplate = "%s:\n wanted %#v\n got %#v" 12 | jsonErrTemplate = "%s:\n wanted %s\n got %s" 13 | ) 14 | 15 | func TestExprJSON(t *testing.T) { 16 | type tc struct { 17 | input string 18 | want *Expression 19 | } 20 | 21 | tcs := map[string]tc{ 22 | "flat_literal": { 23 | input: `"a"`, 24 | want: Lit("a"), 25 | }, 26 | "flat_wildcard": { 27 | input: `"a*"`, 28 | want: WILD("a*"), 29 | }, 30 | "flat_equals": { 31 | input: `{"left": "a", "operator": "EQUALS", "right": "b"}`, 32 | want: Eq(Lit("a"), Lit("b")), 33 | }, 34 | "flat_regexp": 
{ 35 | input: `{ 36 | "left": "a", 37 | "operator": "LIKE", 38 | "right": "/b [c]/" 39 | }`, 40 | want: LIKE(Lit("a"), REGEXP("/b [c]/")), 41 | }, 42 | "flat_inclusive_range": { 43 | input: `{ 44 | "left": "a", 45 | "operator": "RANGE", 46 | "right": { 47 | "min": 1, 48 | "max": 2, 49 | "inclusive": true 50 | } 51 | }`, 52 | want: Rang("a", 1, 2, true), 53 | }, 54 | "flat_exclusive_range": { 55 | input: `{ 56 | "left": "a", 57 | "operator": "RANGE", 58 | "right": { 59 | "min": 1, 60 | "max": 2, 61 | "inclusive": false 62 | } 63 | }`, 64 | want: Rang("a", 1, 2, false), 65 | }, 66 | "flat_range_with_float": { 67 | input: `{ 68 | "left": "a", 69 | "operator": "RANGE", 70 | "right": { 71 | "min": 1.1, 72 | "max": 2.2, 73 | "inclusive": true 74 | } 75 | }`, 76 | want: Rang("a", 1.1, 2.2, true), 77 | }, 78 | "must_wrapping_range": { 79 | input: `{ 80 | "left": { 81 | "left": "c", 82 | "operator": "RANGE", 83 | "right": { 84 | "min": "*", 85 | "max": "foo", 86 | "inclusive": false 87 | } 88 | }, 89 | "operator": "MUST" 90 | }`, 91 | want: MUST(Rang("c", "*", "foo", false)), 92 | }, 93 | "flat_must": { 94 | input: `{ 95 | "left": "a", 96 | "operator": "MUST" 97 | }`, 98 | want: MUST(Lit("a")), 99 | }, 100 | "flat_must_not": { 101 | input: `{ 102 | "left": "a", 103 | "operator": "MUST_NOT" 104 | }`, 105 | want: MUSTNOT(Lit("a")), 106 | }, 107 | "flat_not": { 108 | input: `{ 109 | "left": "a", 110 | "operator": "NOT" 111 | }`, 112 | want: NOT(Lit("a")), 113 | }, 114 | "flat_boost": { 115 | input: `{ 116 | "left": "a", 117 | "operator": "BOOST" 118 | }`, 119 | want: BOOST(Lit("a")), 120 | }, 121 | "flat_boost_explicit_power": { 122 | input: `{ 123 | "left": "a", 124 | "operator": "BOOST", 125 | "power": 0.8 126 | }`, 127 | want: BOOST(Lit("a"), 0.8), 128 | }, 129 | "flat_fuzzy": { 130 | input: `{ 131 | "left": "a", 132 | "operator": "FUZZY" 133 | }`, 134 | want: FUZZY(Lit("a")), 135 | }, 136 | "flat_fuzzy_explicit_power": { 137 | input: `{ 138 | "left": "a", 139 | "operator": 
"FUZZY", 140 | "distance": 2 141 | }`, 142 | want: FUZZY("a", 2), 143 | }, 144 | "flat_in_list": { 145 | input: `{ 146 | "left": "a", 147 | "operator": "IN", 148 | "right": { 149 | "left": ["b", "c"], 150 | "operator": "LIST" 151 | } 152 | }`, 153 | want: IN("a", LIST(Lit("b"), Lit("c"))), 154 | }, 155 | "basic_and": { 156 | input: `{ 157 | "left": { 158 | "left": "a", 159 | "operator": "EQUALS", 160 | "right": "b" 161 | }, 162 | "operator": "AND", 163 | "right": { 164 | "left": "c", 165 | "operator": "EQUALS", 166 | "right": "d" 167 | } 168 | }`, 169 | want: AND( 170 | Eq("a", "b"), 171 | Eq("c", "d"), 172 | ), 173 | }, 174 | "basic_or": { 175 | input: `{ 176 | "left": { 177 | "left": "a", 178 | "operator": "EQUALS", 179 | "right": "b" 180 | }, 181 | "operator": "OR", 182 | "right": { 183 | "left": "c", 184 | "operator": "EQUALS", 185 | "right": "d" 186 | } 187 | }`, 188 | want: OR( 189 | Eq("a", "b"), 190 | Eq("c", "d"), 191 | ), 192 | }, 193 | "preserves_precedence": { 194 | input: `{ 195 | "left": { 196 | "left": { 197 | "left": "a", 198 | "operator": "AND", 199 | "right": "b" 200 | }, 201 | "operator": "OR", 202 | "right": { 203 | "left": "c", 204 | "operator": "AND", 205 | "right": "d" 206 | } 207 | }, 208 | "operator": "OR", 209 | "right": "e" 210 | }`, 211 | want: OR( 212 | OR( 213 | AND("a", "b"), 214 | AND("c", "d"), 215 | ), 216 | "e", 217 | ), 218 | }, 219 | "compound_using_range": { 220 | input: `{ 221 | "left": { 222 | "left": { 223 | "left": "c", 224 | "operator": "RANGE", 225 | "right": { 226 | "min": "*", 227 | "max": "foo", 228 | "inclusive": false 229 | } 230 | }, 231 | "operator": "MUST" 232 | }, 233 | "operator": "OR", 234 | "right": { 235 | "left": { 236 | "left": { 237 | "left": "d", 238 | "operator": "EQUALS", 239 | "right": { 240 | "left": "bar", 241 | "operator": "FUZZY", 242 | "distance": 3 243 | } 244 | }, 245 | "operator": "NOT" 246 | }, 247 | "operator": "MUST_NOT" 248 | } 249 | }`, 250 | want: OR( 251 | MUST(Rang("c", "*", "foo", 
false)), 252 | MUSTNOT(NOT(Eq("d", FUZZY("bar", 3)))), 253 | ), 254 | }, 255 | "large_blob": { 256 | input: `{ 257 | "left": "a", 258 | "operator": "OR", 259 | "right": { 260 | "left": { 261 | "left": "c", 262 | "operator": "RANGE", 263 | "right": { 264 | "min": "*", 265 | "max": "foo", 266 | "inclusive": false 267 | } 268 | }, 269 | "operator": "OR", 270 | "right": "b" 271 | } 272 | }`, 273 | want: OR( 274 | "a", 275 | OR( 276 | Rang("c", "*", "foo", false), 277 | "b", 278 | ), 279 | ), 280 | }, 281 | "every_operator_combined": { 282 | input: `{ 283 | "left": { 284 | "left": { 285 | "left": "a", 286 | "operator": "RANGE", 287 | "right": { 288 | "min": 1, 289 | "max": "*", 290 | "inclusive": true 291 | } 292 | }, 293 | "operator": "AND", 294 | "right": { 295 | "left": { 296 | "left": { 297 | "left": "b", 298 | "operator": "LIKE", 299 | "right": "/foo?ar.*/" 300 | }, 301 | "operator": "NOT" 302 | }, 303 | "operator": "BOOST" 304 | } 305 | }, 306 | "operator": "OR", 307 | "right": { 308 | "left": { 309 | "left": { 310 | "left": "c", 311 | "operator": "RANGE", 312 | "right": { 313 | "min": "*", 314 | "max": "foo", 315 | "inclusive": false 316 | } 317 | }, 318 | "operator": "MUST" 319 | }, 320 | "operator": "OR", 321 | "right": { 322 | "left": { 323 | "left": { 324 | "left": "d", 325 | "operator": "EQUALS", 326 | "right": { 327 | "left": "bar", 328 | "operator": "FUZZY", 329 | "distance": 3 330 | } 331 | }, 332 | "operator": "NOT" 333 | }, 334 | "operator": "MUST_NOT" 335 | } 336 | } 337 | }`, 338 | want: OR( 339 | AND( 340 | Rang("a", 1, "*", true), 341 | BOOST(NOT(LIKE("b", REGEXP("/foo?ar.*/")))), 342 | ), 343 | OR( 344 | MUST(Rang("c", "*", "foo", false)), 345 | MUSTNOT(NOT(Eq("d", FUZZY("bar", 3)))), 346 | ), 347 | ), 348 | }, 349 | } 350 | 351 | for name, tc := range tcs { 352 | t.Run(name, func(t *testing.T) { 353 | got := &Expression{} 354 | err := json.Unmarshal([]byte(tc.input), got) 355 | if err != nil { 356 | t.Fatalf("expected no error during unmarshal but 
got [%s]", err) 357 | } 358 | 359 | if !reflect.DeepEqual(tc.want, got) { 360 | t.Fatalf(errTemplate, "parsed expression doesn't match", tc.want, got) 361 | } 362 | 363 | gotSerialized, err := json.Marshal(got) 364 | if err != nil { 365 | t.Fatalf("expected no error during marshal but got [%s]", err) 366 | } 367 | 368 | if !jsonEqual(string(gotSerialized), tc.input) { 369 | t.Fatalf( 370 | jsonErrTemplate, 371 | "serialized expressions don't match", 372 | stripWhitespace(tc.input), 373 | stripWhitespace(string(gotSerialized)), 374 | ) 375 | } 376 | }) 377 | } 378 | } 379 | 380 | func jsonEqual(got string, want string) bool { 381 | return stripWhitespace(got) == stripWhitespace(want) 382 | } 383 | 384 | func stripWhitespace(in string) string { 385 | return strings.Join(strings.Fields(in), "") 386 | } 387 | -------------------------------------------------------------------------------- /pkg/driver/renderfn.go: -------------------------------------------------------------------------------- 1 | package driver 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | "unicode/utf8" 8 | 9 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 10 | ) 11 | 12 | // RenderFN is a rendering function. 
// It takes the left and right side of the operator serialized to a string
// and serializes the entire expression
type RenderFN func(left, right string) (string, error)

// literal renders a bare literal value. It rejects values that cannot appear
// in a well formed filter string: invalid utf8 sequences and embedded null bytes.
func literal(left, right string) (string, error) {
	if !utf8.ValidString(left) {
		return "", fmt.Errorf("literal contains invalid utf8: %q", left)
	}
	if strings.ContainsRune(left, 0) {
		return "", fmt.Errorf("literal contains null byte: %q", left)
	}

	return left, nil
}

// equals renders an equality comparison.
func equals(left, right string) (string, error) {
	return fmt.Sprintf("%s = %s", left, right), nil
}

// noop passes the left side through untouched.
func noop(left, right string) (string, error) {
	return left, nil
}

// like renders a LIKE match. A right side delimited by '/' (just inside its
// surrounding quotes) is treated as a regular expression; otherwise lucene
// wildcards are translated to their SQL equivalents.
func like(left, right string) (string, error) {
	// right arrives quoted, so the regexp delimiters sit at index 1 and len-2
	if len(right) >= 4 && right[1] == '/' && right[len(right)-2] == '/' {
		return fmt.Sprintf("%s ~ %s", left, right), nil
	}

	right = strings.ReplaceAll(right, "*", "%")
	right = strings.ReplaceAll(right, "?", "_")
	return fmt.Sprintf("%s SIMILAR TO %s", left, right), nil
}

// likeParam renders a LIKE match for a parameterized query. The placeholder is
// left as-is; the actual parameter value decides regexp vs SIMILAR TO.
func likeParam(left, right string, params []any) (string, error) {
	if len(params) == 1 {
		// FIX: guard the type assertion — a non-string parameter previously
		// panicked here; it is simply not a regexp.
		if pright, ok := params[0].(string); ok &&
			len(pright) >= 4 && pright[0] == '/' && pright[len(pright)-1] == '/' {
			return fmt.Sprintf("%s ~ %s", left, right), nil
		}
	}

	return fmt.Sprintf("%s SIMILAR TO %s", left, right), nil
}

// inFn renders a membership check against a list.
func inFn(left, right string) (string, error) {
	return fmt.Sprintf("%s IN %s", left, right), nil
}

// list wraps the rendered elements in parentheses.
func list(left, right string) (string, error) {
	return fmt.Sprintf("(%s)", left), nil
}

// greater renders a ">" comparison.
func greater(left, right string) (string, error) {
	return fmt.Sprintf("%s > %s", left, right), nil
}

// less renders a "<" comparison.
func less(left, right string) (string, error) {
	return fmt.Sprintf("%s < %s", left, right), nil
}

// greaterEq renders a ">=" comparison.
func greaterEq(left, right string) (string, error) {
	return fmt.Sprintf("%s >= %s", left, right), nil
}
right), nil 74 | } 75 | 76 | func lessEq(left, right string) (string, error) { 77 | return fmt.Sprintf("%s <= %s", left, right), nil 78 | } 79 | 80 | // rang is more complicated than the others because it has to handle inclusive and exclusive ranges, 81 | // number and string ranges, and ranges that only have one bound 82 | func rang(left, right string) (string, error) { 83 | inclusive := true 84 | if right[0] == '(' && right[len(right)-1] == ')' { 85 | inclusive = false 86 | } 87 | 88 | stripped := right[1 : len(right)-1] 89 | rangeSlice := strings.Split(stripped, ",") 90 | 91 | if len(rangeSlice) != 2 { 92 | return "", fmt.Errorf("the BETWEEN operator needs a two item list in the right hand side, have %s", right) 93 | } 94 | 95 | rawMin := strings.Trim(rangeSlice[0], " ") 96 | rawMax := strings.Trim(rangeSlice[1], " ") 97 | 98 | iMin, iMax, err := toInts(rawMin, rawMax) 99 | if err == nil { 100 | if rawMin == "'*'" { 101 | if inclusive { 102 | return fmt.Sprintf("%s <= %d", left, iMax), nil 103 | } 104 | return fmt.Sprintf("%s < %d", left, iMax), nil 105 | } 106 | 107 | if rawMax == "'*'" { 108 | if inclusive { 109 | return fmt.Sprintf("%s >= %d", left, iMin), nil 110 | } 111 | return fmt.Sprintf("%s > %d", left, iMin), nil 112 | } 113 | 114 | if inclusive { 115 | return fmt.Sprintf("%s >= %d AND %s <= %d", 116 | left, 117 | iMin, 118 | left, 119 | iMax, 120 | ), 121 | nil 122 | } 123 | 124 | return fmt.Sprintf("%s > %d AND %s < %d", 125 | left, 126 | iMin, 127 | left, 128 | iMax, 129 | ), 130 | nil 131 | } 132 | 133 | fMin, fMax, err := toFloats(rawMin, rawMax) 134 | if err == nil { 135 | if rawMin == "'*'" { 136 | if inclusive { 137 | return fmt.Sprintf("%s <= %.2f", left, fMax), nil 138 | } 139 | return fmt.Sprintf("%s < %.2f", left, fMax), nil 140 | } 141 | 142 | if rawMax == "'*'" { 143 | if inclusive { 144 | return fmt.Sprintf("%s >= %.2f", left, fMin), nil 145 | } 146 | return fmt.Sprintf("%s > %.2f", left, fMin), nil 147 | } 148 | 149 | if inclusive { 150 
| return fmt.Sprintf("%s >= %.2f AND %s <= %.2f", 151 | left, 152 | fMin, 153 | left, 154 | fMax, 155 | ), 156 | nil 157 | } 158 | 159 | return fmt.Sprintf("%s > %.2f AND %s < %.2f", 160 | left, 161 | fMin, 162 | left, 163 | fMax, 164 | ), 165 | nil 166 | } 167 | 168 | return fmt.Sprintf(`%s BETWEEN %s AND %s`, 169 | left, 170 | strings.Trim(rangeSlice[0], " "), 171 | strings.Trim(rangeSlice[1], " "), 172 | ), 173 | nil 174 | } 175 | 176 | func rangParam(left, right string, params []any) (string, error) { 177 | inclusive := true 178 | if right[0] == '(' && right[len(right)-1] == ')' { 179 | inclusive = false 180 | } 181 | 182 | stripped := right[1 : len(right)-1] 183 | rangeSlice := strings.Split(stripped, ",") 184 | 185 | if len(rangeSlice) != 2 { 186 | return "", fmt.Errorf("the BETWEEN operator needs a two item list in the right hand side, have %s", right) 187 | } 188 | 189 | rawMin := strings.Trim(rangeSlice[0], " ") 190 | rawMax := strings.Trim(rangeSlice[1], " ") 191 | 192 | // if we have a parameterized input then we need to check the type 193 | if rawMin == "?" || rawMax == "?" 
{ 194 | switch params[0].(type) { 195 | case int, float64, float32: 196 | if rawMin == "'*'" { 197 | if inclusive { 198 | return fmt.Sprintf("%s <= %s", left, rawMax), nil 199 | } 200 | return fmt.Sprintf("%s < %s", left, rawMax), nil 201 | } 202 | 203 | if rawMax == "'*'" { 204 | if inclusive { 205 | return fmt.Sprintf("%s >= %s", left, rawMin), nil 206 | } 207 | return fmt.Sprintf("%s > %s", left, rawMin), nil 208 | } 209 | 210 | if inclusive { 211 | return fmt.Sprintf("%s >= %s AND %s <= %s", 212 | left, 213 | rawMin, 214 | left, 215 | rawMax, 216 | ), 217 | nil 218 | } 219 | 220 | return fmt.Sprintf("%s > %s AND %s < %s", 221 | left, 222 | rawMin, 223 | left, 224 | rawMax, 225 | ), 226 | nil 227 | default: 228 | return fmt.Sprintf(`%s BETWEEN %s AND %s`, 229 | left, 230 | strings.Trim(rangeSlice[0], " "), 231 | strings.Trim(rangeSlice[1], " "), 232 | ), 233 | nil 234 | } 235 | 236 | } 237 | 238 | iMin, iMax, err := toInts(rawMin, rawMax) 239 | if err == nil { 240 | if rawMin == "'*'" { 241 | if inclusive { 242 | return fmt.Sprintf("%s <= %d", left, iMax), nil 243 | } 244 | return fmt.Sprintf("%s < %d", left, iMax), nil 245 | } 246 | 247 | if rawMax == "'*'" { 248 | if inclusive { 249 | return fmt.Sprintf("%s >= %d", left, iMin), nil 250 | } 251 | return fmt.Sprintf("%s > %d", left, iMin), nil 252 | } 253 | 254 | if inclusive { 255 | return fmt.Sprintf("%s >= %d AND %s <= %d", 256 | left, 257 | iMin, 258 | left, 259 | iMax, 260 | ), 261 | nil 262 | } 263 | 264 | return fmt.Sprintf("%s > %d AND %s < %d", 265 | left, 266 | iMin, 267 | left, 268 | iMax, 269 | ), 270 | nil 271 | } 272 | 273 | fMin, fMax, err := toFloats(rawMin, rawMax) 274 | if err == nil { 275 | if rawMin == "'*'" { 276 | if inclusive { 277 | return fmt.Sprintf("%s <= %.2f", left, fMax), nil 278 | } 279 | return fmt.Sprintf("%s < %.2f", left, fMax), nil 280 | } 281 | 282 | if rawMax == "'*'" { 283 | if inclusive { 284 | return fmt.Sprintf("%s >= %.2f", left, fMin), nil 285 | } 286 | return 
fmt.Sprintf("%s > %.2f", left, fMin), nil 287 | } 288 | 289 | if inclusive { 290 | return fmt.Sprintf("%s >= %.2f AND %s <= %.2f", 291 | left, 292 | fMin, 293 | left, 294 | fMax, 295 | ), 296 | nil 297 | } 298 | 299 | return fmt.Sprintf("%s > %.2f AND %s < %.2f", 300 | left, 301 | fMin, 302 | left, 303 | fMax, 304 | ), 305 | nil 306 | } 307 | 308 | return fmt.Sprintf(`%s BETWEEN %s AND %s`, 309 | left, 310 | strings.Trim(rangeSlice[0], " "), 311 | strings.Trim(rangeSlice[1], " "), 312 | ), 313 | nil 314 | } 315 | 316 | func basicCompound(op expr.Operator) RenderFN { 317 | return func(left, right string) (string, error) { 318 | return fmt.Sprintf("%s %s %s", left, op, right), nil 319 | } 320 | } 321 | 322 | func basicWrap(op expr.Operator) RenderFN { 323 | return func(left, right string) (string, error) { 324 | return fmt.Sprintf("%s(%s)", op, left), nil 325 | } 326 | } 327 | 328 | func toInts(rawMin, rawMax string) (iMin, iMax int, err error) { 329 | iMin, err = strconv.Atoi(rawMin) 330 | if rawMin != "'*'" && err != nil { 331 | return 0, 0, err 332 | } 333 | 334 | iMax, err = strconv.Atoi(rawMax) 335 | if rawMax != "'*'" && err != nil { 336 | return 0, 0, err 337 | } 338 | 339 | return iMin, iMax, nil 340 | } 341 | 342 | func toFloats(rawMin, rawMax string) (fMin, fMax float64, err error) { 343 | fMin, err = strconv.ParseFloat(rawMin, 64) 344 | if rawMin != "*" && err != nil { 345 | return 0, 0, err 346 | } 347 | 348 | fMax, err = strconv.ParseFloat(rawMax, 64) 349 | if rawMax != "*" && err != nil { 350 | return 0, 0, err 351 | } 352 | 353 | return fMin, fMax, nil 354 | } 355 | -------------------------------------------------------------------------------- /pkg/lucene/expr/validator.go: -------------------------------------------------------------------------------- 1 | package expr 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "reflect" 7 | ) 8 | 9 | type validator = func(*Expression) (err error) 10 | 11 | var validators = map[Operator]validator{ 12 | Equals: 
validateEquals, 13 | And: validateAnd, 14 | Or: validateOr, 15 | Not: validateNot, 16 | Range: validateRange, 17 | Must: validateMust, 18 | MustNot: validateMustNot, 19 | Boost: validateBoost, 20 | Fuzzy: validateFuzzy, 21 | Literal: validateLiteral, 22 | Wild: validateWild, 23 | Regexp: validateRegexp, 24 | Greater: validateCompare, 25 | Less: validateCompare, 26 | GreaterEq: validateCompare, 27 | LessEq: validateCompare, 28 | Like: validateLike, 29 | In: validateIn, 30 | List: validateList, 31 | } 32 | 33 | func validateEquals(e *Expression) (err error) { 34 | if e == nil { 35 | return nil 36 | } 37 | 38 | if e.Op != Equals { 39 | return errors.New("EQUALS validation error: must have equals operator") 40 | } 41 | 42 | if !isLiteralExpr(e.Left) { 43 | return errors.New("EQUALS validation: left value must be a literal expression") 44 | } 45 | 46 | return nil 47 | } 48 | 49 | func validateCompare(e *Expression) (err error) { 50 | if e == nil { 51 | return nil 52 | } 53 | 54 | if e.Op != Greater && e.Op != Less && e.Op != GreaterEq && e.Op != LessEq { 55 | return errors.New("COMPARE validation error: must have comparison operator operator") 56 | } 57 | 58 | if !isLiteralExpr(e.Left) { 59 | return errors.New("COMPARE validation: left value must be a literal expression") 60 | } 61 | 62 | return nil 63 | } 64 | 65 | func validateAnd(e *Expression) (err error) { 66 | if e == nil { 67 | return nil 68 | } 69 | 70 | if e.Left == nil { 71 | return errors.New("AND validation: left value must not be nil") 72 | } 73 | 74 | if e.Right == nil { 75 | return errors.New("AND validation: right value must not be nil") 76 | } 77 | 78 | return nil 79 | } 80 | 81 | func validateOr(e *Expression) (err error) { 82 | if e == nil { 83 | return nil 84 | } 85 | 86 | if e.Left == nil { 87 | return errors.New("OR validation: left value must not be nil") 88 | } 89 | 90 | if e.Right == nil { 91 | return errors.New("OR validation: right value must not be nil") 92 | } 93 | 94 | return nil 95 | } 96 
| 97 | func validateNot(e *Expression) (err error) { 98 | if e == nil { 99 | return nil 100 | } 101 | 102 | if e.Left == nil { 103 | return errors.New("NOT validation: sub expression must not be nil") 104 | } 105 | 106 | if e.Right != nil { 107 | return errors.New("NOT validation: must not have two sub expressions") 108 | } 109 | 110 | return nil 111 | } 112 | 113 | func validateRange(e *Expression) (err error) { 114 | if e == nil { 115 | return nil 116 | } 117 | 118 | if e.Left == nil { 119 | return errors.New("RANGE validation: term value must not be nil") 120 | } 121 | 122 | if e.Right == nil { 123 | return errors.New("RANGE validation: boundary value must not be nil") 124 | } 125 | 126 | if !isLiteralExpr(e.Left) { 127 | return errors.New("RANGE validation: term value must be a literal") 128 | } 129 | 130 | boundary, isBoundary := e.Right.(*RangeBoundary) 131 | if !isBoundary { 132 | return fmt.Errorf("RANGE validation: invalid range boundary - incorrect type [%s]", reflect.TypeOf(e.Right)) 133 | } 134 | 135 | if boundary == nil { 136 | return errors.New("RANGE validation: range boundary must not be nil") 137 | } 138 | 139 | if boundary.Min == nil { 140 | return errors.New("RANGE validation: range boundary must have a minimum") 141 | } 142 | 143 | if boundary.Max == nil { 144 | return errors.New("RANGE validation: range boundary must have a maximum") 145 | } 146 | 147 | return nil 148 | } 149 | 150 | func validateMust(e *Expression) (err error) { 151 | if e == nil { 152 | return nil 153 | } 154 | 155 | if e.Left == nil { 156 | return errors.New("MUST validation: sub expression must not be nil") 157 | } 158 | 159 | if e.Right != nil { 160 | return errors.New("MUST validation: must not have two sub expressions") 161 | } 162 | 163 | return nil 164 | } 165 | 166 | func validateMustNot(e *Expression) (err error) { 167 | if e == nil { 168 | return nil 169 | } 170 | 171 | if e.Left == nil { 172 | return errors.New("MUST_NOT validation: sub expression must not be nil") 
173 | } 174 | 175 | if e.Right != nil { 176 | return errors.New("MUST_NOT validation: must not have two sub expressions") 177 | } 178 | 179 | return nil 180 | } 181 | 182 | func validateBoost(e *Expression) (err error) { 183 | if e == nil { 184 | return nil 185 | } 186 | 187 | if e.Left == nil { 188 | return errors.New("BOOST validation: sub expression must not be nil") 189 | } 190 | 191 | if e.Right != nil { 192 | return errors.New("BOOST validation: must not have two sub expressions") 193 | } 194 | 195 | return nil 196 | } 197 | 198 | func validateFuzzy(e *Expression) (err error) { 199 | if e == nil { 200 | return nil 201 | } 202 | 203 | if e.Left == nil { 204 | return errors.New("FUZZY validation: sub expression must not be nil") 205 | } 206 | 207 | if e.Right != nil { 208 | return errors.New("FUZZY validation: must not have two sub expressions") 209 | } 210 | 211 | return nil 212 | } 213 | 214 | func validateLiteral(e *Expression) (err error) { 215 | if e == nil { 216 | return nil 217 | } 218 | 219 | if e.Left == nil { 220 | return errors.New("LITERAL validation: value must not be nil") 221 | } 222 | 223 | if e.Right != nil { 224 | return errors.New("LITERAL validation: must not have two values") 225 | } 226 | 227 | if !isLiteral(e.Left) { 228 | return fmt.Errorf("LITERAL validation: value must be a literal, not %s", reflect.TypeOf(e.Left)) 229 | } 230 | 231 | return nil 232 | } 233 | 234 | func validateWild(e *Expression) (err error) { 235 | if e == nil { 236 | return nil 237 | } 238 | 239 | if e.Left == nil { 240 | return errors.New("WILDCARD validation: value must not be nil") 241 | } 242 | 243 | if e.Right != nil { 244 | return errors.New("WILDCARD validation: must not have two values") 245 | } 246 | 247 | if !isLiteral(e.Left) { 248 | return fmt.Errorf("WILDCARD validation: value must be a literal, not %s", reflect.TypeOf(e.Left)) 249 | } 250 | 251 | return nil 252 | } 253 | 254 | func validateRegexp(e *Expression) (err error) { 255 | if e == nil { 256 | 
return nil 257 | } 258 | 259 | if e.Left == nil { 260 | return errors.New("REGEXP validation: value must not be nil") 261 | } 262 | 263 | if e.Right != nil { 264 | return errors.New("REGEXP validation: must not have two values") 265 | } 266 | 267 | if !isLiteral(e.Left) { 268 | return fmt.Errorf("REGEXP validation: value must be a literal, not %s", reflect.TypeOf(e.Left)) 269 | } 270 | 271 | return nil 272 | } 273 | 274 | func validateLike(e *Expression) (err error) { 275 | if e == nil { 276 | return nil 277 | } 278 | 279 | if e.Left == nil { 280 | return errors.New("LIKE validation: column must not be nil") 281 | } 282 | 283 | if !isLiteralExpr(e.Left) { 284 | return fmt.Errorf("LIKE validation: value must be a literal, not %s", reflect.TypeOf(e.Left)) 285 | } 286 | 287 | if e.Right == nil { 288 | return errors.New("LIKE validation: must have two values") 289 | } 290 | 291 | right, ok := e.Right.(*Expression) 292 | if !ok { 293 | return fmt.Errorf("LIKE validation: right side must be an expression, not %s", reflect.TypeOf(e.Right)) 294 | } 295 | 296 | if right.Op != Wild && right.Op != Regexp { 297 | return fmt.Errorf("LIKE validation: right side must be a wildcard or regexp, not %s", right.Op) 298 | } 299 | 300 | return nil 301 | } 302 | 303 | func validateIn(e *Expression) (err error) { 304 | if e == nil { 305 | return nil 306 | } 307 | 308 | if e.Left == nil { 309 | return errors.New("IN validation: column must not be nil") 310 | } 311 | 312 | if !isLiteralExpr(e.Left) { 313 | return fmt.Errorf("IN validation: value must be a literal, not %s", reflect.TypeOf(e.Left)) 314 | } 315 | 316 | if e.Right == nil { 317 | return errors.New("IN validation: must have two values") 318 | } 319 | 320 | right, ok := e.Right.(*Expression) 321 | if !ok { 322 | return fmt.Errorf("IN validation: right side must be an expression, not %s", reflect.TypeOf(e.Right)) 323 | } 324 | 325 | if right.Op != List { 326 | return fmt.Errorf("IN validation: right side must be a list, not %s", 
right.Op) 327 | } 328 | 329 | return nil 330 | } 331 | 332 | func validateList(e *Expression) (err error) { 333 | if e == nil { 334 | return nil 335 | } 336 | 337 | if e.Left == nil { 338 | return errors.New("LIST validation: value must not be nil") 339 | } 340 | 341 | if e.Right != nil { 342 | return errors.New("LIST validation: must not have two values") 343 | } 344 | 345 | if !isListOfLiteralExprs(e.Left) { 346 | return fmt.Errorf("LIST validation: value must be a list of literals, not %s", reflect.TypeOf(e.Left)) 347 | } 348 | 349 | return nil 350 | } 351 | 352 | func isListOfLiteralExprs(in any) bool { 353 | e, isList := in.([]*Expression) 354 | if !isList { 355 | return false 356 | } 357 | for _, v := range e { 358 | if !isLiteralExpr(v) { 359 | return false 360 | } 361 | } 362 | return true 363 | } 364 | 365 | func isLiteralExpr(in any) bool { 366 | e, isExpr := in.(*Expression) 367 | return isExpr && (e.Op == Literal || e.Op == Wild || e.Op == Regexp) && isLiteral(e.Left) 368 | } 369 | 370 | func isLiteral(in any) bool { 371 | return isString(in) || isNum(in) || isBool(in) || isColumn(in) 372 | } 373 | 374 | func isColumn(in any) bool { 375 | _, is := in.(Column) 376 | return is 377 | } 378 | 379 | func isString(in any) bool { 380 | _, is := in.(string) 381 | return is 382 | } 383 | 384 | func isNum(in any) bool { 385 | return isInt(in) || isFloat(in) 386 | } 387 | 388 | func isBool(in any) bool { 389 | _, is := in.(bool) 390 | return is 391 | } 392 | 393 | func isInt(in any) bool { 394 | switch in.(type) { 395 | case int, int32, int64, uint, uint8, uint16, uint32, uint64: 396 | return true 397 | default: 398 | return false 399 | } 400 | } 401 | 402 | func isFloat(in any) bool { 403 | switch in.(type) { 404 | case float32, float64: 405 | return true 406 | default: 407 | return false 408 | } 409 | } 410 | -------------------------------------------------------------------------------- /internal/lex/lex.go: 
// ----------------------------------------------------------------------------
// internal/lex/lex.go (package lex; imports: fmt, strings, unicode, unicode/utf8)
//
// A hand-rolled state-machine lexer that turns a lucene query string into a
// stream of tokens for the grammar parser.
// ----------------------------------------------------------------------------

const eof = -1

// Token is a parsed token from the input buffer sent to the lexer
type Token struct {
	Typ TokType // the type of the item
	pos int     // the position of the item in the string
	Val string  // the value of the item
}

// String is a string representation of a lex item
func (i Token) String() string {
	switch {
	case i.Typ == TErr:
		return i.Val
	case len(i.Val) > 10:
		return fmt.Sprintf("%.10q...", i.Val)
	}
	return fmt.Sprintf("%q", i.Val)
}

// precedence : > ) > + > - > ~ > ^ > NOT > AND > OR > (

// TokType is an enum of token types that can be parsed by the lexer. Order matters here for non terminals
// with a lower number meaning a higher precedence.
type TokType int

// types of tokens that can be parsed
const (
	// terminal characters
	TErr TokType = iota
	TLiteral
	TQuoted
	TRegexp

	// precedence of operators. Order matters here. This might need to be abstracted
	// to a grammar specific precedence but for now it is fine here.
	TEqual
	TGreater
	TLess
	TColon
	TPlus
	TMinus
	TTilde
	TCarrot
	TNot
	TAnd
	TOr
	TRParen
	TLParen

	// operators that do not have a set precedence because we specifically handle them
	// due to ambiguities in the grammar
	TLCurly
	TRCurly
	TTO
	TLSquare
	TRSquare

	// start and end operators
	TEOF
	TStart
)

// symbols maps single-rune operators to their token types.
var symbols = map[rune]TokType{
	'(': TLParen,
	')': TRParen,
	'[': TLSquare,
	']': TRSquare,
	'{': TLCurly,
	'}': TRCurly,
	':': TColon,
	'+': TPlus,
	'=': TEqual,
	'>': TGreater,
	'~': TTilde,
	'^': TCarrot,
	'<': TLess,
	// minus is not included because we have to special case it for negative numbers
	// '-': tMINUS,
}

// tokStrings provides debug names for token types.
var tokStrings = map[TokType]string{
	TErr:     "tERR",
	TLiteral: "tLITERAL",
	TQuoted:  "tQUOTED",
	TRegexp:  "tREGEXP",
	TEqual:   "tEQUAL",
	TLParen:  "tLPAREN",
	TRParen:  "tRPAREN",
	TAnd:     "tAND",
	TOr:      "tOR",
	TNot:     "tNOT",
	TLSquare: "tLSQUARE",
	TRSquare: "tRSQUARE",
	TLCurly:  "tLCURLY",
	TRCurly:  "tRCURLY",
	TTO:      "tTO",
	TColon:   "tCOLON",
	TPlus:    "tPLUS",
	TMinus:   "tMINUS",
	TGreater: "tGREATER",
	TLess:    "tLESS",
	TTilde:   "tTILDE",
	TCarrot:  "tCARROT",
	TEOF:     "tEOF",
	TStart:   "tSTART",
}

func (tt TokType) String() string {
	return tokStrings[tt]
}

// terminalTokens contains a map of terminal tokens.
// Uses empty struct value to conserve memory.
var terminalTokens = map[TokType]struct{}{
	TErr:     {},
	TLiteral: {},
	TQuoted:  {},
	TRegexp:  {},
	TEOF:     {},
}

// IsTerminal checks whether a specific token is a terminal token meaning
// it can't be matched in the grammar.
func IsTerminal(tok Token) bool {
	_, terminal := terminalTokens[tok.Typ]

	return terminal
}

// HasLessPrecedence checks if a current token has lower precedence than the next.
// There is a specific ordering in the iota (lower numbers = higher precedence) indicating
// whether the operator has more precedence or not.
func HasLessPrecedence(current Token, next Token) bool {
	// left associative. If we see another of the same type don't add onto the pile.
	// right associative would return true here.
	if current.Typ == next.Typ {
		return false
	}

	// lower numbers mean higher precedence
	return current.Typ > next.Typ
}

type tokenStateFn func(*Lexer) tokenStateFn

// Lexer is a lexer that will parse an input string into tokens for consumption by a
// grammar parser.
type Lexer struct {
	input string // the input to parse

	pos      int   // the position of the cursor
	start    int   // the start of the current token
	currItem Token // the current item being worked on
	atEOF    bool  // whether we have finished parsing the string or not
}

// Lex creates a lexer for an input string
func Lex(input string) *Lexer {
	return &Lexer{
		input: input,
		pos:   0,
		start: 0,
	}
}

// Next parses and returns just the next token in the input.
func (l *Lexer) Next() Token {
	// default to returning EOF
	l.currItem = Token{
		Typ: TEOF,
		pos: l.pos,
		Val: "EOF",
	}

	// run the state machine until we have a token
	for state := lexSpace; state != nil; {
		state = state(l)
	}

	return l.currItem
}

// Peek looks at the next token but does not impact the lexer state.
// Note this is intentionally a value receiver so any state change is
// discarded and does not take effect on the caller's lexer.
func (l Lexer) Peek() Token {
	if l.currItem.Typ == TEOF {
		return l.currItem
	}

	return l.Next()
}

// lexSpace is the first state that we always start with: it skips leading whitespace.
func lexSpace(l *Lexer) tokenStateFn {
	for {
		switch l.next() {
		case eof:
			return nil
		case ' ', '\t', '\r', '\n':
			continue
		default:
			// transition to being in a value
			l.backup()
			return lexVal
		}
	}
}

// lexVal dispatches to the correct sub-state based on the first rune of the value.
func lexVal(l *Lexer) tokenStateFn {
	l.start = l.pos
	switch r := l.next(); {
	case isAlphaNumeric(r) || isWildcard(r) || isEscape(r):
		l.backup()
		return lexWord
	case isSymbol(r):
		return l.emit(symbols[r])
	// special case minus sign since it can be a negative number or a minus
	case r == '-':
		if !unicode.IsDigit(l.peek()) {
			return l.emit(TMinus)
		}
		l.backup()
		return lexWord

	case r == '"' || r == '\'':
		l.backup()
		return lexPhrase
	case r == '/':
		l.backup()
		return lexRegexp
	default:
		l.errorf("error parsing token [%s]", string(r))
	}
	return nil
}

// lexPhrase consumes a quoted phrase, honoring backslash escapes.
func lexPhrase(l *Lexer) tokenStateFn {
	open := l.next()

	for {
		switch r := l.next(); {
		case isEscape(r):
			// FIX: consume the escaped rune so an escaped quote does not
			// terminate the phrase early (matches lexRegexp and lexWord,
			// which already skip the character after an escape)
			l.next()
		case isAlphaNumeric(r) || isWildcard(r):
			// do nothing
		case r == ' ' || r == '\t' || r == '\r' || r == '\n':
			// do nothing
		case r == open:
			return l.emit(TQuoted)
		case r == eof:
			return l.errorf("unterminated quote")
		}
	}
}

// lexRegexp consumes a regexp delimited by its opening rune, honoring escapes.
func lexRegexp(l *Lexer) tokenStateFn {
	// theoretically allow us to use anything to specify a regexp
	open := l.next()

	for {
		switch r := l.next(); {
		case isAlphaNumeric(r) || isWildcard(r):
			// do nothing
		case isEscape(r):
			l.next() // just ignore the next character
		case r == ' ' || r == '\t' || r == '\r' || r == '\n':
			// do nothing
		case r == open:
			return l.emit(TRegexp)
		case r == eof:
			return l.errorf("unterminated regexp")
		}
	}
}

// lexWord consumes a bare word and classifies the reserved keywords (AND/OR/NOT/TO).
func lexWord(l *Lexer) tokenStateFn {
loop:
	for {
		switch r := l.next(); {
		case isAlphaNumeric(r) || isWildcard(r) || r == '.' || r == '-':
			// do nothing
		case isEscape(r):
			l.next() // just ignore the next character
		default:
			l.backup()
			break loop
		}
	}

	switch strings.ToUpper(l.currWord()) {
	case "AND":
		return l.emit(TAnd)
	case "OR":
		return l.emit(TOr)
	case "NOT":
		return l.emit(TNot)
	case "TO":
		return l.emit(TTO)
	}
	return l.emit(TLiteral)
}

// currWord returns the input slice for the token currently being built.
func (l *Lexer) currWord() string {
	return l.input[l.start:l.pos]
}

// toTok returns the item at the current input point with the specified type
// and advances the input.
func (l *Lexer) toTok(t TokType) Token {
	i := Token{
		Typ: t,
		pos: l.start,
		Val: l.input[l.start:l.pos],
	}
	// update the lexer's start for the next token to be the current position
	l.start = l.pos
	return i
}

// emit passes the trailing text as an item back to the parser.
func (l *Lexer) emit(t TokType) tokenStateFn {
	l.currItem = l.toTok(t)
	return nil
}

// next moves one rune forward in the input string and returns the consumed rune
func (l *Lexer) next() rune {
	if l.pos >= len(l.input) {
		l.atEOF = true
		return eof
	}
	r, width := utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += width
	return r
}

// peek returns but does not consume the next rune in the input.
func (l *Lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// backup steps back one rune.
func (l *Lexer) backup() {
	if !l.atEOF && l.pos > 0 {
		_, width := utf8.DecodeLastRuneInString(l.input[:l.pos])
		l.pos -= width
	}
}

// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextToken.
func (l *Lexer) errorf(format string, args ...any) tokenStateFn {
	l.currItem = Token{
		Typ: TErr,
		pos: l.start,
		Val: fmt.Sprintf(format, args...),
	}
	l.start = 0
	l.pos = 0
	l.input = l.input[:0]
	return nil
}

// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
func isAlphaNumeric(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}

// isWildcard reports whether r is a lucene wildcard character.
func isWildcard(r rune) bool {
	return r == '*' || r == '?'
}

// isSpace reports whether r is a space character.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t' || r == '\r' || r == '\n'
}

// isEscape reports whether r is the escape character.
func isEscape(r rune) bool {
	return r == '\\'
}

// isSymbol reports whether the rune is one of the reserved symbols.
func isSymbol(r rune) bool {
	_, found := symbols[r]
	return found
}

/*
The repository dump continues with the LICENSE file, whose opening lines fell
inside this span and are preserved here verbatim:

LICENSE: Apache License, Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
*/
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /pkg/lucene/reduce/reduce.go: -------------------------------------------------------------------------------- 1 | package reduce 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | 7 | "github.com/grindlemire/go-lucene/internal/lex" 8 | "github.com/grindlemire/go-lucene/pkg/lucene/expr" 9 | ) 10 | 11 | // Reduce will reduce the elems and nonTerminals stacks using the available reducers and return 12 | // those slices modified to contain the reduced expressions. The elems will contain the reduced 13 | // expression and the nonTerminals will contain the modified stack of nonTerminals yet to be reduced. 
14 | func Reduce(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 15 | for _, reducer := range reducers { 16 | elems, nonTerminals, reduced := reducer(elems, nonTerminals, defaultField) 17 | if reduced { 18 | return elems, nonTerminals, true 19 | } 20 | } 21 | return elems, nonTerminals, false 22 | } 23 | 24 | type reducer func(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) 25 | 26 | // reducers are the reducers that will be executed during the grammar parsing 27 | var reducers = []reducer{ 28 | and, 29 | or, 30 | fuzzy, 31 | boost, 32 | equal, 33 | compare, 34 | compareEq, 35 | not, 36 | sub, 37 | must, 38 | mustNot, 39 | rangeop, 40 | } 41 | 42 | func equal(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 43 | if len(elems) != 3 { 44 | return elems, nonTerminals, false 45 | } 46 | 47 | // ensure the middle token is an equals 48 | tok, ok := elems[1].(lex.Token) 49 | if !ok || (tok.Typ != lex.TEqual && tok.Typ != lex.TColon) { 50 | return elems, nonTerminals, false 51 | } 52 | 53 | // make sure the left is a literal and right is an expression 54 | term, ok := elems[0].(*expr.Expression) 55 | if !ok { 56 | return elems, nonTerminals, false 57 | } 58 | value, ok := elems[2].(*expr.Expression) 59 | if !ok { 60 | return elems, nonTerminals, false 61 | } 62 | 63 | if literals, ok := isChainedOrLiterals(value); ok && len(literals) > 1 { 64 | elems = []any{ 65 | expr.IN( 66 | term, 67 | expr.LIST(literals), 68 | ), 69 | } 70 | } else { 71 | elems = []any{ 72 | expr.Eq( 73 | term, 74 | value, 75 | ), 76 | } 77 | } 78 | // we consumed one terminal, the = 79 | return elems, drop(nonTerminals, 1), true 80 | } 81 | 82 | func isChainedOrLiterals(in *expr.Expression) (out []*expr.Expression, ok bool) { 83 | if in == nil { 84 | return out, false 85 | } 86 | 87 | if in.Op == expr.Literal { 88 | return []*expr.Expression{in}, true 89 | } 90 | 91 | if in.Op == 
expr.Or { 92 | left, ok := in.Left.(*expr.Expression) 93 | if !ok { 94 | return out, false 95 | } 96 | right, ok := in.Right.(*expr.Expression) 97 | if !ok { 98 | return out, false 99 | } 100 | 101 | l, isLLiterals := isChainedOrLiterals(left) 102 | r, isRLiterals := isChainedOrLiterals(right) 103 | return append(l, r...), isLLiterals && isRLiterals 104 | } 105 | 106 | return out, false 107 | } 108 | 109 | func compare(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 110 | if len(elems) != 4 { 111 | return elems, nonTerminals, false 112 | } 113 | 114 | // ensure our middle tokens start with a colon 115 | tok, ok := elems[1].(lex.Token) 116 | if !ok || (tok.Typ != lex.TColon) { 117 | return elems, nonTerminals, false 118 | } 119 | 120 | // ensure the colon is followed by a > or < 121 | tokCmp, ok := elems[2].(lex.Token) 122 | if !ok || (tokCmp.Typ != lex.TGreater && tokCmp.Typ != lex.TLess) { 123 | return elems, nonTerminals, false 124 | } 125 | 126 | // make sure the left is a literal and right is an expression 127 | term, ok := elems[0].(*expr.Expression) 128 | if !ok { 129 | return elems, nonTerminals, false 130 | } 131 | value, ok := elems[3].(*expr.Expression) 132 | if !ok { 133 | return elems, nonTerminals, false 134 | } 135 | 136 | if tokCmp.Typ == lex.TGreater { 137 | elems = []any{ 138 | expr.GREATER( 139 | term, 140 | value, 141 | ), 142 | } 143 | } else { 144 | elems = []any{ 145 | expr.LESS( 146 | term, 147 | value, 148 | ), 149 | } 150 | } 151 | 152 | return elems, drop(nonTerminals, 2), true 153 | } 154 | 155 | func compareEq(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 156 | if len(elems) != 5 { 157 | return elems, nonTerminals, false 158 | } 159 | 160 | // ensure our middle tokens start with a colon 161 | tok, ok := elems[1].(lex.Token) 162 | if !ok || (tok.Typ != lex.TColon) { 163 | return elems, nonTerminals, false 164 | } 165 | 166 | // ensure the colon is followed 
by a > or < 167 | tokCmp, ok := elems[2].(lex.Token) 168 | if !ok || (tokCmp.Typ != lex.TGreater && tokCmp.Typ != lex.TLess) { 169 | return elems, nonTerminals, false 170 | } 171 | 172 | // ensure the middle tokens are followed by an = 173 | tokEp, ok := elems[3].(lex.Token) 174 | if !ok || (tokEp.Typ != lex.TEqual) { 175 | return elems, nonTerminals, false 176 | } 177 | 178 | // make sure the left is a literal and right is an expression 179 | term, ok := elems[0].(*expr.Expression) 180 | if !ok { 181 | return elems, nonTerminals, false 182 | } 183 | value, ok := elems[4].(*expr.Expression) 184 | if !ok { 185 | return elems, nonTerminals, false 186 | } 187 | 188 | if tokCmp.Typ == lex.TGreater { 189 | elems = []any{ 190 | expr.GREATEREQ( 191 | term, 192 | value, 193 | ), 194 | } 195 | } else { 196 | elems = []any{ 197 | expr.LESSEQ( 198 | term, 199 | value, 200 | ), 201 | } 202 | } 203 | 204 | return elems, drop(nonTerminals, 3), true 205 | 206 | } 207 | 208 | func and(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 209 | // if we don't have 3 items in the buffer it's not an AND clause 210 | if len(elems) != 3 { 211 | return elems, nonTerminals, false 212 | } 213 | 214 | // if the middle token is not an AND token do nothing 215 | operatorToken, ok := elems[1].(lex.Token) 216 | if !ok || operatorToken.Typ != lex.TAnd { 217 | return elems, nonTerminals, false 218 | } 219 | 220 | // make sure the left and right clauses are expressions 221 | left, ok := elems[0].(*expr.Expression) 222 | if !ok { 223 | return elems, nonTerminals, false 224 | } 225 | right, ok := elems[2].(*expr.Expression) 226 | if !ok { 227 | return elems, nonTerminals, false 228 | } 229 | 230 | // we have a valid AND clause. 
Replace it in the stack 231 | elems = []any{ 232 | expr.AND( 233 | wrapLiteral(left, defaultField), 234 | wrapLiteral(right, defaultField), 235 | ), 236 | } 237 | // we consumed one terminal, the AND 238 | return elems, drop(nonTerminals, 1), true 239 | } 240 | 241 | func or(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 242 | // if we don't have 3 items in the buffer it's not an OR clause 243 | if len(elems) != 3 { 244 | return elems, nonTerminals, false 245 | } 246 | 247 | // if the middle token is not an OR token do nothing 248 | operatorToken, ok := elems[1].(lex.Token) 249 | if !ok || operatorToken.Typ != lex.TOr { 250 | return elems, nonTerminals, false 251 | } 252 | 253 | // make sure the left and right clauses are expressions 254 | left, ok := elems[0].(*expr.Expression) 255 | if !ok { 256 | return elems, nonTerminals, false 257 | } 258 | right, ok := elems[2].(*expr.Expression) 259 | if !ok { 260 | return elems, nonTerminals, false 261 | } 262 | 263 | // we have a valid OR clause. 
Replace it in the stack 264 | elems = []any{ 265 | expr.OR( 266 | wrapLiteral(left, defaultField), 267 | wrapLiteral(right, defaultField), 268 | ), 269 | } 270 | // we consumed one terminal, the OR 271 | return elems, drop(nonTerminals, 1), true 272 | } 273 | // not reduces a NOT terminal followed by an expression into a negated expression. 274 | func not(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 275 | if len(elems) < 2 { 276 | return elems, nonTerminals, false 277 | } 278 | 279 | // if the second to last token is not the NOT operator do nothing 280 | operatorToken, ok := elems[len(elems)-2].(lex.Token) 281 | if !ok || operatorToken.Typ != lex.TNot { 282 | return elems, nonTerminals, false 283 | } 284 | 285 | // make sure the thing to be negated is already a parsed expression 286 | negated, ok := elems[len(elems)-1].(*expr.Expression) 287 | if !ok { 288 | return elems, nonTerminals, false 289 | } 290 | 291 | elems = elems[:len(elems)-2] 292 | elems = append(elems, 293 | expr.NOT( 294 | wrapLiteral(negated, defaultField), 295 | ), 296 | ) 297 | // we consumed one terminal, the NOT 298 | return elems, drop(nonTerminals, 1), true 299 | } 300 | // sub collapses a parenthesized ( E ) group down to its inner expression. 301 | func sub(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 302 | // all the internal terms should have reduced by the time we hit this reducer 303 | if len(elems) != 3 { 304 | return elems, nonTerminals, false 305 | } 306 | 307 | open, ok := elems[0].(lex.Token) 308 | if !ok || open.Typ != lex.TLParen { 309 | return elems, nonTerminals, false 310 | } 311 | 312 | closed, ok := elems[len(elems)-1].(lex.Token) 313 | if !ok || closed.Typ != lex.TRParen { 314 | return elems, nonTerminals, false 315 | } 316 | 317 | // we consumed two terminals, the ( and ) 318 | return []any{elems[1]}, drop(nonTerminals, 2), true 319 | } 320 | // must reduces a leading + terminal into a MUST expression. 321 | func must(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 322 | if len(elems) != 2 { 323 | return elems, nonTerminals, false 324 | } 325 | 326 | must, ok := 
elems[0].(lex.Token) 327 | if !ok || must.Typ != lex.TPlus { 328 | return elems, nonTerminals, false 329 | } 330 | 331 | rest, ok := elems[1].(*expr.Expression) 332 | if !ok { 333 | return elems, nonTerminals, false 334 | } 335 | 336 | // we consumed 1 terminal, the + 337 | return []any{expr.MUST(rest)}, drop(nonTerminals, 1), true 338 | } 339 | 340 | func mustNot(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 341 | if len(elems) != 2 { 342 | return elems, nonTerminals, false 343 | } 344 | 345 | must, ok := elems[0].(lex.Token) 346 | if !ok || must.Typ != lex.TMinus { 347 | return elems, nonTerminals, false 348 | } 349 | 350 | rest, ok := elems[1].(*expr.Expression) 351 | if !ok { 352 | return elems, nonTerminals, false 353 | } 354 | // we consumed one terminal, the - 355 | return []any{expr.MUSTNOT(rest)}, drop(nonTerminals, 1), true 356 | } 357 | 358 | func fuzzy(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 359 | if len(elems) < 2 { 360 | return elems, nonTerminals, false 361 | } 362 | 363 | tilde, ok := elems[1].(lex.Token) 364 | if !ok || tilde.Typ != lex.TTilde { 365 | return elems, nonTerminals, false 366 | } 367 | 368 | rest, ok := elems[0].(*expr.Expression) 369 | if !ok { 370 | return elems, nonTerminals, false 371 | } 372 | 373 | // If we have exactly 2 elements, use implicit distance of 1 374 | if len(elems) == 2 { 375 | return []any{expr.FUZZY(rest, 1)}, drop(nonTerminals, 1), true 376 | } 377 | 378 | // We have 3+ elements. 
Check if elems[2] is a valid numeric distance 379 | if distance, ok := elems[2].(*expr.Expression); ok { 380 | if idistance, err := strconv.Atoi(distance.String()); err == nil { 381 | return []any{expr.FUZZY(rest, idistance)}, drop(nonTerminals, 1), true 382 | } 383 | // elems[2] is an Expression but not a valid numeric distance 384 | // This means we have [expr, ~, non-numeric-expr] which should be reduced 385 | // to [FUZZY(expr, 1), non-numeric-expr] so the parser can inject an implicit AND 386 | result := append([]any{expr.FUZZY(rest, 1)}, elems[2:]...) 387 | return result, drop(nonTerminals, 1), true 388 | } 389 | 390 | // elems[2] is NOT an Expression (might be a Token or something else) 391 | // This means we have [expr, ~, token/other] - reduce just [expr, ~] with implicit distance 392 | // The token/other will be handled in the next reduce cycle 393 | result := append([]any{expr.FUZZY(rest, 1)}, elems[2:]...) 394 | return result, drop(nonTerminals, 1), true 395 | } 396 | 397 | func boost(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 398 | if len(elems) < 2 { 399 | return elems, nonTerminals, false 400 | } 401 | 402 | carrot, ok := elems[1].(lex.Token) 403 | if !ok || carrot.Typ != lex.TCarrot { 404 | return elems, nonTerminals, false 405 | } 406 | 407 | rest, ok := elems[0].(*expr.Expression) 408 | if !ok { 409 | return elems, nonTerminals, false 410 | } 411 | 412 | if len(elems) == 2 { 413 | return []any{expr.BOOST(rest, 1.0)}, drop(nonTerminals, 1), true 414 | } 415 | 416 | // We have 3+ elements. 
Check if elems[2] is a valid numeric power 417 | if power, ok := elems[2].(*expr.Expression); ok { 418 | if fpower, err := toPositiveFloat(power.String()); err == nil { 419 | // Valid power - reduce all 3 elements 420 | return []any{expr.BOOST(rest, fpower)}, drop(nonTerminals, 1), true 421 | } 422 | } 423 | 424 | // elems[2] is NOT a valid power - reduce just [expr, ^] with implicit power 425 | // Return the remaining elements to stay on the stack for further processing 426 | result := append([]any{expr.BOOST(rest, 1.0)}, elems[2:]...) 427 | return result, drop(nonTerminals, 1), true 428 | } 429 | 430 | func rangeop(elems []any, nonTerminals []lex.Token, defaultField string) ([]any, []lex.Token, bool) { 431 | // we need a term, :, [, begin, TO, end, ] to have a range operator which is 7 elems 432 | if len(elems) != 7 { 433 | return elems, nonTerminals, false 434 | } 435 | 436 | colon, ok := elems[1].(lex.Token) 437 | if !ok || colon.Typ != lex.TColon { 438 | return elems, nonTerminals, false 439 | } 440 | 441 | open, ok := elems[2].(lex.Token) 442 | if !ok || (open.Typ != lex.TLSquare && open.Typ != lex.TLCurly) { 443 | return elems, nonTerminals, false 444 | } 445 | 446 | closed, ok := elems[6].(lex.Token) 447 | if !ok || (closed.Typ != lex.TRSquare && closed.Typ != lex.TRCurly) { 448 | return elems, nonTerminals, false 449 | } 450 | 451 | to, ok := elems[4].(lex.Token) 452 | if !ok || to.Typ != lex.TTO { 453 | return elems, nonTerminals, false 454 | } 455 | 456 | term, ok := elems[0].(*expr.Expression) 457 | if !ok { 458 | return elems, nonTerminals, false 459 | } 460 | 461 | start, ok := elems[3].(*expr.Expression) 462 | if !ok { 463 | return elems, nonTerminals, false 464 | } 465 | 466 | end, ok := elems[5].(*expr.Expression) 467 | if !ok { 468 | return elems, nonTerminals, false 469 | } 470 | 471 | // we consumed four terminals, the :, [, TO, and ] 472 | return []any{expr.Rang( 473 | term, start, end, (open.Typ == lex.TLSquare && closed.Typ == lex.TRSquare), 
474 | )}, drop(nonTerminals, 4), true 475 | } 476 | 477 | func drop[T any](stack []T, i int) []T { 478 | return stack[:len(stack)-i] 479 | } 480 | 481 | func toPositiveFloat(in string) (f float64, err error) { 482 | i, err := strconv.Atoi(in) 483 | if err == nil && i > 0 { 484 | return float64(i), nil 485 | } 486 | 487 | pf, err := strconv.ParseFloat(in, 64) 488 | if err == nil && pf > 0 { 489 | return float64(pf), nil 490 | } 491 | 492 | return f, fmt.Errorf("[%v] is not a positive float", in) 493 | } 494 | 495 | // wrapLiteral will wrap a literal expression in an equals expression for a defaultField. 496 | // we need this because we want to support lucene expressions like a:b AND "c" which needs a default 497 | // field to compare "c" against to be valid. 498 | func wrapLiteral(lit *expr.Expression, field string) *expr.Expression { 499 | if lit.Op == expr.Literal && field != "" { 500 | return expr.Eq(expr.Column(field), lit) 501 | } 502 | return lit 503 | } 504 | -------------------------------------------------------------------------------- /pkg/lucene/expr/expression.go: -------------------------------------------------------------------------------- 1 | package expr 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | // Lucene Grammar: 12 | // E -> 13 | // E:E 14 | // (E) 15 | // +E 16 | // -E 17 | // E~E 18 | // E^E 19 | // NOT E 20 | // E AND E 21 | // E OR E 22 | // id 23 | // [id TO id] 24 | 25 | // Added grammar to be compatible with elastic lucene 26 | // See https://www.elastic.co/guide/en/elasticsearch/reference/8.9/query-dsl-query-string-query.html#query-string-syntax 27 | // E:>E 28 | // E:>=E 29 | // E: 0 { 157 | return Expr(e, Boost, power[0]) 158 | } 159 | return Expr(e, Boost) 160 | } 161 | 162 | // FUZZY wraps an expression in a fuzzy 163 | func FUZZY(e any, distance ...int) *Expression { 164 | if len(distance) > 0 { 165 | return Expr(e, Fuzzy, distance[0]) 166 | } 167 | return Expr(e, Fuzzy) 
168 | } 169 | 170 | // IsExpr checks if the input is an expression 171 | func IsExpr(in any) bool { 172 | _, isExpr := in.(*Expression) 173 | return isExpr 174 | } 175 | 176 | // Validate validates the expression is correctly structured. 177 | func Validate(in any) (err error) { 178 | e, isExpr := in.(*Expression) 179 | if !isExpr { 180 | // if we don't have an expression we must be in a leaf node 181 | return nil 182 | } 183 | 184 | fn, found := validators[e.Op] 185 | if !found { 186 | return fmt.Errorf("unsupported operator %v", e.Op) 187 | } 188 | err = fn(e) 189 | if err != nil { 190 | return err 191 | } 192 | 193 | err = Validate(e.Left) 194 | if err != nil { 195 | return err 196 | } 197 | 198 | return Validate(e.Right) 199 | } 200 | 201 | // Column represents a column in sql. It will not be escaped by quotes in the sql rendering 202 | type Column string 203 | 204 | // GoString is a debug print for the column type 205 | func (c Column) GoString() string { 206 | return fmt.Sprintf("COLUMN(%s)", c) 207 | } 208 | 209 | // Expr creates a general new expression. The other public functions are just helpers that call this 210 | // function underneath. 
211 | func Expr(left any, op Operator, right ...any) *Expression { 212 | if isStringlike(left) && operatesOnColumn(op) { 213 | left = wrapInColumn(left) 214 | } 215 | 216 | if isLiteral(left) && op != Literal && op != Wild && op != Regexp { 217 | left = literalToExpr(left) 218 | } 219 | 220 | e := ptr(empty()) 221 | e.Left = left 222 | e.Op = op 223 | 224 | // support using a like operator with wildcards or regex 225 | if op == Equals && len(right) == 1 && shouldUseLikeOperator(right[0]) { 226 | e.Op = Like 227 | e.Right = right[0].(*Expression) 228 | return e 229 | } 230 | 231 | // support changing boost power 232 | if op == Boost { 233 | e.boostPower = 1.0 234 | if len(right) == 1 && isFloat(right[0]) { 235 | e.boostPower = right[0].(float64) 236 | } 237 | return e 238 | } 239 | 240 | // support changing fuzzy distance 241 | if op == Fuzzy { 242 | e.fuzzyDistance = 1 243 | if len(right) == 1 && isInt(right[0]) { 244 | e.fuzzyDistance = right[0].(int) 245 | } 246 | return e 247 | } 248 | 249 | // support passing a range with inclusivity 250 | if op == Range && len(right) == 3 && isBool(right[2]) { 251 | e.Right = &RangeBoundary{ 252 | Min: literalToExpr(right[0]), 253 | Max: literalToExpr(right[1]), 254 | Inclusive: right[2].(bool), 255 | } 256 | return e 257 | } 258 | 259 | // support passing a slice to an IN operator 260 | if op == In && len(right) > 0 { 261 | e.Right = right[0].(*Expression) 262 | return e 263 | } 264 | 265 | if op == List { 266 | // super gross but this is how go handles any types that are slices 267 | slice, isSlice := left.([]any)[0].([]*Expression) 268 | if isSlice { 269 | e.Left = slice 270 | return e 271 | } 272 | 273 | l := left.([]any) 274 | vals := []*Expression{} 275 | for _, v := range l { 276 | vals = append(vals, v.(*Expression)) 277 | } 278 | e.Left = vals 279 | return e 280 | } 281 | 282 | // if right is present and non nil then add it to the expression 283 | if len(right) >= 1 && right[0] != nil { 284 | if isLiteral(right[0]) { 
285 | right[0] = literalToExpr(right[0]) 286 | } 287 | 288 | e.Right = right[0] 289 | } 290 | 291 | return e 292 | } 293 | 294 | type jsonExpression struct { 295 | Left json.RawMessage `json:"left"` 296 | Operator string `json:"operator"` 297 | Right json.RawMessage `json:"right,omitempty"` 298 | 299 | RangeBoundary *RangeBoundary `json:"boundaries,omitempty"` 300 | FuzzyDistance *int `json:"distance,omitempty"` 301 | BoostPower *float64 `json:"power,omitempty"` 302 | } 303 | 304 | // MarshalJSON is a custom JSON serialization for the Expression 305 | func (e Expression) MarshalJSON() (out []byte, err error) { 306 | // if we are in a leaf node just marshal the value 307 | if e.Op == Literal || e.Op == Wild || e.Op == Regexp { 308 | return json.Marshal(e.Left) 309 | } 310 | 311 | leftRaw, err := json.Marshal(e.Left) 312 | if err != nil { 313 | return out, err 314 | } 315 | 316 | c := jsonExpression{ 317 | Left: leftRaw, 318 | Operator: toString[e.Op], 319 | } 320 | 321 | // this is dumb but we need it so our "null" is not event given. Otherwise the json serialization 322 | // will persist a null value. 323 | if e.Right != nil { 324 | rightRaw, err := json.Marshal(e.Right) 325 | if err != nil { 326 | return out, err 327 | } 328 | c.Right = rightRaw 329 | } 330 | 331 | if e.boostPower != 1.0 { 332 | c.BoostPower = &e.boostPower 333 | } 334 | 335 | if e.fuzzyDistance != 1 { 336 | c.FuzzyDistance = &e.fuzzyDistance 337 | } 338 | 339 | return json.Marshal(c) 340 | } 341 | 342 | // UnmarshalJSON is a custom JSON deserialization for the Expression 343 | func (e *Expression) UnmarshalJSON(data []byte) (err error) { 344 | // initalize our default values, e cannot be nil here. 
345 | *e = empty() 346 | // if this does not look like an object it must be a literal 347 | if !isJSONObject(json.RawMessage(data)) { 348 | Expr, err := unmarshalLiteral(json.RawMessage(data)) 349 | // this is required because apparently you can't swap pointers to your receiver mid method 350 | *e = *Expr 351 | return err 352 | } 353 | 354 | // unmarshal the current layer in the json first, then worry about 355 | // the left and right hand subobjects 356 | var c jsonExpression 357 | err = json.Unmarshal(data, &c) 358 | if err != nil { 359 | return err 360 | } 361 | 362 | // check if it is an array so we can parse it into literals 363 | if isArray(json.RawMessage(c.Left)) { 364 | var l []json.RawMessage 365 | err = json.Unmarshal(c.Left, &l) 366 | if err != nil { 367 | return err 368 | } 369 | 370 | exprs := []*Expression{} 371 | for _, v := range l { 372 | parsedExp, err := unmarshalLiteral(v) 373 | if err != nil { 374 | return err 375 | } 376 | exprs = append(exprs, parsedExp) 377 | } 378 | e.Left = exprs 379 | } else { 380 | e.Left = ptr(empty()) 381 | err = json.Unmarshal(c.Left, e.Left) 382 | if err != nil { 383 | return err 384 | } 385 | } 386 | 387 | e.Op = fromString[c.Operator] 388 | 389 | // if the left hand side is a string then it must be a column 390 | if isStringlike(e.Left) && operatesOnColumn(e.Op) { 391 | e.Left = wrapInColumn(e.Left) 392 | } 393 | 394 | if len(c.Right) > 0 && looksLikeRangeBoundary(c.Right) { 395 | var boundary RangeBoundary 396 | err = json.Unmarshal(c.Right, &boundary) 397 | if err != nil { 398 | return err 399 | } 400 | if !IsExpr(boundary.Min) { 401 | boundary.Min = literalToExpr(toIntIfNecessary(boundary.Min)) 402 | } 403 | 404 | if !IsExpr(boundary.Max) { 405 | boundary.Max = literalToExpr(toIntIfNecessary(boundary.Max)) 406 | } 407 | e.Right = &boundary 408 | } else if len(c.Right) > 0 { 409 | e.Right = ptr(empty()) 410 | err = json.Unmarshal(c.Right, e.Right) 411 | if err != nil { 412 | return err 413 | } 414 | } 415 | 416 
| if e.Op == Fuzzy { 417 | e.fuzzyDistance = 1 418 | if c.FuzzyDistance != nil { 419 | e.fuzzyDistance = *c.FuzzyDistance 420 | } 421 | } 422 | 423 | if e.Op == Boost { 424 | e.boostPower = 1.0 425 | if c.BoostPower != nil { 426 | e.boostPower = *c.BoostPower 427 | } 428 | } 429 | 430 | return nil 431 | } 432 | 433 | func unmarshalLiteral(in json.RawMessage) (e *Expression, err error) { 434 | e = ptr(empty()) 435 | 436 | // check if it is an int first because all ints can be parsed as floats 437 | i, err := strconv.Atoi(string(in)) 438 | if err == nil { 439 | return Lit(i), nil 440 | } 441 | 442 | // check if it is a float 443 | f, err := strconv.ParseFloat(string(in), 64) 444 | if err == nil { 445 | return Lit(f), nil 446 | } 447 | 448 | // we know it is some sort of string so decode it 449 | var s string 450 | err = json.Unmarshal(in, &s) 451 | if err != nil { 452 | return e, err 453 | } 454 | 455 | return literalToExpr(s), nil 456 | } 457 | 458 | func isArray(in json.RawMessage) bool { 459 | trimmed := bytes.TrimSpace(in) 460 | if len(trimmed) == 0 { 461 | return false 462 | } 463 | 464 | return trimmed[0] == '[' && trimmed[len(trimmed)-1] == ']' 465 | } 466 | 467 | // looksLikeRangeBoundary checks whether the marshalled json has the keys for a range boundary. 468 | // This is a hack but we need to know whether to unmarshal an expression or a range boundary. 469 | func looksLikeRangeBoundary(in json.RawMessage) bool { 470 | // strip all the whitespace out of the input 471 | s := strings.Join(strings.Fields(string(in)), "") 472 | 473 | return strings.Contains(s, "\"min\":") && 474 | strings.Contains(s, "\"max\":") && 475 | !strings.Contains(s, "\"left\":") 476 | } 477 | 478 | func literalToExpr(in any) *Expression { 479 | if IsExpr(in) { 480 | return in.(*Expression) 481 | } 482 | 483 | s, isStr := in.(string) 484 | if !isStr { 485 | return Lit(in) 486 | } 487 | 488 | // if it has leading and trailing /'s then it probably is a regex. 
489 | // Note this needs to be checked before the wildcard check as a regex 490 | // can contain * and ?. 491 | // TODO this should probably check for escaping 492 | if s[0] == '/' && s[len(s)-1] == '/' { 493 | return REGEXP(s) 494 | } 495 | 496 | // if it contains a * or ? then it probably is a wildcard expression 497 | // TODO this should probably check for escaping 498 | if strings.ContainsAny(s, "*?") { 499 | return WILD(s) 500 | } 501 | 502 | return Lit(s) 503 | } 504 | 505 | func isJSONObject(in json.RawMessage) bool { 506 | trimmed := bytes.TrimSpace(in) 507 | if len(trimmed) == 0 { 508 | return false 509 | } 510 | 511 | return trimmed[0] == '{' && trimmed[len(trimmed)-1] == '}' 512 | } 513 | 514 | // isStringLike checks if the input is a string or is a literal wrapping a string 515 | func isStringlike(in any) bool { 516 | _, isStr := in.(string) 517 | e, isExpr := in.(*Expression) 518 | if isExpr { 519 | _, isStrLiteralExpr := e.Left.(string) 520 | return isStrLiteralExpr 521 | } 522 | 523 | return isStr 524 | } 525 | 526 | // operatesOnColumn checks if an operator can be applied to a column (the left side of the operator). 527 | // Example: equal can be applied onto a column (e.g. myColumn = 'foo') but Boost (^) cannot. 528 | func operatesOnColumn(op Operator) bool { 529 | return op == Equals || 530 | op == Range || 531 | op == Greater || 532 | op == Less || 533 | op == GreaterEq || 534 | op == LessEq || 535 | op == In || 536 | op == Like 537 | } 538 | 539 | // wrapInColumn converts a string to a column and enforces column 540 | // invariants (e.g. 
if the column name contains a space then it must be quoted)
541 | func wrapInColumn(in any) (out *Expression) {
542 | 	s, isStr := in.(string)
543 | 	if isStr {
544 | 		return Lit(Column(s))
545 | 	}
546 | 
547 | 	// an expression whose left side is a string is treated as a column name too
548 | 	e, isExpr := in.(*Expression)
549 | 	if isExpr {
550 | 		s, isStr = e.Left.(string)
551 | 		if isStr {
552 | 			return Lit(Column(s))
553 | 		}
554 | 	}
555 | 	return e
556 | }
557 | // apparently the json unmarshal only parses float64 values so we check if the float64
558 | // is actually a whole number. If it is then make it an int
559 | func toIntIfNecessary(in any) (out any) {
560 | 	f, isFloat := in.(float64)
561 | 	if !isFloat {
562 | 		return in
563 | 	}
564 | 
565 | 	if f == float64(int(f)) {
566 | 		return int(f)
567 | 	}
568 | 
569 | 	return f
570 | }
571 | // empty returns an Expression carrying the default fuzzy distance (1) and boost power (1.0).
572 | func empty() Expression {
573 | 	return Expression{
574 | 		fuzzyDistance: 1,
575 | 		boostPower:    1.0,
576 | 	}
577 | }
578 | // ptr returns a pointer to a copy of its argument.
579 | func ptr[T any](in T) *T {
580 | 	return &in
581 | }
582 | // shouldUseLikeOperator reports whether the input is a wildcard or regexp expression.
583 | func shouldUseLikeOperator(in any) bool {
584 | 	expr, isExpr := in.(*Expression)
585 | 	if !isExpr {
586 | 		return false
587 | 	}
588 | 	return expr.Op == Wild || expr.Op == Regexp
589 | }
590 | 
--------------------------------------------------------------------------------
/parse_test.go:
--------------------------------------------------------------------------------
1 | package lucene
2 | 
3 | import (
4 | 	"encoding/json"
5 | 	"reflect"
6 | 	"testing"
7 | 
8 | 	"github.com/grindlemire/go-lucene/pkg/lucene/expr"
9 | )
10 | 
11 | const errTemplate = "%s:\n wanted %#v\n got %#v"
12 | 
13 | func TestParseLucene(t *testing.T) {
14 | 	type tc struct {
15 | 		input string
16 | 		want  *expr.Expression
17 | 	}
18 | 
19 | 	tcs := map[string]tc{
20 | 		"single_literal": {
21 | 			input: "a",
22 | 			want:  expr.Lit("a"),
23 | 		},
24 | 		"basic_equal": {
25 | 			input: "a:b",
26 | 			want:  expr.Eq("a", "b"),
27 | 		},
28 | 		"basic_equal_with_number": {
29 | 			input: "a:5",
30 | 			want:  expr.Eq("a", 5),
31 | 		},
32 | 		"basic_greater_with_number": {
33 | 			input: "a:>22",
34 | 			
want: expr.GREATER("a", 22), 35 | }, 36 | "basic_greater_eq_with_number": { 37 | input: "a:>=22", 38 | want: expr.GREATEREQ("a", 22), 39 | }, 40 | "basic_less_with_number": { 41 | input: "a:<22", 42 | want: expr.LESS("a", 22), 43 | }, 44 | "basic_less_eq_with_number": { 45 | input: "a:<=22", 46 | want: expr.LESSEQ("a", 22), 47 | }, 48 | "basic_greater_less_with_number": { 49 | input: "a:<22 AND b:>33", 50 | want: expr.AND(expr.LESS("a", 22), expr.GREATER("b", 33)), 51 | }, 52 | "basic_greater_less_eq_with_number": { 53 | input: "a:<=22 AND b:>=33", 54 | want: expr.AND(expr.LESSEQ("a", 22), expr.GREATEREQ("b", 33)), 55 | }, 56 | "basic_wild_equal_with_*": { 57 | input: "a:b*", 58 | want: expr.LIKE("a", "b*"), 59 | }, 60 | "basic_wild_equal_with_?": { 61 | input: "a:b?z", 62 | want: expr.LIKE("a", expr.WILD("b?z")), 63 | }, 64 | "basic_inclusive_range": { 65 | input: "a:[* TO 5]", 66 | want: expr.Rang("a", expr.WILD("*"), 5, true), 67 | }, 68 | "basic_exclusive_range": { 69 | input: "a:{* TO 5}", 70 | want: expr.Rang("a", expr.WILD("*"), 5, false), 71 | }, 72 | "range_over_strings": { 73 | input: "a:{foo TO bar}", 74 | want: expr.Rang("a", "foo", "bar", false), 75 | }, 76 | "basic_fuzzy": { 77 | input: "b AND a~", 78 | want: expr.AND("b", expr.FUZZY("a", 1)), 79 | }, 80 | "fuzzy_power": { 81 | input: "b AND a~10", 82 | want: expr.AND("b", expr.FUZZY("a", 10)), 83 | }, 84 | "basic_boost": { 85 | input: "b AND a^", 86 | want: expr.AND("b", expr.BOOST("a", 1.0)), 87 | }, 88 | "boost_power": { 89 | input: "b AND a^10", 90 | want: expr.AND("b", expr.BOOST("a", 10.0)), 91 | }, 92 | "regexp": { 93 | input: "a:/b [c]/", 94 | want: expr.Eq("a", expr.REGEXP("/b [c]/")), 95 | }, 96 | "regexp_with_keywords": { 97 | input: `a:/b "[c]/`, 98 | want: expr.Eq("a", expr.REGEXP(`/b "[c]/`)), 99 | }, 100 | "regexp_with_escaped_chars": { 101 | input: `url:/example.com\/foo\/bar\/.*/`, 102 | want: expr.Eq("url", expr.REGEXP(`/example.com\/foo\/bar\/.*/`)), 103 | }, 104 | 
"basic_default_AND": { 105 | input: "a b", 106 | want: expr.AND("a", "b"), 107 | }, 108 | "default_to_AND_with_subexpressions": { 109 | input: "a:b c:d", 110 | want: expr.AND( 111 | expr.Eq("a", "b"), 112 | expr.Eq("c", "d"), 113 | ), 114 | }, 115 | "basic_and": { 116 | input: "a AND b", 117 | want: expr.AND("a", "b"), 118 | }, 119 | "and_with_nesting": { 120 | input: "a:foo AND b:bar", 121 | want: expr.AND( 122 | expr.Eq("a", "foo"), 123 | expr.Eq("b", "bar"), 124 | ), 125 | }, 126 | "basic_or": { 127 | input: "a OR b", 128 | want: expr.OR( 129 | "a", 130 | "b", 131 | ), 132 | }, 133 | "range_operator_inclusive": { 134 | input: "a:[1 TO 5]", 135 | want: expr.Rang("a", 1, 5, true), 136 | }, 137 | "range_operator_inclusive_unbound": { 138 | input: `a:[* TO 200]`, 139 | want: expr.Rang("a", expr.WILD("*"), expr.Lit(200), true), 140 | }, 141 | "range_operator_exclusive": { 142 | input: `a:{"ab" TO "az"}`, 143 | want: expr.Rang("a", expr.Lit("ab"), expr.Lit("az"), false), 144 | }, 145 | "range_operator_exclusive_unbound": { 146 | input: `a:{2 TO *}`, 147 | want: expr.Rang("a", expr.Lit(2), expr.WILD("*"), false), 148 | }, 149 | "or_with_nesting": { 150 | input: "a:foo OR b:bar", 151 | want: expr.OR( 152 | expr.Eq("a", "foo"), 153 | expr.Eq("b", "bar"), 154 | ), 155 | }, 156 | "basic_not": { 157 | input: "NOT b", 158 | want: expr.NOT("b"), 159 | }, 160 | "nested_not": { 161 | input: "a:foo OR NOT b:bar", 162 | want: expr.OR( 163 | expr.Eq("a", "foo"), 164 | expr.NOT(expr.Eq("b", "bar")), 165 | ), 166 | }, 167 | "term_grouping": { 168 | input: "(a:foo OR b:bar) AND c:baz", 169 | want: expr.AND( 170 | expr.OR( 171 | expr.Eq("a", "foo"), 172 | expr.Eq("b", "bar"), 173 | ), 174 | expr.Eq("c", "baz"), 175 | ), 176 | }, 177 | "value_grouping": { 178 | input: "a:(foo OR baz OR bar)", 179 | want: expr.IN( 180 | "a", 181 | expr.LIST( 182 | expr.Lit("foo"), 183 | expr.Lit("baz"), 184 | expr.Lit("bar"), 185 | ), 186 | ), 187 | }, 188 | "basic_must": { 189 | input: "+a:b", 190 | 
want: expr.MUST( 191 | expr.Eq("a", "b"), 192 | ), 193 | }, 194 | "basic_must_not": { 195 | input: "-a:b", 196 | want: expr.MUSTNOT( 197 | expr.Eq("a", "b"), 198 | ), 199 | }, 200 | "basic_nested_must_not": { 201 | input: "d:e AND (-a:b AND +f:e)", 202 | want: expr.AND( 203 | expr.Eq("d", "e"), 204 | expr.AND( 205 | expr.MUSTNOT(expr.Eq("a", "b")), 206 | expr.MUST(expr.Eq("f", "e")), 207 | ), 208 | ), 209 | }, 210 | "basic_escaping": { 211 | input: `a:\(1\+1\)\:2`, 212 | want: expr.Eq("a", expr.Lit(`(1+1):2`)), 213 | }, 214 | "escaped_column_name": { 215 | input: `foo\ bar:b`, 216 | want: expr.Eq(`foo bar`, "b"), 217 | }, 218 | "boost_key_value": { 219 | input: "a:b^2 AND foo", 220 | want: expr.AND( 221 | expr.BOOST(expr.Eq("a", "b"), 2), 222 | "foo", 223 | ), 224 | }, 225 | "boost_literal": { 226 | input: "foo^4", 227 | want: expr.BOOST("foo", 4), 228 | }, 229 | "boost_literal_in_compound": { 230 | input: "a:b AND foo^4", 231 | want: expr.AND( 232 | expr.Eq("a", "b"), 233 | expr.BOOST("foo", 4), 234 | ), 235 | }, 236 | "boost_literal_leading": { 237 | input: "foo^4 AND a:b", 238 | want: expr.AND( 239 | expr.BOOST("foo", 4), 240 | expr.Eq("a", "b"), 241 | ), 242 | }, 243 | "boost_quoted_literal": { 244 | input: `"foo bar"^4 AND a:b`, 245 | want: expr.AND( 246 | expr.BOOST(expr.Lit("foo bar"), 4), 247 | expr.Eq("a", "b"), 248 | ), 249 | }, 250 | "boost_key_implicit_power_before_term": { 251 | input: "color:red^ k1:v1", 252 | want: expr.AND( 253 | expr.BOOST(expr.Eq("color", "red"), 1), 254 | expr.Eq("k1", "v1"), 255 | ), 256 | }, 257 | "boost_key_power_before_term": { 258 | input: "color:red^2 k1:v1", 259 | want: expr.AND( 260 | expr.BOOST(expr.Eq("color", "red"), 2), 261 | expr.Eq("k1", "v1"), 262 | ), 263 | }, 264 | "boost_sub_expression": { 265 | input: "(title:foo OR title:bar)^1.5 AND (body:foo OR body:bar)", 266 | want: expr.AND( 267 | expr.BOOST( 268 | expr.OR( 269 | expr.Eq("title", "foo"), 270 | expr.Eq("title", "bar"), 271 | ), 272 | 1.5), 273 | expr.OR( 
274 | expr.Eq("body", "foo"), 275 | expr.Eq("body", "bar"), 276 | ), 277 | ), 278 | }, 279 | "nested_sub_expressions_with_boost": { 280 | input: "((title:foo)^1.2 OR title:bar) AND (body:foo OR body:bar)", 281 | want: expr.AND( 282 | expr.OR( 283 | expr.BOOST(expr.Eq("title", "foo"), 1.2), 284 | expr.Eq("title", "bar"), 285 | ), 286 | expr.OR( 287 | expr.Eq("body", "foo"), 288 | expr.Eq("body", "bar"), 289 | ), 290 | ), 291 | }, 292 | "nested_sub_expressions": { 293 | input: "((title:foo OR title:bar) AND (body:foo OR body:bar)) OR k:v", 294 | want: expr.OR( 295 | expr.AND( 296 | expr.OR( 297 | expr.Eq("title", "foo"), 298 | expr.Eq("title", "bar"), 299 | ), 300 | 301 | expr.OR( 302 | expr.Eq("body", "foo"), 303 | expr.Eq("body", "bar"), 304 | ), 305 | ), 306 | expr.Eq("k", "v"), 307 | ), 308 | }, 309 | "fuzzy_key_value": { 310 | input: "a:b~2 AND foo", 311 | want: expr.AND( 312 | expr.FUZZY(expr.Eq("a", "b"), 2), 313 | "foo", 314 | ), 315 | }, 316 | "fuzzy_key_value_default": { 317 | input: "a:b~ AND foo", 318 | want: expr.AND( 319 | expr.FUZZY(expr.Eq("a", "b"), 1), 320 | "foo", 321 | ), 322 | }, 323 | "fuzzy_key_implicit_distance_before_term": { 324 | input: "color:red~ k1:v1", 325 | want: expr.AND( 326 | expr.FUZZY(expr.Eq("color", "red"), 1), 327 | expr.Eq("k1", "v1"), 328 | ), 329 | }, 330 | "fuzzy_key_distance_before_term": { 331 | input: "color:red~2 k1:v1", 332 | want: expr.AND( 333 | expr.FUZZY(expr.Eq("color", "red"), 2), 334 | expr.Eq("k1", "v1"), 335 | ), 336 | }, 337 | "fuzzy_literal": { 338 | input: "foo~4", 339 | want: expr.FUZZY("foo", 4), 340 | }, 341 | "fuzzy_literal_default": { 342 | input: "foo~", 343 | want: expr.FUZZY("foo", 1), 344 | }, 345 | "fuzzy_literal_in_compound": { 346 | input: "a:b AND foo~4", 347 | want: expr.AND( 348 | expr.Eq("a", "b"), 349 | expr.FUZZY("foo", 4), 350 | ), 351 | }, 352 | "fuzzy_literal_in_implicit_compound": { 353 | input: "a:b foo~4", 354 | want: expr.AND( 355 | expr.Eq("a", "b"), 356 | expr.FUZZY("foo", 4), 357 
| ), 358 | }, 359 | "fuzzy_literal_leading": { 360 | input: "foo~4 AND a:b", 361 | want: expr.AND( 362 | expr.FUZZY("foo", 4), 363 | expr.Eq("a", "b"), 364 | ), 365 | }, 366 | "fuzzy_literal_leading_in_implicit_compound": { 367 | input: "foo~4 AND a:b", 368 | want: expr.AND( 369 | expr.FUZZY("foo", 4), 370 | expr.Eq("a", "b"), 371 | ), 372 | }, 373 | "fuzzy_quoted_literal": { 374 | input: `"foo bar"~4 AND a:b`, 375 | want: expr.AND( 376 | expr.FUZZY(expr.Lit("foo bar"), 4), 377 | expr.Eq("a", "b"), 378 | ), 379 | }, 380 | "fuzzy_sub_expression": { 381 | input: "(title:foo OR title:bar)~2 AND (body:foo OR body:bar)", 382 | want: expr.AND( 383 | expr.FUZZY( 384 | expr.OR( 385 | expr.Eq("title", "foo"), 386 | expr.Eq("title", "bar"), 387 | ), 388 | 2), 389 | expr.OR( 390 | expr.Eq("body", "foo"), 391 | expr.Eq("body", "bar"), 392 | ), 393 | ), 394 | }, 395 | "nested_sub_expressions_with_fuzzy": { 396 | input: "((title:foo)~ OR title:bar) AND (body:foo OR body:bar)", 397 | want: expr.AND( 398 | expr.OR( 399 | expr.FUZZY(expr.Eq("title", "foo"), 1), 400 | expr.Eq("title", "bar"), 401 | ), 402 | 403 | expr.OR( 404 | expr.Eq("body", "foo"), 405 | expr.Eq("body", "bar"), 406 | ), 407 | ), 408 | }, 409 | "precedence_works": { 410 | input: "a:b AND c:d OR e:f OR h:i AND j:k", 411 | want: expr.OR( 412 | expr.OR( 413 | expr.AND( 414 | expr.Eq("a", "b"), 415 | expr.Eq("c", "d"), 416 | ), 417 | expr.Eq("e", "f")), 418 | expr.AND( 419 | expr.Eq("h", "i"), 420 | expr.Eq("j", "k"), 421 | ), 422 | ), 423 | }, 424 | "test_precedence_weaving": { 425 | input: "a OR b AND c OR d", 426 | want: expr.OR( 427 | expr.OR( 428 | "a", 429 | expr.AND("b", "c"), 430 | ), 431 | "d", 432 | ), 433 | }, 434 | "test_precedence_weaving_with_not": { 435 | input: "NOT a OR b AND NOT c OR d", 436 | want: expr.OR( 437 | expr.OR( 438 | expr.NOT("a"), 439 | expr.AND("b", expr.NOT("c")), 440 | ), 441 | "d", 442 | ), 443 | }, 444 | "test_equals_in_precedence": { 445 | input: "a:az OR b:bz AND NOT c:z OR d", 
446 | want: expr.OR( 447 | expr.OR( 448 | expr.Eq("a", "az"), 449 | expr.AND( 450 | expr.Eq("b", "bz"), 451 | expr.NOT( 452 | expr.Eq("c", "z"), 453 | ), 454 | ), 455 | ), 456 | "d", 457 | ), 458 | }, 459 | "test_parens_in_precedence": { 460 | input: "a AND (c OR d)", 461 | want: expr.AND( 462 | "a", 463 | expr.OR( 464 | "c", 465 | "d", 466 | ), 467 | ), 468 | }, 469 | "test_range_precedence_simple": { 470 | input: "c:[* to -1] OR d", 471 | want: expr.OR( 472 | expr.Rang("c", expr.WILD("*"), -1, true), 473 | "d", 474 | ), 475 | }, 476 | "test_range_precedence": { 477 | input: "a OR b AND c:[* to -1] OR d", 478 | want: expr.OR( 479 | expr.OR( 480 | "a", 481 | expr.AND( 482 | "b", 483 | expr.Rang("c", expr.WILD("*"), -1, true), 484 | ), 485 | ), 486 | "d", 487 | ), 488 | }, 489 | "test_full_precedence": { 490 | input: "a OR b AND c:[* to -1] OR d AND NOT +e:f", 491 | want: expr.OR( 492 | expr.OR( 493 | "a", 494 | expr.AND( 495 | "b", 496 | expr.Rang("c", expr.WILD("*"), -1, true), 497 | ), 498 | ), 499 | expr.AND( 500 | "d", 501 | expr.NOT( 502 | expr.MUST(expr.Eq("e", "f")), 503 | ), 504 | ), 505 | ), 506 | }, 507 | "test_full_precedence_with_suffixes": { 508 | input: "a OR b AND c OR d~ AND NOT +(e:f)^10", 509 | want: expr.OR( 510 | expr.OR( 511 | "a", 512 | expr.AND("b", "c"), 513 | ), 514 | expr.AND( 515 | expr.FUZZY("d", 1), 516 | expr.NOT( 517 | expr.BOOST( 518 | expr.MUST( 519 | expr.Eq("e", "f"), 520 | ), 521 | 10.0, 522 | ), 523 | ), 524 | ), 525 | ), 526 | }, 527 | "test_elastic_greater_than_precedence": { 528 | input: "a:>10 AND -b:<=-20", 529 | want: expr.AND( 530 | expr.GREATER("a", 10), 531 | expr.MUSTNOT( 532 | expr.LESSEQ("b", -20), 533 | ), 534 | ), 535 | }, 536 | } 537 | 538 | for name, tc := range tcs { 539 | t.Run(name, func(t *testing.T) { 540 | got, err := Parse(tc.input) 541 | if err != nil { 542 | t.Fatalf("wanted no error, got: %v", err) 543 | } 544 | if !reflect.DeepEqual(tc.want, got) { 545 | t.Fatalf(errTemplate, "parsed expression doesn't 
match", tc.want, got) 546 | } 547 | 548 | raw, err := json.Marshal(got) 549 | if err != nil { 550 | t.Fatalf("wanted no error marshalling to json, got: %s", err) 551 | } 552 | 553 | var gotSerialized expr.Expression 554 | err = json.Unmarshal(raw, &gotSerialized) 555 | if err != nil { 556 | t.Fatalf("wanted no error unmarshalling from json, got: %s", err) 557 | } 558 | 559 | if !reflect.DeepEqual(got, &gotSerialized) { 560 | // occasionally this test fails and the error message makes the test look like 561 | // the want and got are equivalent. This is almost always an unexported var is different 562 | // Using testify/require will show the error if it shows up 563 | // require.Equal(t, tc.want, gotSerialized) 564 | t.Fatalf(errTemplate, "roundtrip serialization is not stable", tc.want, gotSerialized) 565 | } 566 | }) 567 | } 568 | } 569 | 570 | func TestParseLuceneWithDefaultField(t *testing.T) { 571 | type tc struct { 572 | input string 573 | defaultField string 574 | want *expr.Expression 575 | } 576 | 577 | tcs := map[string]tc{ 578 | "single_literal": { 579 | input: "a", 580 | defaultField: "foo", 581 | want: expr.Eq("foo", "a"), 582 | }, 583 | "quoted_literal": { 584 | input: `"a"`, 585 | defaultField: "foo", 586 | want: expr.Eq("foo", "a"), 587 | }, 588 | "number_literal": { 589 | input: `7`, 590 | defaultField: "foo", 591 | want: expr.Eq("foo", 7), 592 | }, 593 | "multiple_literals": { 594 | input: "a b", 595 | defaultField: "foo", 596 | want: expr.AND(expr.Eq("foo", "a"), expr.Eq("foo", "b")), 597 | }, 598 | "basic_and": { 599 | input: "a AND b", 600 | defaultField: "foo", 601 | want: expr.AND(expr.Eq("foo", "a"), expr.Eq("foo", "b")), 602 | }, 603 | } 604 | 605 | for name, tc := range tcs { 606 | t.Run(name, func(t *testing.T) { 607 | got, err := Parse(tc.input, WithDefaultField(tc.defaultField)) 608 | if err != nil { 609 | t.Fatalf("wanted no error, got: %v", err) 610 | } 611 | if !reflect.DeepEqual(tc.want, got) { 612 | t.Fatalf(errTemplate, "parsed 
expression doesn't match", tc.want, got) 613 | } 614 | }) 615 | } 616 | } 617 | 618 | func TestParseFailure(t *testing.T) { 619 | type tc struct { 620 | input string 621 | } 622 | 623 | tcs := map[string]tc{ 624 | "unpaired_paren": { 625 | input: "(a AND b", 626 | }, 627 | "unbalanced_paren": { 628 | input: "(a AND b))", 629 | }, 630 | "unbalanced_nested_paren": { 631 | input: "(a AND (b AND c)", 632 | }, 633 | "equal_without_rhs": { 634 | input: "a = ", 635 | }, 636 | "equal_without_lhs": { 637 | input: "= b", 638 | }, 639 | "empty_parens_nil": { 640 | input: "() = ()", 641 | }, 642 | "and_without_rhs": { 643 | input: "a AND", 644 | }, 645 | "and_without_lhs": { 646 | input: "AND a", 647 | }, 648 | "or_without_rhs": { 649 | input: "a OR", 650 | }, 651 | "or_without_lhs": { 652 | input: "OR a", 653 | }, 654 | "not_without_subexpression_1": { 655 | input: "NOT", 656 | }, 657 | "not_without_subexpression_2": { 658 | input: "NOT()", 659 | }, 660 | "must_without_subexpression_1": { 661 | input: "+", 662 | }, 663 | "must_without_subexpression_2": { 664 | input: "+()", 665 | }, 666 | "mustnot_without_subexpression_1": { 667 | input: "-", 668 | }, 669 | "mustnot_without_subexpression_2": { 670 | input: "-()", 671 | }, 672 | "boost_without_subexpression_1": { 673 | input: "^2", 674 | }, 675 | "boost_without_subexpression_2": { 676 | input: "()^2", 677 | }, 678 | "boost_before_colon_nonterminal": { 679 | input: "color:red^:k1", 680 | }, 681 | "fuzzy_without_subexpression_1": { 682 | input: "~2", 683 | }, 684 | "fuzzy_without_subexpression_2": { 685 | input: "()~2", 686 | }, 687 | "fuzzy_without_subexpression_3": { 688 | input: "~", 689 | }, 690 | "fuzzy_without_subexpression_4": { 691 | input: "()~", 692 | }, 693 | "fuzzy_before_colon_nonterminal": { 694 | input: "color:red~:k1", 695 | }, 696 | "range_without_min": { 697 | input: "[ TO 5]", 698 | }, 699 | "range_without_max": { 700 | input: "[* TO ]", 701 | }, 702 | "range_with_invalid_min": { 703 | input: "[(a OR b) TO 
*]", 704 | }, 705 | "range_with_invalid_max": { 706 | input: "[* TO (a OR b)]", 707 | }, 708 | "nested_range_missing_max": { 709 | input: "(A:B AND C:(D OR E)) OR (NOT(+a:[* TO]))", 710 | }, 711 | "invalid_implicit": { 712 | input: "a: b:c", 713 | }, 714 | } 715 | 716 | for name, tc := range tcs { 717 | t.Run(name, func(t *testing.T) { 718 | _, err := Parse(tc.input) 719 | if err == nil { 720 | t.Fatalf("expected error but did not get one") 721 | } 722 | }) 723 | } 724 | } 725 | 726 | func FuzzParse(f *testing.F) { 727 | tcs := []string{ 728 | "A:B AND C:D", 729 | "+foo OR (NOT(B))", 730 | "A:bar", 731 | "NOT(b:c)", 732 | "z:[* TO 10]", 733 | "x:[10 TO *] AND NOT(y:[1 TO 5]", 734 | "(+a:b -c:d) OR (z:[1 TO *] NOT(foo))", 735 | `+bbq:"woo yay"`, 736 | `-bbq:"woo"`, 737 | `(a:b)^10`, 738 | `a:foo~`, 739 | } 740 | for _, tc := range tcs { 741 | f.Add(tc) 742 | } 743 | f.Fuzz(func(t *testing.T, in string) { 744 | _, _ = Parse(in) 745 | }) 746 | } 747 | -------------------------------------------------------------------------------- /postgresql_test.go: -------------------------------------------------------------------------------- 1 | package lucene 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func TestPostgresSQLEndToEnd(t *testing.T) { 9 | type tc struct { 10 | input string 11 | want string 12 | defaultField string 13 | err string 14 | } 15 | 16 | tcs := map[string]tc{ 17 | // "single_literal": { 18 | // input: "a", 19 | // want: `a`, 20 | // }, 21 | "basic_equal": { 22 | input: "a:b", 23 | want: `"a" = 'b'`, 24 | }, 25 | "basic_equal_with_number": { 26 | input: "a:5", 27 | want: `"a" = 5`, 28 | }, 29 | "basic_greater_with_number": { 30 | input: "a:>22", 31 | want: `"a" > 22`, 32 | }, 33 | "basic_greater_eq_with_number": { 34 | input: "a:>=22", 35 | want: `"a" >= 22`, 36 | }, 37 | "basic_less_with_number": { 38 | input: "a:<22", 39 | want: `"a" < 22`, 40 | }, 41 | "basic_less_eq_with_number": { 42 | input: "a:<=22", 43 | want: `"a" <= 22`, 44 | }, 45 | 
"basic_greater_less_with_number": { 46 | input: "a:<22 AND b:>33", 47 | want: `("a" < 22) AND ("b" > 33)`, 48 | }, 49 | "basic_greater_less_eq_with_number": { 50 | input: "a:<=22 AND b:>=33", 51 | want: `("a" <= 22) AND ("b" >= 33)`, 52 | }, 53 | "basic_wild_equal_with_*": { 54 | input: "a:b*", 55 | want: `"a" SIMILAR TO 'b%'`, 56 | }, 57 | "basic_wild_equal_with_?": { 58 | input: "a:b?z", 59 | want: `"a" SIMILAR TO 'b_z'`, 60 | }, 61 | "basic_inclusive_range": { 62 | input: "a:[* TO 5]", 63 | want: `"a" <= 5`, 64 | }, 65 | "basic_exclusive_range": { 66 | input: "a:{* TO 5}", 67 | want: `"a" < 5`, 68 | }, 69 | "range_over_strings": { 70 | input: "a:{foo TO bar}", 71 | want: `"a" BETWEEN 'foo' AND 'bar'`, 72 | }, 73 | "basic_fuzzy": { 74 | input: "b AND a~", 75 | err: "unable to render operator [FUZZY]", 76 | }, 77 | "fuzzy_power": { 78 | input: "b AND a~10", 79 | err: "unable to render operator [FUZZY]", 80 | }, 81 | "basic_boost": { 82 | input: "b AND a^", 83 | err: "unable to render operator [BOOST]", 84 | }, 85 | "boost_power": { 86 | input: "b AND a^10", 87 | err: "unable to render operator [BOOST]", 88 | }, 89 | "regexp": { 90 | input: "a:/b [c]/", 91 | want: `"a" ~ '/b [c]/'`, 92 | }, 93 | "regexp_with_keywords": { 94 | input: `a:/b "[c]/`, 95 | want: `"a" ~ '/b "[c]/'`, 96 | }, 97 | "regexp_with_escaped_chars": { 98 | input: `url:/example.com\/foo\/bar\/.*/`, 99 | want: `"url" ~ '/example.com\/foo\/bar\/.*/'`, 100 | }, 101 | "basic_default_AND": { 102 | input: "a b", 103 | want: `'a' AND 'b'`, 104 | }, 105 | "default_to_AND_with_subexpressions": { 106 | input: "a:b c:d", 107 | want: `("a" = 'b') AND ("c" = 'd')`, 108 | }, 109 | "basic_and": { 110 | input: "a AND b", 111 | want: `'a' AND 'b'`, 112 | }, 113 | "and_with_nesting": { 114 | input: "a:foo AND b:bar", 115 | want: `("a" = 'foo') AND ("b" = 'bar')`, 116 | }, 117 | "basic_or": { 118 | input: "a OR b", 119 | want: `'a' OR 'b'`, 120 | }, 121 | "or_with_nesting": { 122 | input: "a:foo OR b:bar", 123 | 
want: `("a" = 'foo') OR ("b" = 'bar')`, 124 | }, 125 | "range_operator_inclusive": { 126 | input: "a:[1 TO 5]", 127 | want: `"a" >= 1 AND "a" <= 5`, 128 | }, 129 | "range_operator_inclusive_unbound": { 130 | input: `a:[* TO 200]`, 131 | want: `"a" <= 200`, 132 | }, 133 | "range_operator_exclusive": { 134 | input: `a:{"ab" TO "az"}`, 135 | want: `"a" BETWEEN 'ab' AND 'az'`, 136 | }, 137 | "range_operator_exclusive_unbound": { 138 | input: `a:{2 TO *}`, 139 | want: `"a" > 2`, 140 | }, 141 | "basic_not": { 142 | input: "NOT b", 143 | want: `NOT('b')`, 144 | }, 145 | "nested_not": { 146 | input: "a:foo OR NOT b:bar", 147 | want: `("a" = 'foo') OR (NOT("b" = 'bar'))`, 148 | }, 149 | "term_grouping": { 150 | input: "(a:foo OR b:bar) AND c:baz", 151 | want: `(("a" = 'foo') OR ("b" = 'bar')) AND ("c" = 'baz')`, 152 | }, 153 | "value_grouping": { 154 | input: "a:(foo OR baz OR bar)", 155 | want: `"a" IN ('foo', 'baz', 'bar')`, 156 | }, 157 | "basic_must": { 158 | input: "+a:b", 159 | want: `"a" = 'b'`, 160 | }, 161 | "basic_must_not": { 162 | input: "-a:b", 163 | want: `NOT("a" = 'b')`, 164 | }, 165 | "basic_nested_must_not": { 166 | input: "d:e AND (-a:b AND +f:e)", 167 | want: `("d" = 'e') AND ((NOT("a" = 'b')) AND ("f" = 'e'))`, 168 | }, 169 | "basic_escaping": { 170 | input: `a:\(1\+1\)\:2`, 171 | want: `"a" = '(1+1):2'`, 172 | }, 173 | "escaped_column_name": { 174 | input: `foo\ bar:b`, 175 | want: `"foo bar" = 'b'`, 176 | }, 177 | "boost_key_value": { 178 | input: "a:b^2 AND foo", 179 | err: "unable to render operator [BOOST]", 180 | }, 181 | "nested_sub_expressions": { 182 | input: "((title:foo OR title:bar) AND (body:foo OR body:bar)) OR k:v", 183 | want: `((("title" = 'foo') OR ("title" = 'bar')) AND (("body" = 'foo') OR ("body" = 'bar'))) OR ("k" = 'v')`, 184 | }, 185 | "fuzzy_key_value": { 186 | input: "a:b~2 AND foo", 187 | err: "unable to render operator [FUZZY]", 188 | }, 189 | "precedence_works": { 190 | input: "a:b AND c:d OR e:f OR h:i AND j:k", 191 | want: 
`((("a" = 'b') AND ("c" = 'd')) OR ("e" = 'f')) OR (("h" = 'i') AND ("j" = 'k'))`, 192 | }, 193 | "test_precedence_weaving": { 194 | input: "a OR b AND c OR d", 195 | want: `('a' OR ('b' AND 'c')) OR 'd'`, 196 | }, 197 | "test_precedence_weaving_with_not": { 198 | input: "NOT a OR b AND NOT c OR d", 199 | want: `((NOT('a')) OR ('b' AND (NOT('c')))) OR 'd'`, 200 | }, 201 | "test_equals_in_precedence": { 202 | input: "a:az OR b:bz AND NOT c:z OR d", 203 | want: `(("a" = 'az') OR (("b" = 'bz') AND (NOT("c" = 'z')))) OR 'd'`, 204 | }, 205 | "test_parens_in_precedence": { 206 | input: "a AND (c OR d)", 207 | want: `'a' AND ('c' OR 'd')`, 208 | }, 209 | "test_range_precedence_simple": { 210 | input: "c:[* to -1] OR d", 211 | want: `("c" <= -1) OR 'd'`, 212 | }, 213 | "test_range_precedence": { 214 | input: "a OR b AND c:[* to -1] OR d", 215 | want: `('a' OR ('b' AND ("c" <= -1))) OR 'd'`, 216 | }, 217 | "test_full_precedence": { 218 | input: "a OR b AND c:[* to -1] OR d AND NOT +e:f", 219 | want: `('a' OR ('b' AND ("c" <= -1))) OR ('d' AND (NOT("e" = 'f')))`, 220 | }, 221 | "test_elastic_greater_than_precedence": { 222 | input: "a:>10 AND -b:<=-20", 223 | want: `("a" > 10) AND (NOT("b" <= -20))`, 224 | }, 225 | "escape_quotes": { 226 | input: "a:'b'", 227 | want: `"a" = '''b'''`, 228 | }, 229 | "name_starts_with_number": { 230 | input: "1a:b", 231 | want: `"1a" = 'b'`, 232 | }, 233 | "default_field_and": { 234 | input: `title:"The Right Way" AND go`, 235 | want: `("title" = 'The Right Way') AND ("default" = 'go')`, 236 | defaultField: "default", 237 | }, 238 | "default_field_or": { 239 | input: `title:"The Right Way" OR go`, 240 | want: `("title" = 'The Right Way') OR ("default" = 'go')`, 241 | defaultField: "default", 242 | }, 243 | "default_field_not": { 244 | input: `title:"The Right Way" AND NOT(go)`, 245 | want: `("title" = 'The Right Way') AND (NOT("default" = 'go'))`, 246 | defaultField: "default", 247 | }, 248 | "asterisk_in_literal_are_regular_expression": { 249 
| input: `foo:*`, 250 | want: `"foo" SIMILAR TO '%'`, 251 | }, 252 | } 253 | 254 | for name, tc := range tcs { 255 | t.Run(name, func(t *testing.T) { 256 | got, err := ToPostgres(tc.input, WithDefaultField(tc.defaultField)) 257 | if err != nil { 258 | // if we got an expect error then we are fine 259 | if tc.err != "" && strings.Contains(err.Error(), tc.err) { 260 | return 261 | } 262 | t.Fatalf("unexpected error rendering expression: %v", err) 263 | } 264 | 265 | if tc.err != "" { 266 | t.Fatalf("\nexpected error [%s]\ngot: %s", tc.err, got) 267 | } 268 | 269 | if got != tc.want { 270 | expr, err := Parse(tc.input) 271 | if err != nil { 272 | t.Fatalf("unable to parse expression: %v", err) 273 | } 274 | t.Fatalf("\nwant %s\ngot %s\nparsed expression: %#v\n", tc.want, got, expr) 275 | } 276 | }) 277 | } 278 | } 279 | 280 | func TestPostgresParameterizedSQLEndToEnd(t *testing.T) { 281 | type tc struct { 282 | input string 283 | wantStr string 284 | wantParams []any 285 | defaultField string 286 | err string 287 | } 288 | 289 | tcs := map[string]tc{ 290 | // "single_literal": { 291 | // input: "a", 292 | // want: `a`, 293 | // }, 294 | "basic_equal": { 295 | input: "a:b", 296 | wantStr: `"a" = $1`, 297 | wantParams: []any{"b"}, 298 | }, 299 | "basic_equal_with_number": { 300 | input: "a:5", 301 | wantStr: `"a" = $1`, 302 | wantParams: []any{5}, 303 | }, 304 | "basic_greater_with_number": { 305 | input: "a:>22", 306 | wantStr: `"a" > $1`, 307 | wantParams: []any{22}, 308 | }, 309 | "basic_greater_eq_with_number": { 310 | input: "a:>=22", 311 | wantStr: `"a" >= $1`, 312 | wantParams: []any{22}, 313 | }, 314 | "basic_less_with_number": { 315 | input: "a:<22", 316 | wantStr: `"a" < $1`, 317 | wantParams: []any{22}, 318 | }, 319 | "basic_less_eq_with_number": { 320 | input: "a:<=22", 321 | wantStr: `"a" <= $1`, 322 | wantParams: []any{22}, 323 | }, 324 | "basic_greater_less_with_number": { 325 | input: "a:<22 AND b:>33", 326 | wantStr: `("a" < $1) AND ("b" > $2)`, 327 | 
wantParams: []any{22, 33}, 328 | }, 329 | "basic_greater_less_eq_with_number": { 330 | input: "a:<=22 AND b:>=33", 331 | wantStr: `("a" <= $1) AND ("b" >= $2)`, 332 | wantParams: []any{22, 33}, 333 | }, 334 | "basic_wild_equal_with_*": { 335 | input: "a:b*", 336 | wantStr: `"a" SIMILAR TO $1`, 337 | wantParams: []any{"b%"}, 338 | }, 339 | "basic_wild_equal_with_?": { 340 | input: "a:b?z", 341 | wantStr: `"a" SIMILAR TO $1`, 342 | wantParams: []any{"b_z"}, 343 | }, 344 | "basic_inclusive_range": { 345 | input: "a:[* TO 5]", 346 | wantStr: `"a" <= $1`, 347 | wantParams: []any{5}, 348 | }, 349 | "basic_exclusive_range": { 350 | input: "a:{* TO 5}", 351 | wantStr: `"a" < $1`, 352 | wantParams: []any{5}, 353 | }, 354 | "range_over_strings": { 355 | input: "a:{foo TO bar}", 356 | wantStr: `"a" BETWEEN $1 AND $2`, 357 | wantParams: []any{"foo", "bar"}, 358 | }, 359 | "basic_fuzzy": { 360 | input: "b AND a~", 361 | err: "unable to render operator [FUZZY]", 362 | }, 363 | "fuzzy_power": { 364 | input: "b AND a~10", 365 | err: "unable to render operator [FUZZY]", 366 | }, 367 | "basic_boost": { 368 | input: "b AND a^", 369 | err: "unable to render operator [BOOST]", 370 | }, 371 | "boost_power": { 372 | input: "b AND a^10", 373 | err: "unable to render operator [BOOST]", 374 | }, 375 | "regexp": { 376 | input: "a:/b [c]/", 377 | wantStr: `"a" ~ $1`, 378 | wantParams: []any{"/b [c]/"}, 379 | }, 380 | "regexp_with_keywords": { 381 | input: `a:/b "[c]/`, 382 | wantStr: `"a" ~ $1`, 383 | wantParams: []any{`/b "[c]/`}, 384 | }, 385 | "regexp_with_escaped_chars": { 386 | input: `url:/example.com\/foo\/bar\/.*/`, 387 | wantStr: `"url" ~ $1`, 388 | wantParams: []any{`/example.com\/foo\/bar\/.*/`}, 389 | }, 390 | "basic_default_AND": { 391 | input: "a b", 392 | wantStr: `$1 AND $2`, 393 | wantParams: []any{"a", "b"}, 394 | }, 395 | "default_to_AND_with_subexpressions": { 396 | input: "a:b c:d", 397 | wantStr: `("a" = $1) AND ("c" = $2)`, 398 | wantParams: []any{"b", "d"}, 399 | }, 
400 | "basic_and": { 401 | input: "a AND b", 402 | wantStr: `$1 AND $2`, 403 | wantParams: []any{"a", "b"}, 404 | }, 405 | "and_with_nesting": { 406 | input: "a:foo AND b:bar", 407 | wantStr: `("a" = $1) AND ("b" = $2)`, 408 | wantParams: []any{"foo", "bar"}, 409 | }, 410 | "basic_or": { 411 | input: "a OR b", 412 | wantStr: `$1 OR $2`, 413 | wantParams: []any{"a", "b"}, 414 | }, 415 | "or_with_nesting": { 416 | input: "a:foo OR b:bar", 417 | wantStr: `("a" = $1) OR ("b" = $2)`, 418 | wantParams: []any{"foo", "bar"}, 419 | }, 420 | "range_operator_inclusive": { 421 | input: "a:[1 TO 5]", 422 | wantStr: `"a" >= $1 AND "a" <= $2`, 423 | wantParams: []any{1, 5}, 424 | }, 425 | "range_operator_inclusive_unbound": { 426 | input: `a:[* TO 200]`, 427 | wantStr: `"a" <= $1`, 428 | wantParams: []any{200}, 429 | }, 430 | "range_operator_exclusive": { 431 | input: `a:{"ab" TO "az"}`, 432 | wantStr: `"a" BETWEEN $1 AND $2`, 433 | wantParams: []any{"ab", "az"}, 434 | }, 435 | "range_operator_exclusive_unbound": { 436 | input: `a:{2 TO *}`, 437 | wantStr: `"a" > $1`, 438 | wantParams: []any{2}, 439 | }, 440 | "basic_not": { 441 | input: "NOT b", 442 | wantStr: `NOT($1)`, 443 | wantParams: []any{"b"}, 444 | }, 445 | "nested_not": { 446 | input: "a:foo OR NOT b:bar", 447 | wantStr: `("a" = $1) OR (NOT("b" = $2))`, 448 | wantParams: []any{"foo", "bar"}, 449 | }, 450 | "term_grouping": { 451 | input: "(a:foo OR b:bar) AND c:baz", 452 | wantStr: `(("a" = $1) OR ("b" = $2)) AND ("c" = $3)`, 453 | wantParams: []any{"foo", "bar", "baz"}, 454 | }, 455 | "value_grouping": { 456 | input: "a:(foo OR baz OR bar)", 457 | wantStr: `"a" IN ($1, $2, $3)`, 458 | wantParams: []any{"foo", "baz", "bar"}, 459 | }, 460 | "basic_must": { 461 | input: "+a:b", 462 | wantStr: `"a" = $1`, 463 | wantParams: []any{"b"}, 464 | }, 465 | "basic_must_not": { 466 | input: "-a:b", 467 | wantStr: `NOT("a" = $1)`, 468 | wantParams: []any{"b"}, 469 | }, 470 | "basic_nested_must_not": { 471 | input: "d:e AND (-a:b AND 
+f:e)", 472 | wantStr: `("d" = $1) AND ((NOT("a" = $2)) AND ("f" = $3))`, 473 | wantParams: []any{"e", "b", "e"}, 474 | }, 475 | "basic_escaping": { 476 | input: `a:\(1\+1\)\:2`, 477 | wantStr: `"a" = $1`, 478 | wantParams: []any{"(1+1):2"}, 479 | }, 480 | "escaped_column_name": { 481 | input: `foo\ bar:b`, 482 | wantStr: `"foo bar" = $1`, 483 | wantParams: []any{"b"}, 484 | }, 485 | "boost_key_value": { 486 | input: "a:b^2 AND foo", 487 | err: "unable to render operator [BOOST]", 488 | }, 489 | "nested_sub_expressions": { 490 | input: "((title:foo OR title:bar) AND (body:foo OR body:bar)) OR k:v", 491 | wantStr: `((("title" = $1) OR ("title" = $2)) AND (("body" = $3) OR ("body" = $4))) OR ("k" = $5)`, 492 | wantParams: []any{"foo", "bar", "foo", "bar", "v"}, 493 | }, 494 | "fuzzy_key_value": { 495 | input: "a:b~2 AND foo", 496 | err: "unable to render operator [FUZZY]", 497 | }, 498 | "precedence_works": { 499 | input: "a:b AND c:d OR e:f OR h:i AND j:k", 500 | wantStr: `((("a" = $1) AND ("c" = $2)) OR ("e" = $3)) OR (("h" = $4) AND ("j" = $5))`, 501 | wantParams: []any{"b", "d", "f", "i", "k"}, 502 | }, 503 | "test_precedence_weaving": { 504 | input: "a OR b AND c OR d", 505 | wantStr: `($1 OR ($2 AND $3)) OR $4`, 506 | wantParams: []any{"a", "b", "c", "d"}, 507 | }, 508 | "test_precedence_weaving_with_not": { 509 | input: "NOT a OR b AND NOT c OR d", 510 | wantStr: `((NOT($1)) OR ($2 AND (NOT($3)))) OR $4`, 511 | wantParams: []any{"a", "b", "c", "d"}, 512 | }, 513 | "test_equals_in_precedence": { 514 | input: "a:az OR b:bz AND NOT c:z OR d", 515 | wantStr: `(("a" = $1) OR (("b" = $2) AND (NOT("c" = $3)))) OR $4`, 516 | wantParams: []any{"az", "bz", "z", "d"}, 517 | }, 518 | "test_parens_in_precedence": { 519 | input: "a AND (c OR d)", 520 | wantStr: `$1 AND ($2 OR $3)`, 521 | wantParams: []any{"a", "c", "d"}, 522 | }, 523 | "test_range_precedence_simple": { 524 | input: "c:[* to -1] OR d", 525 | wantStr: `("c" <= $1) OR $2`, 526 | wantParams: []any{-1, "d"}, 527 
| }, 528 | "test_range_precedence": { 529 | input: "a OR b AND c:[* to -1] OR d", 530 | wantStr: `($1 OR ($2 AND ("c" <= $3))) OR $4`, 531 | wantParams: []any{"a", "b", -1, "d"}, 532 | }, 533 | "test_full_precedence": { 534 | input: "a OR b AND c:[* to -1] OR d AND NOT +e:f", 535 | wantStr: `($1 OR ($2 AND ("c" <= $3))) OR ($4 AND (NOT("e" = $5)))`, 536 | wantParams: []any{"a", "b", -1, "d", "f"}, 537 | }, 538 | "test_elastic_greater_than_precedence": { 539 | input: "a:>10 AND -b:<=-20", 540 | wantStr: `("a" > $1) AND (NOT("b" <= $2))`, 541 | wantParams: []any{10, -20}, 542 | }, 543 | "escape_quotes": { 544 | input: "a:'b'", 545 | wantStr: `"a" = $1`, 546 | wantParams: []any{"'b'"}, 547 | }, 548 | "name_starts_with_number": { 549 | input: "1a:b", 550 | wantStr: `"1a" = $1`, 551 | wantParams: []any{"b"}, 552 | }, 553 | "default_field_and": { 554 | input: `title:"The Right Way" AND go`, 555 | wantStr: `("title" = $1) AND ("default" = $2)`, 556 | wantParams: []any{"The Right Way", "go"}, 557 | defaultField: "default", 558 | }, 559 | "default_field_or": { 560 | input: `title:"The Right Way" OR go`, 561 | wantStr: `("title" = $1) OR ("default" = $2)`, 562 | wantParams: []any{"The Right Way", "go"}, 563 | defaultField: "default", 564 | }, 565 | "default_field_not": { 566 | input: `title:"The Right Way" AND NOT(go)`, 567 | wantStr: `("title" = $1) AND (NOT("default" = $2))`, 568 | wantParams: []any{"The Right Way", "go"}, 569 | defaultField: "default", 570 | }, 571 | "default_bare_field": { 572 | input: `this is an example`, 573 | wantStr: `((("default" = $1) AND ("default" = $2)) AND ("default" = $3)) AND ("default" = $4)`, 574 | wantParams: []any{"this", "is", "an", "example"}, 575 | defaultField: "default", 576 | }, 577 | "default_single_literal": { 578 | input: `a`, 579 | wantStr: `"default" = $1`, 580 | wantParams: []any{"a"}, 581 | defaultField: "default", 582 | }, 583 | "question_marks_in_literal_are_regular_expression": { 584 | input: `foo:abc?`, 585 | wantStr: 
`"foo" SIMILAR TO $1`, 586 | wantParams: []any{"abc_"}, 587 | }, 588 | "start asterisk_in_literal_are_regular_expression": { 589 | input: `foo:*`, 590 | wantStr: `"foo" SIMILAR TO $1`, 591 | wantParams: []any{"%"}, 592 | }, 593 | } 594 | 595 | for name, tc := range tcs { 596 | t.Run(name, func(t *testing.T) { 597 | gotStr, gotParams, err := ToParameterizedPostgres(tc.input, WithDefaultField(tc.defaultField)) 598 | if err != nil { 599 | // if we got an expect error then we are fine 600 | if tc.err != "" && strings.Contains(err.Error(), tc.err) { 601 | return 602 | } 603 | t.Fatalf("unexpected error rendering expression: %v", err) 604 | } 605 | 606 | if tc.err != "" { 607 | t.Fatalf("\nexpected error [%s]\ngot: %s", tc.err, gotStr) 608 | } 609 | 610 | if gotStr != tc.wantStr { 611 | expr, err := Parse(tc.input) 612 | if err != nil { 613 | t.Fatalf("unable to parse expression: %v", err) 614 | } 615 | t.Fatalf("\nwant %s\ngot %s\nparsed expression: %#v\n", tc.wantStr, gotStr, expr) 616 | } 617 | 618 | if len(gotParams) != len(tc.wantParams) { 619 | t.Fatalf("expected %d params(%v), got %d (%v)", len(tc.wantParams), tc.wantParams, len(gotParams), gotParams) 620 | } 621 | 622 | for i := range gotParams { 623 | if gotParams[i] != tc.wantParams[i] { 624 | t.Fatalf("expected param %d to be %v, got %v", i, tc.wantParams[i], gotParams[i]) 625 | } 626 | } 627 | }) 628 | } 629 | } 630 | --------------------------------------------------------------------------------