├── .github
    └── workflows
    │   └── ci.yaml
├── LICENSE
├── README.md
├── conversions.go
├── error.go
├── expr.go
├── go.mod
├── go.sum
├── interpreter.go
├── interpreter_test.go
├── lexer.go
├── parser.go
├── testdata
    └── fuzz
    │   └── FuzzMexpr
    │       ├── 0420946f64ef7f59fb34768f7918efff84357bad9e79d7a883cb8114e861e298
    │       ├── 0cdba2769206c77962ed40a6f6d36ca202c930f80a4b673d7b7382d886740414
    │       ├── 3381fab6d13d3fdf00437171af1c170de75caf45db43bddbaabc598f26032225
    │       ├── 50f5b81a904c39d7c20f0fd09f1394bf6e60e30eea5bc3421f5848e1a477a3d2
    │       ├── 5fd9184d945ce47fad21a9481ccd6ee1aa512ac28c991d995fe41af19cef16e9
    │       ├── 624b0c32d8fc63d8fc610a2f3d363baa32d554da3fb5d3c6d2060021f80b36c6
    │       ├── 9083a15555e8793ce5ed4a28fca588702d92ab32db80747ccef9322a1c9eb01d
    │       ├── ab53cd47184b4ce065b4bca4a5e5ba4f03987bd60882b02217fefd8e4eee127f
    │       ├── b074d9d373d06c31a9646f7f7756c283f624d1974d02bfdd9eb3018bb77badf2
    │       ├── d16cef97b88ea33670547ca16ba8c3f4f794dd9cda46b90a8bba5694230f32b6
    │       ├── dfddfa56e05613142f0130541c77d0f912e1c841e28da5669f403d95a12d98cb
    │       ├── e30f687995b5521363052c9f1b107729957ec2cd41cf059b748ae6a923c55a32
    │       └── f7acdb7129425776d8863f092a9654328efe3ecdf265e38536c924866b72546f
└── typecheck.go


/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on: [push]
 3 | jobs:
 4 |   build:
 5 |     runs-on: ubuntu-latest
 6 |     name: Build & Test
 7 |     steps:
 8 |       - uses: actions/checkout@v2
 9 |       - name: Setup go
10 |         uses: actions/setup-go@v1
11 |         with:
12 |           go-version: "1.18"
13 |       - run: go test -coverprofile=coverage.txt -covermode=atomic ./...
14 |       - uses: codecov/codecov-action@v1
15 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Daniel G. Taylor
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # MicroExpr
  2 | 
  3 | [![Go Reference](https://pkg.go.dev/badge/github.com/danielgtaylor/mexpr.svg)](https://pkg.go.dev/github.com/danielgtaylor/mexpr) [![Go Report Card](https://goreportcard.com/badge/github.com/danielgtaylor/mexpr)](https://goreportcard.com/report/github.com/danielgtaylor/mexpr) ![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/danielgtaylor/mexpr)
  4 | 
  5 | A small & fast dependency-free library for parsing micro expressions.
  6 | 
  7 | This library was originally built for use in templating languages (e.g. for-loop variable selection, if-statement evaluation) so is minimal in what it supports by design. If you need a more full-featured expression parser, check out [antonmedv/expr](https://github.com/antonmedv/expr) instead.
  8 | 
  9 | Features:
 10 | 
 11 | - Fast, low-allocation parser and runtime
 12 |   - Many simple expressions are zero-allocation
 13 | - Type checking during parsing
 14 | - Simple
 15 |   - Easy to learn
 16 |   - Easy to read
 17 |   - No hiding complex branching logic in expressions
 18 | - Intuitive, e.g. `"id" + 1` => `"id1"`
 19 | - Useful error messages, example:
 20 |   ```
 21 |   missing right operand
 22 |     not (1- <= 5)
 23 |     ......^
 24 |   ```
 25 | - Fuzz tested to prevent crashes
 26 | 
 27 | ## Usage
 28 | 
 29 | Try it out on the [Go Playground](https://play.golang.org/p/Z0UcEBgfxu_r)! You can find many [example expressions in the tests](https://github.com/danielgtaylor/mexpr/blob/main/interpreter_test.go#L18).
 30 | 
 31 | ```go
 32 | import "github.com/danielgtaylor/mexpr"
 33 | 
 34 | // Convenience for lexing/parsing/running in one step:
 35 | result, err := mexpr.Eval("a > b", map[string]interface{}{
 36 | 	"a": 2,
 37 | 	"b": 1,
 38 | })
 39 | 
 40 | // Manual method with type checking and fast AST re-use. Error handling is
 41 | // omitted for brevity.
 42 | l := mexpr.NewLexer("a > b")
 43 | p := mexpr.NewParser(l)
 44 | ast, err := p.Parse()
 45 | typeExamples := map[string]interface{}{
 46 | 	"a": 2,
 47 | 	"b": 1,
 48 | }
 49 | err = mexpr.TypeCheck(ast, typeExamples)
 50 | interpreter := mexpr.NewInterpreter(ast)
 51 | result1, err := interpreter.Run(map[string]interface{}{
 52 | 	"a": 1,
 53 | 	"b": 2,
 54 | })
 55 | result2, err := interpreter.Run(map[string]interface{}{
 56 | 	"a": 150,
 57 | 	"b": 30,
 58 | })
 59 | ```
 60 | 
 61 | Pretty errors use the passed-in input along with the error's offset to display an arrow of where within the expression the error occurs.
 62 | 
 63 | ```go
 64 | inputStr := "2 * foo"
 65 | _, err := mexpr.Eval(inputStr, nil)
 66 | if err != nil {
 67 | 	fmt.Println(err.Pretty(inputStr))
 68 | }
 69 | ```
 70 | 
 71 | ### Options
 72 | 
 73 | When running the interpreter a set of options can be passed in to change behavior. Available options:
 74 | 
 75 | | Option            | Default | Description                                                                                        |
 76 | | ----------------- | ------- | -------------------------------------------------------------------------------------------------- |
 77 | | `StrictMode`      | `false` | Be more strict, for example return an error when an identifier is not found rather than `nil`      |
 78 | | `UnquotedStrings` | `false` | Enable the use of unquoted strings, i.e. return a string instead of `nil` for undefined parameters |
 79 | 
 80 | ```go
 81 | // Using the top-level eval
 82 | mexpr.Eval(expression, inputObj, mexpr.StrictMode)
 83 | 
 84 | // Using an interpreter instance (options are given when it is created)
 85 | mexpr.NewInterpreter(ast, mexpr.StrictMode).Run(inputObj)
 86 | ```
 87 | 
 88 | ## Syntax
 89 | 
 90 | ### Literals
 91 | 
 92 | - **strings** double quoted e.g. `"hello"`
 93 | - **numbers** e.g. `123`, `2.5`, `1_000_000`
 94 | 
 95 | Internally all numbers are treated as `float64`, which means fewer conversions/casts when taking arbitrary JSON/YAML inputs.
 96 | 
 97 | ### Accessing properties
 98 | 
 99 | - Use `.` between property names
100 | - Use `[` and `]` for indexes, which can be negative
101 | 
102 | ```py
103 | foo.bar[0].value
104 | ```
105 | 
106 | ### Arithmetic operators
107 | 
108 | - `+` (addition)
109 | - `-` (subtraction)
110 | - `*` (multiplication)
111 | - `/` (division)
112 | - `%` (modulus)
113 | - `^` (power)
114 | 
115 | ```py
116 | (1 + 2) * 3^2
117 | ```
118 | 
119 | Math operations between constants are precomputed when possible, so it is efficient to write meaningful operations like `size <= 4 * 1024 * 1024`. The interpreter will see this as `size <= 4194304`.
120 | 
121 | ### Comparison operators
122 | 
123 | - `==` (equal)
124 | - `!=` (not equal)
125 | - `<` (less than)
126 | - `>` (greater than)
127 | - `<=` (less than or equal to)
128 | - `>=` (greater than or equal to)
129 | 
130 | ```py
131 | 100 >= 42
132 | ```
133 | 
134 | ### Logical operators
135 | 
136 | - `not` (negation)
137 | - `and`
138 | - `or`
139 | 
140 | ```py
141 | 1 < 2 and 3 < 4
142 | ```
143 | 
144 | Non-boolean values are converted to booleans. The following result in `true`:
145 | 
146 | - numbers greater than zero
147 | - non-empty string
148 | - array with at least one item
149 | - map with at least one key/value pair
150 | 
151 | ### String operators
152 | 
153 | - Indexing, e.g. `foo[0]`
154 | - Slicing, e.g. `foo[1:2]` or `foo[2:]`
155 | - `.length` pseudo-property, e.g. `foo.length`
156 | - `.lower` pseudo-property for lowercase, e.g. `foo.lower`
157 | - `.upper` pseudo-property for uppercase, e.g. `foo.upper`
158 | - `+` (concatenation)
159 | - `in` e.g. `"f" in "foo"`
160 | - `contains` e.g. `"foo" contains "f"`
161 | - `startsWith` e.g. `"foo" startsWith "f"`
162 | - `endsWith` e.g. `"foo" endsWith "o"`
163 | 
164 | Indexes are zero-based. Slice indexes are optional and are _inclusive_. `foo[1:2]` returns `el` if `foo` is `hello`. Indexes can be negative, e.g. `foo[-1]` selects the last character of the string.
165 | 
166 | Any value concatenated with a string will result in a string. For example `"id" + 1` will result in `"id1"`.
167 | 
168 | There is no distinction between strings, bytes, or runes. Everything is treated as a string.
169 | 
170 | #### Date Comparisons
171 | 
172 | String dates & times can be compared if they follow RFC 3339 / ISO 8601 with or without timezones.
173 | 
174 | - `before`, e.g. `start before "2020-01-01"`
175 | - `after`, e.g. `created after "2020-01-01T12:00:00Z"`
176 | 
177 | ### Array/slice operators
178 | 
179 | - Indexing, e.g. `foo[1]`
180 | - Slicing, e.g. `foo[1:2]` or `foo[2:]`
181 | - `.length` pseudo-property, e.g. `foo.length`
182 | - `+` (concatenation)
183 | - `in` (has item), e.g. `1 in foo`
184 | - `contains` e.g. `foo contains 1`
185 | 
186 | Indexes are zero-based. Slice indexes are optional and are _inclusive_. `foo[1:2]` returns `[2, 3]` if the `foo` is `[1, 2, 3, 4]`. Indexes can be negative, e.g. `foo[-1]` selects the last item in the array.
187 | 
188 | #### Array/slice filtering
189 | 
190 | A `where` clause can be used to filter the items in an array. The left side of the clause is the array to be filtered, while the right side is an expression to run on each item of the array. If the right side expression evaluates to true then the item is added to the result slice. For example:
191 | 
192 | ```
193 | // Get a list of items where the item.id is bigger than 3
194 | items where id > 3
195 | 
196 | // More complex example
197 | items where (id > 3 and labels contains "best")
198 | ```
199 | 
200 | This also makes it possible to implement one/any/all/none logic:
201 | 
202 | ```
203 | // One
204 | (items where id > 3).length == 1
205 | 
206 | // Any
207 | items where id > 3
208 | (items where id > 3).length > 0
209 | 
210 | // All
211 | (items where id > 3).length == items.length
212 | 
213 | // None
214 | not (items where id > 3)
215 | (items where id > 3).length == 0
216 | ```
217 | 
218 | ### Map operators
219 | 
220 | - Accessing values, e.g. `foo.bar.baz`
221 | - `in` (has key), e.g. `"key" in foo`
222 | - `contains` e.g. `foo contains "key"`
223 | 
224 | #### Map wildcard filtering
225 | 
226 | A `where` clause can be used as a wildcard key to filter values for all keys in a map. The left side of the clause is the map to be filtered, while the right side is an expression to run on each value of the map. If the right side expression evaluates to true then the value is added to the result slice. For example, given:
227 | 
228 | ```json
229 | {
230 |   "operations": {
231 |     "id1": { "method": "GET", "path": "/op1" },
232 |     "id2": { "method": "PUT", "path": "/op2" },
233 |     "id3": { "method": "DELETE", "path": "/op3" }
234 |   }
235 | }
236 | ```
237 | 
238 | You can run:
239 | 
240 | ```
241 | // Get all operations where the HTTP method is GET
242 | operations where method == "GET"
243 | ```
244 | 
245 | And the result would be a slice of matched values:
246 | 
247 | ```json
248 | [{ "method": "GET", "path": "/op1" }]
249 | ```
250 | 
251 | ## Performance
252 | 
253 | Performance compares favorably to [antonmedv/expr](https://github.com/antonmedv/expr) for both `Eval(...)` and cached program performance, which is expected given the more limited feature set. The `slow` benchmarks include lexing/parsing/interpreting while the `cached` ones are just the interpreting step. The `complex` example expression used is non-trivial: `foo.bar / (1 * 1024 * 1024) >= 1.0 and "v" in baz and baz.length > 3 and arr[2:].length == 1`.
254 | 
255 | ```
256 | goos: darwin
257 | goarch: amd64
258 | pkg: github.com/danielgtaylor/mexpr
259 | cpu: Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
260 | Benchmark/mexpr-field-slow-12           3673572       286.5 ns/op    144 B/op      6 allocs/op
261 | Benchmark/_expr-field-slow-12            956689      1276 ns/op     1096 B/op     23 allocs/op
262 | 
263 | Benchmark/mexpr-comparison-slow-12      1000000      1020 ns/op      656 B/op     16 allocs/op
264 | Benchmark/_expr-comparison-slow-12       383491      3069 ns/op     2224 B/op     38 allocs/op
265 | 
266 | Benchmark/mexpr-logical-slow-12         1000000      1063 ns/op      464 B/op     17 allocs/op
267 | Benchmark/_expr-logical-slow-12          292824      4148 ns/op     2336 B/op     38 allocs/op
268 | 
269 | Benchmark/mexpr-math-slow-12            1000000      1035 ns/op      656 B/op     16 allocs/op
270 | Benchmark/_expr-math-slow-12             399708      3004 ns/op     2184 B/op     38 allocs/op
271 | 
272 | Benchmark/mexpr-string-slow-12          1822945       655.6 ns/op    258 B/op     10 allocs/op
273 | Benchmark/_expr-string-slow-12           428604      2508 ns/op     1640 B/op     35 allocs/op
274 | 
275 | Benchmark/mexpr-index-slow-12           2015856       592.0 ns/op    280 B/op     10 allocs/op
276 | Benchmark/_expr-index-slow-12            517360      2301 ns/op     1872 B/op     30 allocs/op
277 | 
278 | Benchmark/mexpr-complex-slow-12          244039      5078 ns/op     2232 B/op     64 allocs/op
279 | Benchmark/_expr-complex-slow-12           69387     16825 ns/op    14378 B/op    107 allocs/op
280 | 
281 | Benchmark/mexpr-field-cached-12       100000000        11.37 ns/op     0 B/op      0 allocs/op
282 | Benchmark/_expr-field-cached-12         7761153       146.5 ns/op     48 B/op      2 allocs/op
283 | 
284 | Benchmark/mexpr-comparison-cached-12   38098502        30.93 ns/op     0 B/op      0 allocs/op
285 | Benchmark/_expr-comparison-cached-12    4563463       251.0 ns/op     64 B/op      3 allocs/op
286 | 
287 | Benchmark/mexpr-logical-cached-12      37563720        31.35 ns/op     0 B/op      0 allocs/op
288 | Benchmark/_expr-logical-cached-12      11000991       105.9 ns/op     32 B/op      1 allocs/op
289 | 
290 | Benchmark/mexpr-math-cached-12         24463279        47.41 ns/op     8 B/op      1 allocs/op
291 | Benchmark/_expr-math-cached-12          4531693       268.0 ns/op     72 B/op      4 allocs/op
292 | 
293 | Benchmark/mexpr-string-cached-12       43399368        26.83 ns/op     0 B/op      0 allocs/op
294 | Benchmark/_expr-string-cached-12        7302940       162.0 ns/op     48 B/op      2 allocs/op
295 | 
296 | Benchmark/mexpr-index-cached-12        45289230        25.67 ns/op     0 B/op      0 allocs/op
297 | Benchmark/_expr-index-cached-12         6057562       180.0 ns/op     48 B/op      2 allocs/op
298 | 
299 | Benchmark/mexpr-complex-cached-12       4271955       278.7 ns/op     40 B/op      3 allocs/op
300 | Benchmark/_expr-complex-cached-12       1456266       818.7 ns/op    208 B/op      9 allocs/op
301 | 
302 | ```
303 | 
304 | On average mexpr is around 3-10x faster for both full parsing and cached performance.
305 | 
306 | ## References
307 | 
308 | These were a big help in understanding how Pratt parsers work:
309 | 
310 | - https://dev.to/jrop/pratt-parsing
311 | - https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
312 | - https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
313 | - https://www.oilshell.org/blog/2017/03/31.html
314 | 


--------------------------------------------------------------------------------
/conversions.go:
--------------------------------------------------------------------------------
  1 | package mexpr
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"reflect"
  6 | 	"time"
  7 | )
  8 | 
  9 | func isNumber(v interface{}) bool {
 10 | 	switch v.(type) {
 11 | 	case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64:
 12 | 		return true
 13 | 	case float32, float64:
 14 | 		return true
 15 | 	}
 16 | 	return false
 17 | }
 18 | 
 19 | func toNumber(ast *Node, v interface{}) (float64, Error) {
 20 | 	switch n := v.(type) {
 21 | 	case float64:
 22 | 		return n, nil
 23 | 	case int:
 24 | 		return float64(n), nil
 25 | 	case int8:
 26 | 		return float64(n), nil
 27 | 	case int16:
 28 | 		return float64(n), nil
 29 | 	case int32:
 30 | 		return float64(n), nil
 31 | 	case int64:
 32 | 		return float64(n), nil
 33 | 	case uint:
 34 | 		return float64(n), nil
 35 | 	case uint8:
 36 | 		return float64(n), nil
 37 | 	case uint16:
 38 | 		return float64(n), nil
 39 | 	case uint32:
 40 | 		return float64(n), nil
 41 | 	case uint64:
 42 | 		return float64(n), nil
 43 | 	case float32:
 44 | 		return float64(n), nil
 45 | 	}
 46 | 	return 0, NewError(ast.Offset, ast.Length, "unable to convert to number: %v", v)
 47 | }
 48 | 
 49 | func isString(v interface{}) bool {
 50 | 	switch v.(type) {
 51 | 	case string, rune, byte, []byte:
 52 | 		return true
 53 | 	}
 54 | 	return false
 55 | }
 56 | 
 57 | func toString(v interface{}) string {
 58 | 	switch s := v.(type) {
 59 | 	case string:
 60 | 		return s
 61 | 	case rune:
 62 | 		return string(s)
 63 | 	case byte:
 64 | 		return string(s)
 65 | 	case []byte:
 66 | 		return string(s)
 67 | 	}
 68 | 	return fmt.Sprintf("%v", v)
 69 | }
 70 | 
 71 | // toTime converts a string value into a time.Time if possible, otherwise
 72 | // returns a zero time.
 73 | func toTime(v interface{}) time.Time {
 74 | 	vStr := toString(v)
 75 | 	if t, err := time.Parse(time.RFC3339, vStr); err == nil {
 76 | 		return t
 77 | 	}
 78 | 	if t, err := time.Parse("2006-01-02T15:04:05", vStr); err == nil {
 79 | 		return t
 80 | 	}
 81 | 	if t, err := time.Parse("2006-01-02", vStr); err == nil {
 82 | 		return t
 83 | 	}
 84 | 	return time.Time{}
 85 | }
 86 | 
 87 | func isSlice(v interface{}) bool {
 88 | 	if _, ok := v.([]interface{}); ok {
 89 | 		return true
 90 | 	}
 91 | 	return false
 92 | }
 93 | 
 94 | func toBool(v interface{}) bool {
 95 | 	switch n := v.(type) {
 96 | 	case bool:
 97 | 		return n
 98 | 	case int:
 99 | 		return n > 0
100 | 	case int8:
101 | 		return n > 0
102 | 	case int16:
103 | 		return n > 0
104 | 	case int32:
105 | 		return n > 0
106 | 	case int64:
107 | 		return n > 0
108 | 	case uint:
109 | 		return n > 0
110 | 	case uint8:
111 | 		return n > 0
112 | 	case uint16:
113 | 		return n > 0
114 | 	case uint32:
115 | 		return n > 0
116 | 	case uint64:
117 | 		return n > 0
118 | 	case float32:
119 | 		return n > 0
120 | 	case float64:
121 | 		return n > 0
122 | 	case string:
123 | 		return len(n) > 0
124 | 	case []byte:
125 | 		return len(n) > 0
126 | 	case []interface{}:
127 | 		return len(n) > 0
128 | 	case map[string]interface{}:
129 | 		return len(n) > 0
130 | 	case map[any]any:
131 | 		return len(n) > 0
132 | 	}
133 | 	return false
134 | }
135 | 
// normalize prepares a value for equality checks: every built-in numeric type
// becomes a float64 and a []byte becomes a string. All other values are
// returned untouched. Because `rune` is an alias for int32 it cannot be told
// apart from a number here, so runes are normalized as numbers.
func normalize(v interface{}) interface{} {
	switch val := v.(type) {
	case []byte:
		return string(val)
	case float32:
		return float64(val)
	case int:
		return float64(val)
	case int64:
		return float64(val)
	case int32:
		return float64(val)
	case int16:
		return float64(val)
	case int8:
		return float64(val)
	case uint:
		return float64(val)
	case uint64:
		return float64(val)
	case uint32:
		return float64(val)
	case uint16:
		return float64(val)
	case uint8:
		return float64(val)
	default:
		return v
	}
}
169 | 
170 | // deepEqual returns whether two values are deeply equal.
171 | func deepEqual(left, right any) bool {
172 | 	l := normalize(left)
173 | 	r := normalize(right)
174 | 
175 | 	// Optimization for simple types to prevent allocations
176 | 	switch l.(type) {
177 | 	case float64:
178 | 		if f, ok := r.(float64); ok {
179 | 			return l == f
180 | 		}
181 | 	case string:
182 | 		if s, ok := r.(string); ok {
183 | 			return l == s
184 | 		}
185 | 	}
186 | 
187 | 	// Otherwise, just use the built-in deep equality check.
188 | 	return reflect.DeepEqual(left, right)
189 | }
190 | 


--------------------------------------------------------------------------------
/error.go:
--------------------------------------------------------------------------------
 1 | package mexpr
 2 | 
 3 | import "fmt"
 4 | 
// Error represents an error at a specific location within an expression. It
// extends the standard error contract (the Error method) with position
// information.
type Error interface {
	// Error returns the error message.
	Error() string

	// Offset returns the character offset of the error within the expression.
	Offset() uint16

	// Length returns the length in bytes after the offset where the error ends.
	Length() uint8

	// Pretty prints out a message with a pointer to the source location of the
	// error. The source passed in should be the expression that produced the
	// error.
	Pretty(source string) string
}
19 | 
// exprErr is the package's positioned error value.
type exprErr struct {
	offset  uint16 // start position of the error within the expression
	length  uint8  // number of positions the error spans
	message string // human-readable description
}

// Error returns the error message.
func (e *exprErr) Error() string { return e.message }

// Offset returns where in the expression the error starts.
func (e *exprErr) Offset() uint16 { return e.offset }

// Length returns how far past the offset the error extends.
func (e *exprErr) Length() uint8 { return e.length }

// Pretty returns the message, the source on the next line, and then a marker
// line made of one '.' per offset position followed by one '^' per position
// covered by the error.
func (e *exprErr) Pretty(source string) string {
	dots, carets := int(e.offset), int(e.length)

	// The final size is known up front, so build into one buffer.
	out := make([]byte, 0, len(e.message)+len(source)+dots+carets+2)
	out = append(out, e.message...)
	out = append(out, '\n')
	out = append(out, source...)
	out = append(out, '\n')
	for ; dots > 0; dots-- {
		out = append(out, '.')
	}
	for ; carets > 0; carets-- {
		out = append(out, '^')
	}
	return string(out)
}
48 | 
49 | // NewError creates a new error at a specific location.
50 | func NewError(offset uint16, length uint8, format string, a ...interface{}) Error {
51 | 	return &exprErr{
52 | 		offset:  offset,
53 | 		length:  length,
54 | 		message: fmt.Sprintf(format, a...),
55 | 	}
56 | }
57 | 


--------------------------------------------------------------------------------
/expr.go:
--------------------------------------------------------------------------------
 1 | // Package mexpr provides a simple expression parser.
 2 | package mexpr
 3 | 
 4 | // Parse an expression and return the abstract syntax tree. If `types` is
 5 | // passed, it should be a set of representative example values for the input
 6 | // which will be used to type check the expression against.
 7 | func Parse(expression string, types any, options ...InterpreterOption) (*Node, Error) {
 8 | 	l := NewLexer(expression)
 9 | 	p := NewParser(l)
10 | 	ast, err := p.Parse()
11 | 	if err != nil {
12 | 		return nil, err
13 | 	}
14 | 	if types != nil {
15 | 		if err := TypeCheck(ast, types, options...); err != nil {
16 | 			return ast, err
17 | 		}
18 | 	}
19 | 	return ast, nil
20 | }
21 | 
22 | // TypeCheck will take a parsed AST and type check against the given input
23 | // structure with representative example values.
24 | func TypeCheck(ast *Node, types any, options ...InterpreterOption) Error {
25 | 	i := NewTypeChecker(ast, options...)
26 | 	return i.Run(types)
27 | }
28 | 
29 | // Run executes an AST with the given input and returns the output.
30 | func Run(ast *Node, input any, options ...InterpreterOption) (any, Error) {
31 | 	i := NewInterpreter(ast, options...)
32 | 	return i.Run(input)
33 | }
34 | 
35 | // Eval is a convenience function which lexes, parses, and executes an
36 | // expression with the given input. If you plan to execute the expression
37 | // multiple times consider caching the output of `Parse(...)` instead for a
38 | // big speed improvement.
39 | func Eval(expression string, input any, options ...InterpreterOption) (any, Error) {
40 | 	// No need to type check because we are about to run with the input.
41 | 	ast, err := Parse(expression, nil)
42 | 	if err != nil {
43 | 		return nil, err
44 | 	}
45 | 	if ast == nil {
46 | 		return nil, nil
47 | 	}
48 | 	return Run(ast, input, options...)
49 | }
50 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/danielgtaylor/mexpr
2 | 
3 | go 1.18
4 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielgtaylor/mexpr/cc7bbaa8c7fb4df8daeb27e20339a062fc342e47/go.sum


--------------------------------------------------------------------------------
/interpreter.go:
--------------------------------------------------------------------------------
  1 | package mexpr
  2 | 
  3 | import (
  4 | 	"math"
  5 | 	"strings"
  6 | )
  7 | 
// InterpreterOption passes configuration settings when creating a new
// interpreter instance.
type InterpreterOption int

const (
	// StrictMode does extra checks like making sure identifiers exist.
	StrictMode InterpreterOption = iota

	// UnquotedStrings enables the use of unquoted string values rather than
	// returning nil or a missing identifier error. Identifiers get priority
	// over unquoted strings.
	UnquotedStrings
)
 21 | 
 22 | // mapValues returns the values of the map m.
 23 | // The values will be in an indeterminate order.
 24 | func mapValues[M ~map[K]V, K comparable, V any](m M) []V {
 25 | 	r := make([]V, 0, len(m))
 26 | 	for _, v := range m {
 27 | 		r = append(r, v)
 28 | 	}
 29 | 	return r
 30 | }
 31 | 
 32 | // checkBounds returns an error if the index is out of bounds.
 33 | func checkBounds(ast *Node, input any, idx int) Error {
 34 | 	if v, ok := input.([]any); ok {
 35 | 		if idx < 0 || idx >= len(v) {
 36 | 			return NewError(ast.Offset, ast.Length, "invalid index %d for slice of length %d", int(idx), len(v))
 37 | 		}
 38 | 	}
 39 | 	if v, ok := input.(string); ok {
 40 | 		if idx < 0 || idx >= len(v) {
 41 | 			return NewError(ast.Offset, ast.Length, "invalid index %d for string of length %d", int(idx), len(v))
 42 | 		}
 43 | 	}
 44 | 	return nil
 45 | }
 46 | 
// Interpreter executes expression AST programs.
type Interpreter interface {
	// Run evaluates the interpreter's AST against the given input value and
	// returns the result, or an error describing where evaluation failed.
	Run(value any) (any, Error)
}
 51 | 
 52 | // NewInterpreter returns an interpreter for the given AST.
 53 | func NewInterpreter(ast *Node, options ...InterpreterOption) Interpreter {
 54 | 	strict := false
 55 | 	unquoted := false
 56 | 
 57 | 	for _, opt := range options {
 58 | 		switch opt {
 59 | 		case StrictMode:
 60 | 			strict = true
 61 | 		case UnquotedStrings:
 62 | 			unquoted = true
 63 | 		}
 64 | 	}
 65 | 
 66 | 	return &interpreter{
 67 | 		ast:      ast,
 68 | 		strict:   strict,
 69 | 		unquoted: unquoted,
 70 | 	}
 71 | }
 72 | 
// interpreter is the default Interpreter implementation. It walks the AST
// recursively and evaluates each node against the current input value.
type interpreter struct {
	// ast is the root node evaluated by Run.
	ast *Node
	// prevFieldSelect is set before each side of a field selection is
	// evaluated and is read and cleared at the start of every node
	// evaluation. It lets identifier lookup know that the name came from a
	// `.` selection, in which case the unquoted-string fallback is not used.
	prevFieldSelect bool
	// strict makes an identifier that cannot be resolved an error instead of
	// evaluating to nil.
	strict bool
	// unquoted makes an unresolved identifier evaluate to its own name as a
	// string.
	unquoted bool
}

// Run evaluates the interpreter's AST, starting at the root node, against
// the given input value.
func (i *interpreter) Run(value any) (any, Error) {
	return i.run(i.ast, value)
}
 83 | 
 84 | func (i *interpreter) run(ast *Node, value any) (any, Error) {
 85 | 	if ast == nil {
 86 | 		return nil, nil
 87 | 	}
 88 | 
 89 | 	fromSelect := i.prevFieldSelect
 90 | 	i.prevFieldSelect = false
 91 | 
 92 | 	switch ast.Type {
 93 | 	case NodeIdentifier:
 94 | 		switch ast.Value.(string) {
 95 | 		case "@":
 96 | 			return value, nil
 97 | 		case "length":
 98 | 			// Special pseudo-property to get the value's length.
 99 | 			if s, ok := value.(string); ok {
100 | 				return len(s), nil
101 | 			}
102 | 			if a, ok := value.([]any); ok {
103 | 				return len(a), nil
104 | 			}
105 | 		case "lower":
106 | 			if s, ok := value.(string); ok {
107 | 				return strings.ToLower(s), nil
108 | 			}
109 | 		case "upper":
110 | 			if s, ok := value.(string); ok {
111 | 				return strings.ToUpper(s), nil
112 | 			}
113 | 		}
114 | 		if m, ok := value.(map[string]any); ok {
115 | 			if v, ok := m[ast.Value.(string)]; ok {
116 | 				return v, nil
117 | 			}
118 | 		}
119 | 		if m, ok := value.(map[any]any); ok {
120 | 			if v, ok := m[ast.Value]; ok {
121 | 				return v, nil
122 | 			}
123 | 		}
124 | 		if i.unquoted && !fromSelect {
125 | 			// Identifiers not found in the map are treated as strings, but only if
126 | 			// the previous item was not a `.` like `obj.field`.
127 | 			return ast.Value.(string), nil
128 | 		}
129 | 		if !i.strict {
130 | 			return nil, nil
131 | 		}
132 | 		return nil, NewError(ast.Offset, ast.Length, "cannot get %v from %v", ast.Value, value)
133 | 	case NodeFieldSelect:
134 | 		i.prevFieldSelect = true
135 | 		leftValue, err := i.run(ast.Left, value)
136 | 		if err != nil {
137 | 			return nil, err
138 | 		}
139 | 		i.prevFieldSelect = true
140 | 		return i.run(ast.Right, leftValue)
141 | 	case NodeArrayIndex:
142 | 		resultLeft, err := i.run(ast.Left, value)
143 | 		if err != nil {
144 | 			return nil, err
145 | 		}
146 | 		if !isSlice(resultLeft) && !isString(resultLeft) {
147 | 			return nil, NewError(ast.Offset, ast.Length, "can only index strings or arrays but got %v", resultLeft)
148 | 		}
149 | 		resultRight, err := i.run(ast.Right, value)
150 | 		if err != nil {
151 | 			return nil, err
152 | 		}
153 | 		if isSlice(resultRight) && len(resultRight.([]any)) == 2 {
154 | 			start, err := toNumber(ast, resultRight.([]any)[0])
155 | 			if err != nil {
156 | 				return nil, err
157 | 			}
158 | 			end, err := toNumber(ast, resultRight.([]any)[1])
159 | 			if err != nil {
160 | 				return nil, err
161 | 			}
162 | 			if left, ok := resultLeft.([]any); ok {
163 | 				if start < 0 {
164 | 					start += float64(len(left))
165 | 				}
166 | 				if end < 0 {
167 | 					end += float64(len(left))
168 | 				}
169 | 				if err := checkBounds(ast, left, int(start)); err != nil {
170 | 					return nil, err
171 | 				}
172 | 				if err := checkBounds(ast, left, int(end)); err != nil {
173 | 					return nil, err
174 | 				}
175 | 				if int(start) > int(end) {
176 | 					return nil, NewError(ast.Offset, ast.Length, "slice start cannot be greater than end")
177 | 				}
178 | 				return left[int(start) : int(end)+1], nil
179 | 			}
180 | 			left := toString(resultLeft)
181 | 			if start < 0 {
182 | 				start += float64(len(left))
183 | 			}
184 | 			if end < 0 {
185 | 				end += float64(len(left))
186 | 			}
187 | 			if err := checkBounds(ast, left, int(start)); err != nil {
188 | 				return nil, err
189 | 			}
190 | 			if int(start) > int(end) {
191 | 				return nil, NewError(ast.Offset, ast.Length, "string slice start cannot be greater than end")
192 | 			}
193 | 			if err := checkBounds(ast, left, int(end)); err != nil {
194 | 				return nil, err
195 | 			}
196 | 			return left[int(start) : int(end)+1], nil
197 | 		}
198 | 		if isNumber(resultRight) {
199 | 			idx, err := toNumber(ast, resultRight)
200 | 			if err != nil {
201 | 				return nil, err
202 | 			}
203 | 			if left, ok := resultLeft.([]any); ok {
204 | 				if idx < 0 {
205 | 					idx += float64(len(left))
206 | 				}
207 | 				if err := checkBounds(ast, left, int(idx)); err != nil {
208 | 					return nil, err
209 | 				}
210 | 				return left[int(idx)], nil
211 | 			}
212 | 			left := toString(resultLeft)
213 | 			if idx < 0 {
214 | 				idx += float64(len(left))
215 | 			}
216 | 			if err := checkBounds(ast, left, int(idx)); err != nil {
217 | 				return nil, err
218 | 			}
219 | 			return string(left[int(idx)]), nil
220 | 		}
221 | 		return nil, NewError(ast.Offset, ast.Length, "array index must be number or slice %v", resultRight)
222 | 	case NodeSlice:
223 | 		resultLeft, err := i.run(ast.Left, value)
224 | 		if err != nil {
225 | 			return nil, err
226 | 		}
227 | 		resultRight, err := i.run(ast.Right, value)
228 | 		if err != nil {
229 | 			return nil, err
230 | 		}
231 | 		ast.Value.([]any)[0] = resultLeft
232 | 		ast.Value.([]any)[1] = resultRight
233 | 		return ast.Value, nil
234 | 	case NodeLiteral:
235 | 		return ast.Value, nil
236 | 	case NodeSign:
237 | 		resultRight, err := i.run(ast.Right, value)
238 | 		if err != nil {
239 | 			return nil, err
240 | 		}
241 | 		right, err := toNumber(ast, resultRight)
242 | 		if err != nil {
243 | 			return nil, err
244 | 		}
245 | 		if ast.Value.(string) == "-" {
246 | 			right = -right
247 | 		}
248 | 		return right, nil
249 | 	case NodeAdd, NodeSubtract, NodeMultiply, NodeDivide, NodeModulus, NodePower:
250 | 		resultLeft, err := i.run(ast.Left, value)
251 | 		if err != nil {
252 | 			return nil, err
253 | 		}
254 | 		resultRight, err := i.run(ast.Right, value)
255 | 		if err != nil {
256 | 			return nil, err
257 | 		}
258 | 		if ast.Type == NodeAdd {
259 | 			if isString(resultLeft) || isString(resultRight) {
260 | 				return toString(resultLeft) + toString(resultRight), nil
261 | 			}
262 | 			if isSlice(resultLeft) && isSlice(resultRight) {
263 | 				tmp := append([]any{}, resultLeft.([]any)...)
264 | 				return append(tmp, resultRight.([]any)...), nil
265 | 			}
266 | 		}
267 | 		if isNumber(resultLeft) && isNumber(resultRight) {
268 | 			left, err := toNumber(ast.Left, resultLeft)
269 | 			if err != nil {
270 | 				return nil, err
271 | 			}
272 | 			right, err := toNumber(ast.Right, resultRight)
273 | 			if err != nil {
274 | 				return nil, err
275 | 			}
276 | 			switch ast.Type {
277 | 			case NodeAdd:
278 | 				return left + right, nil
279 | 			case NodeSubtract:
280 | 				return left - right, nil
281 | 			case NodeMultiply:
282 | 				return left * right, nil
283 | 			case NodeDivide:
284 | 				if right == 0.0 {
285 | 					return nil, NewError(ast.Offset, ast.Length, "cannot divide by zero")
286 | 				}
287 | 				return left / right, nil
288 | 			case NodeModulus:
289 | 				if int(right) == 0 {
290 | 					return nil, NewError(ast.Offset, ast.Length, "cannot divide by zero")
291 | 				}
292 | 				return int(left) % int(right), nil
293 | 			case NodePower:
294 | 				return math.Pow(left, right), nil
295 | 			}
296 | 		}
297 | 		return nil, NewError(ast.Offset, ast.Length, "cannot add incompatible types %v and %v", resultLeft, resultRight)
298 | 	case NodeEqual, NodeNotEqual, NodeLessThan, NodeLessThanEqual, NodeGreaterThan, NodeGreaterThanEqual:
299 | 		resultLeft, err := i.run(ast.Left, value)
300 | 		if err != nil {
301 | 			return nil, err
302 | 		}
303 | 		resultRight, err := i.run(ast.Right, value)
304 | 		if err != nil {
305 | 			return nil, err
306 | 		}
307 | 		if ast.Type == NodeEqual {
308 | 			return deepEqual(resultLeft, resultRight), nil
309 | 		}
310 | 		if ast.Type == NodeNotEqual {
311 | 			return !deepEqual(resultLeft, resultRight), nil
312 | 		}
313 | 
314 | 		left, err := toNumber(ast.Left, resultLeft)
315 | 		if err != nil {
316 | 			return nil, err
317 | 		}
318 | 		right, err := toNumber(ast.Right, resultRight)
319 | 		if err != nil {
320 | 			return nil, err
321 | 		}
322 | 
323 | 		switch ast.Type {
324 | 		case NodeGreaterThan:
325 | 			return left > right, nil
326 | 		case NodeGreaterThanEqual:
327 | 			return left >= right, nil
328 | 		case NodeLessThan:
329 | 			return left < right, nil
330 | 		case NodeLessThanEqual:
331 | 			return left <= right, nil
332 | 		}
333 | 	case NodeAnd, NodeOr:
334 | 		resultLeft, err := i.run(ast.Left, value)
335 | 		if err != nil {
336 | 			return nil, err
337 | 		}
338 | 		resultRight, err := i.run(ast.Right, value)
339 | 		if err != nil {
340 | 			return nil, err
341 | 		}
342 | 		left := toBool(resultLeft)
343 | 		right := toBool(resultRight)
344 | 		switch ast.Type {
345 | 		case NodeAnd:
346 | 			return left && right, nil
347 | 		case NodeOr:
348 | 			return left || right, nil
349 | 		}
350 | 	case NodeBefore, NodeAfter:
351 | 		resultLeft, err := i.run(ast.Left, value)
352 | 		if err != nil {
353 | 			return nil, err
354 | 		}
355 | 		leftTime := toTime(resultLeft)
356 | 		if leftTime.IsZero() {
357 | 			return nil, NewError(ast.Offset, ast.Length, "unable to convert %v to date or time", resultLeft)
358 | 		}
359 | 		resultRight, err := i.run(ast.Right, value)
360 | 		if err != nil {
361 | 			return nil, err
362 | 		}
363 | 		rightTime := toTime(resultRight)
364 | 		if rightTime.IsZero() {
365 | 			return nil, NewError(ast.Offset, ast.Length, "unable to convert %v to date or time", resultRight)
366 | 		}
367 | 		if ast.Type == NodeBefore {
368 | 			return leftTime.Before(rightTime), nil
369 | 		} else {
370 | 			return leftTime.After(rightTime), nil
371 | 		}
372 | 	case NodeIn, NodeContains, NodeStartsWith, NodeEndsWith:
373 | 		resultLeft, err := i.run(ast.Left, value)
374 | 		if err != nil {
375 | 			return nil, err
376 | 		}
377 | 		resultRight, err := i.run(ast.Right, value)
378 | 		if err != nil {
379 | 			return nil, err
380 | 		}
381 | 		switch ast.Type {
382 | 		case NodeIn:
383 | 			if a, ok := resultRight.([]any); ok {
384 | 				for _, item := range a {
385 | 					if deepEqual(item, resultLeft) {
386 | 						return true, nil
387 | 					}
388 | 				}
389 | 				return false, nil
390 | 			}
391 | 			if m, ok := resultRight.(map[string]any); ok {
392 | 				if m[toString(resultLeft)] != nil {
393 | 					return true, nil
394 | 				}
395 | 				return false, nil
396 | 			}
397 | 			if m, ok := resultRight.(map[any]any); ok {
398 | 				if m[resultLeft] != nil {
399 | 					return true, nil
400 | 				}
401 | 				return false, nil
402 | 			}
403 | 			return strings.Contains(toString(resultRight), toString(resultLeft)), nil
404 | 		case NodeContains:
405 | 			if a, ok := resultLeft.([]any); ok {
406 | 				for _, item := range a {
407 | 					if deepEqual(item, resultRight) {
408 | 						return true, nil
409 | 					}
410 | 				}
411 | 				return false, nil
412 | 			}
413 | 			if m, ok := resultLeft.(map[string]any); ok {
414 | 				if m[toString(resultRight)] != nil {
415 | 					return true, nil
416 | 				}
417 | 				return false, nil
418 | 			}
419 | 			if m, ok := resultLeft.(map[any]any); ok {
420 | 				if m[resultRight] != nil {
421 | 					return true, nil
422 | 				}
423 | 				return false, nil
424 | 			}
425 | 			return strings.Contains(toString(resultLeft), toString(resultRight)), nil
426 | 		case NodeStartsWith:
427 | 			return strings.HasPrefix(toString(resultLeft), toString(resultRight)), nil
428 | 		case NodeEndsWith:
429 | 			return strings.HasSuffix(toString(resultLeft), toString(resultRight)), nil
430 | 		}
431 | 	case NodeNot:
432 | 		resultRight, err := i.run(ast.Right, value)
433 | 		if err != nil {
434 | 			return nil, err
435 | 		}
436 | 		right := toBool(resultRight)
437 | 		return !right, nil
438 | 	case NodeWhere:
439 | 		resultLeft, err := i.run(ast.Left, value)
440 | 		if err != nil {
441 | 			return nil, err
442 | 		}
443 | 		results := []any{}
444 | 		if resultLeft == nil {
445 | 			return nil, nil
446 | 		}
447 | 		if m, ok := resultLeft.(map[string]any); ok {
448 | 			resultLeft = mapValues(m)
449 | 		}
450 | 		if m, ok := resultLeft.(map[any]any); ok {
451 | 			values := make([]any, 0, len(m))
452 | 			for _, v := range m {
453 | 				values = append(values, v)
454 | 			}
455 | 			resultLeft = values
456 | 		}
457 | 		if leftSlice, ok := resultLeft.([]any); ok {
458 | 			for _, item := range leftSlice {
459 | 				// In an unquoted string scenario it makes no sense for the first/only
460 | 				// token after a `where` clause to be treated as a string. Instead we
461 | 				// treat a `where` the same as a field select `.` in this scenario.
462 | 				i.prevFieldSelect = true
463 | 				resultRight, err := i.run(ast.Right, item)
464 | 				if i.strict && err != nil {
465 | 					return nil, err
466 | 				}
467 | 				if toBool(resultRight) {
468 | 					results = append(results, item)
469 | 				}
470 | 			}
471 | 		}
472 | 		return results, nil
473 | 	}
474 | 	return nil, nil
475 | }
476 | 


--------------------------------------------------------------------------------
/interpreter_test.go:
--------------------------------------------------------------------------------
  1 | package mexpr
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"reflect"
  6 | 	"strings"
  7 | 	"testing"
  8 | )
  9 | 
 10 | func TestInterpreter(t *testing.T) {
 11 | 	type test struct {
 12 | 		expr        string
 13 | 		input       string
 14 | 		inputParsed any
 15 | 		skipTC      bool
 16 | 		opts        []InterpreterOption
 17 | 		err         string
 18 | 		output      interface{}
 19 | 	}
 20 | 	cases := []test{
 21 | 		// Add/sub
 22 | 		{expr: "1 + 2 - 3", output: 0.0},
 23 | 		{expr: "-1 + +3", output: 2.0},
 24 | 		{expr: "-1 + -3 - -4", output: 0.0},
 25 | 		{expr: `0.5 + 0.2`, output: 0.7},
 26 | 		{expr: `.5 + .2`, output: 0.7},
 27 | 		{expr: `1_000_000 + 1`, output: 1000001.0},
 28 | 		// Mul/div
 29 | 		{expr: "4 * 5 / 10", output: 2.0},
 30 | 		{expr: `19 % x`, input: `{"x": 5}`, output: 4},
 31 | 		// Power
 32 | 		{expr: "2^3", output: 8.0},
 33 | 		{expr: "2^3^2", output: 512.0},
 34 | 		{expr: "16^.5", output: 4.0},
 35 | 		// Parentheses
 36 | 		{expr: "((1 + (2)) * 3)", output: 9.0},
 37 | 		// Comparison
 38 | 		{expr: "1 < 2", output: true},
 39 | 		{expr: "1 > 2", output: false},
 40 | 		{expr: "1 > 1", output: false},
 41 | 		{expr: "1 >= 1", output: true},
 42 | 		{expr: "1 < 1", output: false},
 43 | 		{expr: "1 <= 1", output: true},
 44 | 		{expr: "1 == 1", output: true},
 45 | 		{expr: "1 == 2", output: false},
 46 | 		{expr: "1 != 1", output: false},
 47 | 		{expr: "1 != 2", output: true},
 48 | 		{expr: "x.length == 3", input: `{"x": "abc"}`, output: true},
 49 | 		{expr: `19 % 5 == 4`, output: true},
 50 | 		{expr: `foo == 1`, input: `{"foo": []}`, output: false},
 51 | 		{expr: `foo == 1`, input: `{"foo": {}}`, output: false},
 52 | 		// Boolean comparisons
 53 | 		{expr: "1 < 2 and 1 > 2", output: false},
 54 | 		{expr: "1 < 2 and 2 > 1", output: true},
 55 | 		{expr: "1 < 2 or 1 > 2", output: true},
 56 | 		{expr: "1 < 2 or 2 > 1", output: true},
 57 | 		{expr: `1 and "a"`, output: true},
 58 | 		// Negation
 59 | 		{expr: "not (1 < 2)", output: false},
 60 | 		{expr: "not (1 < 2) and (3 < 4)", output: false},
 61 | 		{expr: "not foo.bar", input: `{"foo": {"bar": true}}`, output: false},
 62 | 		{expr: "not foo[0].bar", input: `{"foo": [{"bar": true}]}`, output: false},
 63 | 		// Strings
 64 | 		{expr: `"foo" == "foo"`, output: true},
 65 | 		{expr: `"foo" == "bar"`, output: false},
 66 | 		{expr: `"foo\"bar"`, output: `foo"bar`},
 67 | 		{expr: `"foo" + "bar" == "foobar"`, output: true},
 68 | 		{expr: `foo + "a"`, input: `{"foo": 1}`, output: "1a"},
 69 | 		{expr: `foo + bar`, input: `{"foo": "id", "bar": 1}`, output: "id1"},
 70 | 		{expr: `foo[0]`, input: `{"foo": "hello"}`, output: "h"},
 71 | 		{expr: `foo[-1]`, input: `{"foo": "hello"}`, output: "o"},
 72 | 		{expr: `foo[0:-3]`, input: `{"foo": "hello"}`, output: "hel"},
 73 | 		// Unquoted strings
 74 | 		{expr: `"foo" == foo`, skipTC: true, output: false},
 75 | 		{expr: `"foo" == foo`, opts: []InterpreterOption{UnquotedStrings}, output: true},
 76 | 		{expr: `"foo" == bar`, opts: []InterpreterOption{UnquotedStrings}, output: false},
 77 | 		{expr: `foo == foo`, opts: []InterpreterOption{UnquotedStrings}, output: true},
 78 | 		{expr: `foo == foo`, opts: []InterpreterOption{UnquotedStrings, StrictMode}, output: true},
 79 | 		{expr: `foo + 1`, opts: []InterpreterOption{UnquotedStrings}, output: "foo1"},
 80 | 		{expr: `@.foo + 1`, opts: []InterpreterOption{UnquotedStrings}, err: "cannot add incompatible types"},
 81 | 		{expr: `@.foo + 1`, opts: []InterpreterOption{UnquotedStrings, StrictMode}, err: "cannot get foo"},
 82 | 		{expr: `foo.bar == bar`, opts: []InterpreterOption{UnquotedStrings}, output: false},
 83 | 		{expr: `foo.bar == bar`, skipTC: true, opts: []InterpreterOption{UnquotedStrings}, input: `{"foo": {}}`, output: false},
 84 | 		{expr: `foo.bar == baz`, opts: []InterpreterOption{UnquotedStrings}, input: `{"foo": {"bar": "baz"}}`, output: true},
 85 | 		{expr: `(items where foo).length == 1`, input: `{"items": [{"foo": 1}, {"bar": 2}, {"baz": 3}]}`, opts: []InterpreterOption{UnquotedStrings}, output: true},
 86 | 		{expr: `(items where @.foo).length == 1`, input: `{"items": [{"foo": 1}, {"bar": 2}, {"baz": 3}]}`, opts: []InterpreterOption{UnquotedStrings}, output: true},
 87 | 		{expr: `(items where foo in id).length == 1`, input: `{"items": [{"id": "foo123"}, {"id": "bar456"}, {"id": "baz789"}]}`, opts: []InterpreterOption{UnquotedStrings}, output: true},
 88 | 		// Identifier / fields
 89 | 		{expr: "foo", input: `{"foo": 1.0}`, output: 1.0},
 90 | 		{expr: "foo.bar.baz", input: `{"foo": {"bar": {"baz": 1.0}}}`, output: 1.0},
 91 | 		{expr: `foo == "foo"`, input: `{"foo": "foo"}`, output: true},
 92 | 		{expr: `foo.in.not`, input: `{"foo": {"in": {"not": 1}}}`, output: 1.0},
 93 | 		{expr: `@`, input: `{"hello": "world"}`, output: map[string]interface{}{"hello": "world"}},
 94 | 		{expr: `hello.@`, input: `{"hello": "world"}`, output: "world"},
 95 | 		// Arrays
 96 | 		{expr: "foo[0]", input: `{"foo": [1, 2]}`, output: 1.0},
 97 | 		{expr: "foo[-1]", input: `{"foo": [1, 2]}`, output: 2.0},
 98 | 		{expr: "foo[:1]", input: `{"foo": [1, 2, 3]}`, output: []interface{}{1.0, 2.0}},
 99 | 		{expr: "foo[2:]", input: `{"foo": [1, 2, 3]}`, output: []interface{}{3.0}},
100 | 		{expr: "foo[:-1]", input: `{"foo": [1, 2, 3]}`, output: []interface{}{1.0, 2.0, 3.0}},
101 | 		{expr: "foo[1 + 2 / 2]", input: `{"foo": [1, 2, 3]}`, output: 3.0},
102 | 		{expr: "foo[1:1 + 2]", input: `{"foo": [1, 2, 3, 4]}`, output: []interface{}{2.0, 3.0, 4.0}},
103 | 		{expr: "foo[foo[0]:bar.baz * 1^2]", input: `{"foo": [1, 2, 3, 4], "bar": {"baz": 3}}`, output: []interface{}{2.0, 3.0, 4.0}},
104 | 		{expr: "foo + bar", input: `{"foo": [1, 2], "bar": [3, 4]}`, output: []interface{}{1.0, 2.0, 3.0, 4.0}},
105 | 		{expr: "foo[bar]", input: `{"foo": [1, 2, 3], "bar": [0, 1]}`, output: []interface{}{1.0, 2.0}},
106 | 		// In
107 | 		{expr: `"foo" in "foobar"`, output: true},
108 | 		{expr: `"foo" in bar`, input: `{"bar": ["foo", "other"]}`, output: true},
109 | 		{expr: `123 in 12345`, output: true},
110 | 		{expr: `1 in "best 1"`, output: true},
111 | 		{expr: `1 < 2 in "this is true"`, output: true},
112 | 		{expr: `1 < 2 in "this is false"`, output: false},
113 | 		{expr: `"bar" in foo`, input: `{"foo": {"bar": 1}}`, output: true},
114 | 		// Contains
115 | 		{expr: `"foobar" contains "foo"`, output: true},
116 | 		{expr: `"foobar" contains "baz"`, output: false},
117 | 		{expr: `labels contains "foo"`, input: `{"labels": ["foo", "bar"]}`, output: true},
118 | 		// Starts / ends with
119 | 		{expr: `"foo" startsWith "f"`, output: true},
120 | 		{expr: `"foo" startsWith "o"`, output: false},
121 | 		{expr: `foo startsWith "f"`, input: `{"foo": "foo"}`, output: true},
122 | 		{expr: `name startsWith "/groups/" + group`, input: `{"name": "/groups/foo/bar", "group": "foo"}`, output: true},
123 | 		{expr: `"foo" endsWith "f"`, output: false},
124 | 		{expr: `"foo" endsWith "o"`, output: true},
125 | 		{expr: `"id1" endsWith 1`, output: true},
126 | 		// Before / after
127 | 		{expr: `start before end`, input: `{"start": "2022-01-01T12:00:00Z", "end": "2022-01-01T23:59:59Z"}`, output: true},
128 | 		{expr: `start before end`, input: `{"start": "2022-01-01T12:00:00", "end": "2022-01-01T23:59:59"}`, output: true},
129 | 		{expr: `start before end`, input: `{"start": "2022-01-01", "end": "2022-01-02"}`, output: true},
130 | 		{expr: `start after end`, input: `{"start": "2022-01-01T12:00:00Z", "end": "2022-01-01T23:59:59Z"}`, output: false},
131 | 		// Length
132 | 		{expr: `"foo".length`, output: 3},
133 | 		{expr: `str.length`, input: `{"str": "abcdef"}`, output: 6},
134 | 		{expr: `arr.length`, input: `{"arr": [1, 2]}`, output: 2},
135 | 		// Lower/Upper
136 | 		{expr: `"foo".upper`, output: "FOO"},
137 | 		{expr: `str.lower`, input: `{"str": "ABCD"}`, output: "abcd"},
138 | 		{expr: `str.lower == abcd`, input: `{"str": "ABCD"}`, opts: []InterpreterOption{UnquotedStrings}, skipTC: true, output: true},
139 | 		// Where
140 | 		{expr: `items where id > 3`, input: `{"items": [{"id": 1}, {"id": 3}, {"id": 5}, {"id": 7}]}`, output: []interface{}{map[string]interface{}{"id": 5.0}, map[string]interface{}{"id": 7.0}}},
141 | 		{expr: `items where id > 3 where labels contains "foo"`, input: `{"items": [{"id": 1, "labels": ["foo"]}, {"id": 3}, {"id": 5, "labels": ["foo"]}, {"id": 7}]}`, output: []interface{}{map[string]interface{}{"id": 5.0, "labels": []interface{}{"foo"}}}},
142 | 		{expr: `(items where id > 3).length == 2`, input: `{"items": [{"id": 1}, {"id": 3}, {"id": 5}, {"id": 7}]}`, output: true},
143 | 		{expr: `not (items where id > 3)`, input: `{"items": [{"id": 1}, {"id": 3}, {"id": 5}, {"id": 7}]}`, output: false},
144 | 		{expr: `items where id > 3`, input: `{}`, skipTC: true, output: nil},
145 | 		{expr: `foo where method == "GET"`, input: `{"foo": {"op1": {"method": "GET", "path": "/op1"}, "op2": {"method": "PUT", "path": "/op2"}, "op3": {"method": "DELETE", "path": "/op3"}}}`, output: []any{map[string]any{"method": "GET", "path": "/op1"}}},
146 | 		{expr: `foo where method == "GET"`, inputParsed: map[any]any{"foo": map[any]any{"op1": map[any]any{"method": "GET", "path": "/op1"}, "op2": map[any]any{"method": "PUT", "path": "/op2"}, "op3": map[any]any{"method": "DELETE", "path": "/op3"}}}, output: []any{map[any]any{"method": "GET", "path": "/op1"}}},
147 | 		{expr: `items where id > 3`, input: `{"items": []}`, err: "where clause requires a non-empty array or object"},
148 | 		{expr: `items where id > 3`, input: `{"items": 1}`, skipTC: true, output: []any{}},
149 | 		// Order of operations
150 | 		{expr: "1 + 2 + 3", output: 6.0},
151 | 		{expr: "1 + 2 * 3", output: 7.0},
152 | 		{expr: "(1 + 2) * 3", output: 9.0},
153 | 		{expr: "6 / 3 + 2 * 5", output: 12.0},
154 | 		// failure
155 | 		{expr: "foo + 1", input: `{}`, err: "no property foo"},
156 | 		{expr: "6 -", err: "incomplete expression"},
157 | 		{expr: `foo.bar + "baz"`, input: `{"foo": 1}`, err: "no property bar"},
158 | 		{expr: `foo + 1`, input: `{"foo": [1, 2]}`, err: "cannot operate on incompatible types"},
159 | 		{expr: `foo > 1`, input: `{"foo": []}`, err: "cannot compare array[<nil>] with number"},
160 | 		{expr: `foo[1-]`, input: `{"foo": "hello"}`, err: "unexpected right-bracket"},
161 | 		{expr: `not (1- <= 5)`, err: "missing right operand"},
162 | 		{expr: `(1 >=)`, err: "unexpected right-paren"},
163 | 		{expr: `foo[foo[0] != bar]`, input: `{"foo": [1, 2, 3], "bar": true}`, err: "array index must be number or slice"},
164 | 		{expr: `1 < "foo"`, err: "unable to convert to number"},
165 | 		{expr: `1 <`, err: "incomplete expression"},
166 | 		{expr: `1 +`, err: "incomplete expression"},
167 | 		{expr: `1 ]`, err: "expected eof but found right-bracket"},
168 | 		{expr: `0.5 + 1"`, err: "expected eof but found string"},
169 | 		{expr: `0.5 > "some kind of string"`, err: "unable to convert to number"},
170 | 		{expr: `foo beginswith "bar"`, input: `{"foo": "bar"}`, err: "expected eof"},
171 | 		{expr: `1 / (foo * 1)`, input: `{"foo": 0}`, err: "cannot divide by zero"},
172 | 		{expr: `1 before "2020-01-01"`, err: "unable to convert 1 to date or time"},
173 | 		{expr: `"2020-01-01" after "invalid"`, err: "unable to convert invalid to date or time"},
174 | 		{expr: `a[2:0]`, input: `{"a": [0, 1, 2]}`, err: "slice start cannot be greater than end"},
175 | 		{expr: `a[2:0]`, input: `{"a": "hello"}`, err: "slice start cannot be greater than end"},
176 | 		{expr: `a[0][-7]`, input: `{"a": [[]]}`, skipTC: true, err: "invalid index"},
177 | 		{expr: `a[0]`, input: `{"a": []}`, skipTC: true, err: "invalid index"},
178 | 	}
179 | 
180 | 	for _, tc := range cases {
181 | 		t.Run(tc.expr, func(t *testing.T) {
182 | 			var input any
183 | 			if tc.inputParsed != nil {
184 | 				input = tc.inputParsed
185 | 			} else if tc.input != "" {
186 | 				if err := json.Unmarshal([]byte(tc.input), &input); err != nil {
187 | 					t.Fatal(err)
188 | 				}
189 | 			}
190 | 			types := input
191 | 			if tc.skipTC {
192 | 				// Skip type check
193 | 				types = nil
194 | 			}
195 | 			ast, err := Parse(tc.expr, types, tc.opts...)
196 | 
197 | 			if ast != nil {
198 | 				t.Log("graph G {\n" + ast.Dot("") + "\n}")
199 | 			}
200 | 
201 | 			if tc.err != "" {
202 | 				if err != nil {
203 | 					if strings.Contains(err.Error(), tc.err) {
204 | 						return
205 | 					}
206 | 					t.Fatal(err.Pretty(tc.expr))
207 | 				}
208 | 			} else {
209 | 				if err != nil {
210 | 					t.Fatal(err.Pretty(tc.expr))
211 | 				}
212 | 			}
213 | 
214 | 			result, err := Run(ast, input, tc.opts...)
215 | 			if tc.err != "" {
216 | 				if err == nil {
217 | 					t.Fatal("expected error but found none")
218 | 				}
219 | 				if strings.Contains(err.Error(), tc.err) {
220 | 					return
221 | 				}
222 | 				t.Fatal(err.Pretty(tc.expr))
223 | 			} else {
224 | 				if err != nil {
225 | 					t.Fatal(err.Pretty(tc.expr))
226 | 				}
227 | 				if !reflect.DeepEqual(tc.output, result) {
228 | 					t.Fatalf("expected %v but found %v", tc.output, result)
229 | 				}
230 | 			}
231 | 		})
232 | 	}
233 | }
234 | 
235 | func FuzzMexpr(f *testing.F) {
236 | 	f.Fuzz(func(t *testing.T, s string) {
237 | 		Eval(s, nil)
238 | 		Eval(s, map[string]any{
239 | 			"b": true,
240 | 			"i": 5,
241 | 			"f": 1.0,
242 | 			"s": "Hello",
243 | 			"a": []any{false, 1, "a"},
244 | 			"o": map[any]any{
245 | 				"prop": 123,
246 | 			},
247 | 		})
248 | 	})
249 | }
250 | 
251 | func Benchmark(b *testing.B) {
252 | 	benchmarks := []struct {
253 | 		name   string
254 | 		mexpr  string
255 | 		expr   string
256 | 		result interface{}
257 | 	}{
258 | 		{"field", `baz`, `baz`, "value"},
259 | 		{"comparison", `foo.bar > 1000`, `foo.bar > 1000`, true},
260 | 		{"logical", `1 > 2 or 3 > 4`, `1 > 2 or 3 > 4`, false},
261 | 		{"math", `foo.bar + 1`, `foo.bar + 1`, 1000000001.0},
262 | 		{"string", `baz startsWith "va"`, `baz startsWith "va"`, true},
263 | 		{"index", `arr[1]`, `arr[1]`, 2},
264 | 		{
265 | 			name:   "complex",
266 | 			mexpr:  `foo.bar / (1 * 1024 * 1024) >= 1.0 and "v" in baz and baz.length > 3 and arr[2:].length == 1`,
267 | 			expr:   `foo.bar / (1 * 1024 * 1024) >= 1.0 and baz contains "v" and len(baz) > 3 and len(arr[2:]) == 1`,
268 | 			result: true,
269 | 		},
270 | 	}
271 | 
272 | 	var r interface{}
273 | 	input := map[string]interface{}{
274 | 		"foo": map[string]interface{}{
275 | 			"bar": 1000000000.0,
276 | 		},
277 | 		"baz": "value",
278 | 		"arr": []interface{}{1, 2, 3},
279 | 	}
280 | 
281 | 	for _, bm := range benchmarks {
282 | 		b.Run("mexpr-"+bm.name+"-slow", func(b *testing.B) {
283 | 			b.ReportAllocs()
284 | 			for n := 0; n < b.N; n++ {
285 | 				ast, _ := Parse(bm.mexpr, input)
286 | 				r, _ = Run(ast, input, StrictMode)
287 | 			}
288 | 			if !reflect.DeepEqual(bm.result, r) {
289 | 				b.Fatalf("expected %v but found %v", bm.result, r)
290 | 			}
291 | 		})
292 | 
293 | 		// b.Run(" expr-"+bm.name+"-slow", func(b *testing.B) {
294 | 		// 	b.ReportAllocs()
295 | 		// 	for n := 0; n < b.N; n++ {
296 | 		// 		r, _ = expr.Eval(bm.expr, input)
297 | 		// 	}
298 | 		// 	assert.Equal(b, bm.result, r)
299 | 		// })
300 | 	}
301 | 
302 | 	for _, bm := range benchmarks {
303 | 		b.Run("mexpr-"+bm.name+"-cached", func(b *testing.B) {
304 | 			b.ReportAllocs()
305 | 			ast, err := Parse(bm.mexpr, input)
306 | 			if err != nil {
307 | 				b.Fatal(err)
308 | 			}
309 | 			i := NewInterpreter(ast)
310 | 			b.ResetTimer()
311 | 			for n := 0; n < b.N; n++ {
312 | 				r, _ = i.Run(input)
313 | 			}
314 | 			if !reflect.DeepEqual(bm.result, r) {
315 | 				b.Fatalf("expected %v but found %v", bm.result, r)
316 | 			}
317 | 		})
318 | 
319 | 		// b.Run(" expr-"+bm.name+"-cached", func(b *testing.B) {
320 | 		// 	b.ReportAllocs()
321 | 		// 	program, err := expr.Compile(bm.expr)
322 | 		// 	assert.NoError(b, err)
323 | 		// 	b.ResetTimer()
324 | 		// 	for n := 0; n < b.N; n++ {
325 | 		// 		r, _ = expr.Run(program, input)
326 | 		// 	}
327 | 		// 	assert.Equal(b, bm.result, r)
328 | 		// })
329 | 	}
330 | }
331 | 


--------------------------------------------------------------------------------
/lexer.go:
--------------------------------------------------------------------------------
  1 | package mexpr
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"fmt"
  6 | 	"unicode/utf8"
  7 | )
  8 | 
  9 | // TokenType defines the type of token produced by the lexer.
 10 | type TokenType uint8
 11 | 
 12 | // Token
 13 | const (
 14 | 	TokenUnknown TokenType = iota
 15 | 	TokenIdentifier
 16 | 	TokenDot
 17 | 	TokenNumber
 18 | 	TokenString
 19 | 	TokenLeftParen
 20 | 	TokenRightParen
 21 | 	TokenLeftBracket
 22 | 	TokenRightBracket
 23 | 	TokenSlice
 24 | 	TokenAddSub
 25 | 	TokenMulDiv
 26 | 	TokenPower
 27 | 	TokenComparison
 28 | 	TokenAnd
 29 | 	TokenOr
 30 | 	TokenNot
 31 | 	TokenStringCompare
 32 | 	TokenWhere
 33 | 	TokenEOF
 34 | )
 35 | 
 36 | func (t TokenType) String() string {
 37 | 	switch t {
 38 | 	case TokenIdentifier:
 39 | 		return "identifier"
 40 | 	case TokenDot:
 41 | 		return "dot"
 42 | 	case TokenNumber:
 43 | 		return "number"
 44 | 	case TokenString:
 45 | 		return "string"
 46 | 	case TokenLeftParen:
 47 | 		return "left-paren"
 48 | 	case TokenRightParen:
 49 | 		return "right-paren"
 50 | 	case TokenLeftBracket:
 51 | 		return "left-bracket"
 52 | 	case TokenRightBracket:
 53 | 		return "right-bracket"
 54 | 	case TokenSlice:
 55 | 		return "slice"
 56 | 	case TokenAddSub:
 57 | 		return "add-sub"
 58 | 	case TokenMulDiv:
 59 | 		return "mul-div"
 60 | 	case TokenPower:
 61 | 		return "power"
 62 | 	case TokenComparison:
 63 | 		return "comparison"
 64 | 	case TokenAnd:
 65 | 		return "and"
 66 | 	case TokenOr:
 67 | 		return "or"
 68 | 	case TokenNot:
 69 | 		return "not"
 70 | 	case TokenStringCompare:
 71 | 		return "string-compare"
 72 | 	case TokenWhere:
 73 | 		return "where"
 74 | 	case TokenEOF:
 75 | 		return "eof"
 76 | 	}
 77 | 	return "unknown"
 78 | }
 79 | 
 80 | func basic(input rune) TokenType {
 81 | 	switch input {
 82 | 	case '.':
 83 | 		return TokenDot
 84 | 	case '(':
 85 | 		return TokenLeftParen
 86 | 	case ')':
 87 | 		return TokenRightParen
 88 | 	case '[':
 89 | 		return TokenLeftBracket
 90 | 	case ']':
 91 | 		return TokenRightBracket
 92 | 	case ':':
 93 | 		return TokenSlice
 94 | 	case '+', '-':
 95 | 		return TokenAddSub
 96 | 	case '*', '/', '%':
 97 | 		return TokenMulDiv
 98 | 	case '^':
 99 | 		return TokenPower
100 | 	}
101 | 
102 | 	return TokenUnknown
103 | }
104 | 
105 | // Token describes a single token produced by the lexer.
106 | type Token struct {
107 | 	Type   TokenType
108 | 	Length uint8
109 | 	Offset uint16
110 | 	Value  string
111 | }
112 | 
113 | func (t *Token) String() string {
114 | 	return fmt.Sprintf("%d (%s) %s", t.Offset, t.Type, t.Value)
115 | }
116 | 
// Lexer returns tokens from an input expression, one per call to Next.
type Lexer interface {
	// Next returns the next token from the expression. The returned token may
	// be changed in-place on subsequent calls and should not be stored; copy
	// any fields that must outlive the next call. The lexer created by
	// NewLexer reports the end of the expression as a TokenEOF token.
	Next() (*Token, Error)
}
123 | 
124 | // NewLexer creates a new lexer for the given expression.
125 | func NewLexer(expression string) Lexer {
126 | 	return &lexer{
127 | 		expression: expression,
128 | 		pos:        0,
129 | 		lastWidth:  0,
130 | 		token:      &Token{},
131 | 	}
132 | }
133 | 
// lexer is the Lexer implementation returned by NewLexer.
type lexer struct {
	// expression is the full input being scanned.
	expression string
	// pos is the byte offset of the next rune to read from expression.
	pos uint16
	// lastWidth is the byte width of the rune most recently read by next, or
	// zero when next hit the end of the expression. back uses it to rewind.
	lastWidth uint16

	// token is a cached token to prevent new tokens from being allocated.
	// It is re-used on each call to `Next()`.
	token *Token
}
143 | 
144 | // next returns the next rune in the expression at the current position.
145 | func (l *lexer) next() rune {
146 | 	if l.pos >= uint16(len(l.expression)) {
147 | 		l.lastWidth = 0
148 | 		return -1
149 | 	}
150 | 	r, w := utf8.DecodeRuneInString(l.expression[l.pos:])
151 | 	l.pos += uint16(w)
152 | 	l.lastWidth = uint16(w)
153 | 	return r
154 | }
155 | 
156 | // back moves back one rune.
157 | func (l *lexer) back() {
158 | 	l.pos -= l.lastWidth
159 | }
160 | 
161 | // peek returns the next rune without moving the position forward.
162 | func (l *lexer) peek() rune {
163 | 	r := l.next()
164 | 	l.back()
165 | 	return r
166 | }
167 | 
168 | func (l *lexer) newToken(typ TokenType, value string) *Token {
169 | 	l.token.Type = typ
170 | 	l.token.Value = value
171 | 	l.token.Offset = l.pos - uint16(len(value))
172 | 	l.token.Length = uint8(len(value))
173 | 	if l.token.Length == 0 {
174 | 		l.token.Length = 1
175 | 	}
176 | 	if typ == TokenString {
177 | 		// Account for quotes
178 | 		l.token.Offset--
179 | 	}
180 | 	return l.token
181 | }
182 | 
183 | // consumeNumber reads runes from the expression until a non-number or
184 | // non-decimal is encountered.
185 | func (l *lexer) consumeNumber() *Token {
186 | 	start := l.pos - l.lastWidth
187 | 	for {
188 | 		r := l.next()
189 | 		if r != '.' && r != '_' && (r < '0' || r > '9') {
190 | 			l.back()
191 | 			break
192 | 		}
193 | 	}
194 | 	return l.newToken(TokenNumber, l.expression[start:l.pos])
195 | }
196 | 
197 | // consumeIdentifier reads runes from the expression until a non-identifier
198 | // character is encountered. If the identifier is a known operator like `in`
199 | // then that corresponding token is returned, otherwise a normal identifier.
200 | func (l *lexer) consumeIdentifier() *Token {
201 | 	start := l.pos - l.lastWidth
202 | 	for {
203 | 		r := l.next()
204 | 		if r == -1 || basic(r) != TokenUnknown || r == ' ' || r == '\t' || r == '\r' || r == '\n' || r == '<' || r == '>' || r == '=' || r == '!' || r == '.' || r == '[' || r == '(' {
205 | 			l.back()
206 | 			break
207 | 		}
208 | 	}
209 | 	value := l.expression[start:l.pos]
210 | 	if l.token.Type != TokenDot {
211 | 		// Only parse special identifiers if the last token type was *not* an object
212 | 		// property selector, e.g. `foo.in.not` vs `foo in ...`. This enables
213 | 		// keywords to be used as properties without issue.
214 | 		switch string(value) {
215 | 		case "and":
216 | 			return l.newToken(TokenAnd, value)
217 | 		case "or":
218 | 			return l.newToken(TokenOr, value)
219 | 		case "not":
220 | 			return l.newToken(TokenNot, value)
221 | 		case "in", "contains", "startsWith", "endsWith", "before", "after":
222 | 			return l.newToken(TokenStringCompare, value)
223 | 		case "where":
224 | 			return l.newToken(TokenWhere, value)
225 | 		}
226 | 	}
227 | 	return l.newToken(TokenIdentifier, value)
228 | }
229 | 
230 | // consumeString reads runes from the expression until a non-escaped double
231 | // quote is encountered. Only double-quoted strings are supported.
232 | func (l *lexer) consumeString() *Token {
233 | 	buf := bytes.NewBuffer(make([]byte, 0, 8))
234 | 	for {
235 | 		r := l.next()
236 | 		if r == '\\' && l.peek() == '"' {
237 | 			l.next()
238 | 			buf.WriteRune('"')
239 | 			continue
240 | 		}
241 | 		if r == -1 || r == '"' {
242 | 			break
243 | 		}
244 | 		buf.WriteRune(r)
245 | 	}
246 | 	return l.newToken(TokenString, buf.String())
247 | }
248 | 
249 | func (l *lexer) Next() (*Token, Error) {
250 | 	r := l.next()
251 | 	for r == ' ' || r == '\t' || r == '\r' || r == '\n' {
252 | 		r = l.next()
253 | 	}
254 | 	if r == -1 {
255 | 		return l.newToken(TokenEOF, ""), nil
256 | 	}
257 | 
258 | 	b := basic(r)
259 | 	if b != TokenUnknown {
260 | 		if r == '.' {
261 | 			n := l.peek()
262 | 			if n >= '0' && n <= '9' {
263 | 				return l.consumeNumber(), nil
264 | 			}
265 | 		}
266 | 		if l.pos-l.lastWidth > uint16(len(l.expression)-1) {
267 | 			return l.newToken(TokenEOF, ""), nil
268 | 		}
269 | 		return l.newToken(b, l.expression[l.pos-l.lastWidth:l.pos]), nil
270 | 	}
271 | 
272 | 	if r >= '0' && r <= '9' {
273 | 		return l.consumeNumber(), nil
274 | 	}
275 | 
276 | 	if r == '<' || r == '>' || r == '!' {
277 | 		eq := l.next()
278 | 		if eq == '=' {
279 | 			return l.newToken(TokenComparison, string([]rune{r, eq})), nil
280 | 		}
281 | 		l.back()
282 | 		return l.newToken(TokenComparison, string(r)), nil
283 | 	}
284 | 
285 | 	if r == '=' {
286 | 		if l.peek() == '=' {
287 | 			l.next()
288 | 			return l.newToken(TokenComparison, "=="), nil
289 | 		}
290 | 		return nil, NewError(l.pos, 1, "= should be ==")
291 | 	}
292 | 
293 | 	if r == '"' {
294 | 		return l.consumeString(), nil
295 | 	}
296 | 
297 | 	return l.consumeIdentifier(), nil
298 | }
299 | 


--------------------------------------------------------------------------------
/parser.go:
--------------------------------------------------------------------------------
  1 | package mexpr
  2 | 
  3 | import (
  4 | 	"math"
  5 | 	"strconv"
  6 | )
  7 | 
// NodeType defines the type of the abstract syntax tree node. The zero value
// is NodeUnknown.
type NodeType uint8
 10 | 
// Possible node types. The values are assigned sequentially via iota, so the
// order of this list determines each constant's numeric value.
const (
	// NodeUnknown is the zero value, used when no node type was determined.
	NodeUnknown NodeType = iota
	// Leaf nodes: identifiers and literal values.
	NodeIdentifier
	NodeLiteral
	// Arithmetic operators: + - * / % ^
	NodeAdd
	NodeSubtract
	NodeMultiply
	NodeDivide
	NodeModulus
	NodePower
	// Comparison operators: == != < <= > >=
	NodeEqual
	NodeNotEqual
	NodeLessThan
	NodeLessThanEqual
	NodeGreaterThan
	NodeGreaterThanEqual
	// Logical operators: and, or, not
	NodeAnd
	NodeOr
	NodeNot
	// Selection: `.` field select, `[]` index and `:` slice.
	NodeFieldSelect
	NodeArrayIndex
	NodeSlice
	// NodeSign is a unary `+` or `-` applied to the right operand.
	NodeSign
	// Keyword operators: in, contains, startsWith, endsWith, before, after.
	NodeIn
	NodeContains
	NodeStartsWith
	NodeEndsWith
	NodeBefore
	NodeAfter
	// NodeWhere is the `where` operator.
	NodeWhere
)
 43 | 
// Node is a unit of the binary tree that makes up the abstract syntax tree.
// Offset and Length locate the node in the expression source and are passed
// to NewError when reporting problems with the node.
type Node struct {
	Type   NodeType    // kind of node
	Length uint8       // length of the source span, copied from the token for leaf nodes
	Offset uint16      // offset of the source span, copied from the originating token
	Left   *Node       // left operand, nil for leaf and unary nodes
	Right  *Node       // right operand, nil for leaf nodes
	Value  interface{} // identifier name, literal value, sign character, or pre-allocated scratch value
}
 53 | 
 54 | // String converts the node to a string representation (basically the node name
 55 | // or the node's value for identifiers/literals).
 56 | func (n Node) String() string {
 57 | 	switch n.Type {
 58 | 	case NodeIdentifier, NodeLiteral:
 59 | 		return toString(n.Value)
 60 | 	case NodeAdd:
 61 | 		return "+"
 62 | 	case NodeSubtract:
 63 | 		return "-"
 64 | 	case NodeMultiply:
 65 | 		return "*"
 66 | 	case NodeDivide:
 67 | 		return "/"
 68 | 	case NodeModulus:
 69 | 		return "%"
 70 | 	case NodePower:
 71 | 		return "^"
 72 | 	case NodeEqual:
 73 | 		return "=="
 74 | 	case NodeNotEqual:
 75 | 		return "!="
 76 | 	case NodeLessThan:
 77 | 		return "<"
 78 | 	case NodeLessThanEqual:
 79 | 		return "<="
 80 | 	case NodeGreaterThan:
 81 | 		return ">"
 82 | 	case NodeGreaterThanEqual:
 83 | 		return ">="
 84 | 	case NodeAnd:
 85 | 		return "and"
 86 | 	case NodeOr:
 87 | 		return "or"
 88 | 	case NodeNot:
 89 | 		return "not"
 90 | 	case NodeFieldSelect:
 91 | 		return "."
 92 | 	case NodeArrayIndex:
 93 | 		return "[]"
 94 | 	case NodeSlice:
 95 | 		return ":"
 96 | 	case NodeIn:
 97 | 		return "in"
 98 | 	case NodeContains:
 99 | 		return "contains"
100 | 	case NodeStartsWith:
101 | 		return "startsWith"
102 | 	case NodeEndsWith:
103 | 		return "endsWith"
104 | 	case NodeBefore:
105 | 		return "before"
106 | 	case NodeAfter:
107 | 		return "after"
108 | 	case NodeWhere:
109 | 		return "where"
110 | 	}
111 | 
112 | 	return ""
113 | }
114 | 
115 | // Dot returns a graphviz-compatible dot output, which can be used to render
116 | // the parse tree at e.g. https://dreampuf.github.io/GraphvizOnline/ or
117 | // locally. You must wrap the output with `graph G {` and `}`.
118 | func (n Node) Dot(prefix string) string {
119 | 	value := "\"" + prefix + n.String() + "\" [label=\"" + n.String() + "\"];\n"
120 | 	if n.Left != nil {
121 | 		value += "\"" + prefix + n.String() + "\" -- \"" + prefix + "l" + n.Left.String() + "\"\n"
122 | 		value += n.Left.Dot(prefix+"l") + "\n"
123 | 	}
124 | 	if n.Right != nil {
125 | 		value += "\"" + prefix + n.String() + "\" -- \"" + prefix + "r" + n.Right.String() + "\"\n"
126 | 		value += n.Right.Dot(prefix+"r") + "\n"
127 | 	}
128 | 	return value
129 | }
130 | 
// bindingPowers for different tokens. Not listed means zero. The higher the
// number, the higher the token is in the order of operations. The parser
// keeps consuming operators while the next token's binding power is greater
// than the binding power it was called with.
var bindingPowers = map[TokenType]int{
	TokenOr:            1,
	TokenAnd:           2,
	TokenWhere:         3,
	TokenStringCompare: 4,
	TokenComparison:    5,
	TokenSlice:         5,
	TokenAddSub:        10,
	TokenMulDiv:        15,
	TokenNot:           40,
	TokenDot:           45,
	TokenPower:         50,
	TokenLeftBracket:   60,
	TokenLeftParen:     70,
}
148 | 
149 | // precomputeLiterals takes two `NodeLiteral` nodes and a math operation and
150 | // generates a single literal node for the resutl. This prevents the interpreter
151 | // from needing to re-compute the value each time.
152 | func precomputeLiterals(offset uint16, nodeType NodeType, left, right *Node) (*Node, Error) {
153 | 	leftValue, err := toNumber(left, left.Value)
154 | 	if err != nil {
155 | 		return nil, err
156 | 	}
157 | 	rightValue, err := toNumber(right, right.Value)
158 | 	if err != nil {
159 | 		return nil, err
160 | 	}
161 | 	l := left.Length + right.Length
162 | 	switch nodeType {
163 | 	case NodeAdd:
164 | 		return &Node{Type: NodeLiteral, Offset: offset, Length: l, Value: leftValue + rightValue}, nil
165 | 	case NodeSubtract:
166 | 		return &Node{Type: NodeLiteral, Offset: offset, Length: l, Value: leftValue - rightValue}, nil
167 | 	case NodeMultiply:
168 | 		return &Node{Type: NodeLiteral, Offset: offset, Length: l, Value: leftValue * rightValue}, nil
169 | 	case NodeDivide:
170 | 		if rightValue == 0 {
171 | 			return nil, NewError(offset, 1, "cannot divide by zero")
172 | 		}
173 | 		return &Node{Type: NodeLiteral, Offset: offset, Length: l, Value: leftValue / rightValue}, nil
174 | 	case NodeModulus:
175 | 		if int(rightValue) == 0 {
176 | 			return nil, NewError(offset, 1, "cannot divide by zero")
177 | 		}
178 | 		return &Node{Type: NodeLiteral, Offset: offset, Length: l, Value: float64(int(leftValue) % int(rightValue))}, nil
179 | 	case NodePower:
180 | 		return &Node{Type: NodeLiteral, Offset: offset, Length: l, Value: math.Pow(leftValue, rightValue)}, nil
181 | 	}
182 | 	return nil, NewError(offset, 1, "cannot precompute unknown operator")
183 | }
184 | 
// Parser takes a lexer and parses its tokens into an abstract syntax tree.
type Parser interface {
	// Parse the expression and return the root node, or an error describing
	// why the expression could not be parsed.
	Parse() (*Node, Error)
}
190 | 
191 | // NewParser creates a new parser that uses the given lexer to get and process
192 | // tokens into an abstract syntax tree.
193 | func NewParser(lexer Lexer) Parser {
194 | 	return &parser{
195 | 		lexer: lexer,
196 | 	}
197 | }
198 | 
// parser is an implementation of a Pratt or top-down operator precedence parser
type parser struct {
	lexer Lexer  // source of tokens
	token *Token // most recent token read from the lexer, not yet consumed
}
204 | 
205 | func (p *parser) advance() Error {
206 | 	t, err := p.lexer.Next()
207 | 	if err != nil {
208 | 		return err
209 | 	}
210 | 	p.token = t
211 | 	return nil
212 | }
213 | 
214 | func (p *parser) parse(bindingPower int) (*Node, Error) {
215 | 	leftToken := *p.token
216 | 	if err := p.advance(); err != nil {
217 | 		return nil, err
218 | 	}
219 | 	leftNode, err := p.nud(&leftToken)
220 | 	if err != nil {
221 | 		return nil, err
222 | 	}
223 | 	currentToken := *p.token
224 | 	for bindingPower < bindingPowers[currentToken.Type] {
225 | 		if leftNode == nil {
226 | 			return nil, nil
227 | 		}
228 | 		if err := p.advance(); err != nil {
229 | 			return nil, err
230 | 		}
231 | 		leftNode, err = p.led(&currentToken, leftNode)
232 | 		if err != nil {
233 | 			return nil, err
234 | 		}
235 | 		currentToken = *p.token
236 | 	}
237 | 	return leftNode, nil
238 | }
239 | 
240 | // ensure the current token is `typ`, returning the `result` unless `err` is
241 | // set or some other error occurs. Advances past the expected token type.
242 | func (p *parser) ensure(result *Node, err Error, typ TokenType) (*Node, Error) {
243 | 	if err != nil {
244 | 		return nil, err
245 | 	}
246 | 	if p.token.Type == typ {
247 | 		if err := p.advance(); err != nil {
248 | 			return nil, err
249 | 		}
250 | 		return result, nil
251 | 	}
252 | 
253 | 	extra := ""
254 | 	if typ == TokenEOF && p.token.Type == TokenIdentifier {
255 | 		switch p.token.Value {
256 | 		case "startswith", "beginswith", "beginsWith", "hasprefix", "hasPrefix":
257 | 			extra = " (did you mean `startsWith`?)"
258 | 		case "endswith", "hassuffix", "hasSuffix":
259 | 			extra = " (did you mean `endsWith`?)"
260 | 		case "contains":
261 | 			extra = " (did you mean `in`?)"
262 | 		}
263 | 	}
264 | 
265 | 	return nil, NewError(p.token.Offset, p.token.Length, "expected %s but found %s%s", typ, p.token.Type, extra)
266 | }
267 | 
268 | // nud: null denotation. These nodes have no left context and only
269 | // consume to the right. Examples: identifiers, numbers, unary operators like
270 | // minus.
271 | func (p *parser) nud(t *Token) (*Node, Error) {
272 | 	switch t.Type {
273 | 	case TokenIdentifier:
274 | 		return &Node{Type: NodeIdentifier, Value: t.Value, Offset: t.Offset, Length: t.Length}, nil
275 | 	case TokenNumber:
276 | 		f, err := strconv.ParseFloat(t.Value, 64)
277 | 		if err != nil {
278 | 			return nil, NewError(p.token.Offset, p.token.Length, err.Error())
279 | 		}
280 | 		return &Node{Type: NodeLiteral, Value: f, Offset: t.Offset, Length: t.Length}, nil
281 | 	case TokenString:
282 | 		return &Node{Type: NodeLiteral, Value: t.Value, Offset: t.Offset, Length: t.Length}, nil
283 | 	case TokenLeftParen:
284 | 		result, err := p.parse(0)
285 | 		return p.ensure(result, err, TokenRightParen)
286 | 	case TokenNot:
287 | 		offset := t.Offset
288 | 		result, err := p.parse(bindingPowers[t.Type])
289 | 		if err != nil {
290 | 			return nil, err
291 | 		}
292 | 		return &Node{Type: NodeNot, Offset: offset, Length: uint8(t.Offset + uint16(t.Length) - offset), Right: result}, nil
293 | 	case TokenAddSub:
294 | 		value := t.Value
295 | 		offset := t.Offset
296 | 		result, err := p.parse(bindingPowers[t.Type])
297 | 		if err != nil {
298 | 			return nil, err
299 | 		}
300 | 		return &Node{Type: NodeSign, Value: value, Offset: offset, Length: uint8(t.Offset + uint16(t.Length) - offset), Right: result}, nil
301 | 	case TokenSlice:
302 | 		offset := t.Offset
303 | 		result, err := p.parse(bindingPowers[t.Type])
304 | 		if err != nil {
305 | 			return nil, err
306 | 		}
307 | 		// Create a dummy left node with value 0, the start of the slice. This also
308 | 		// sets the parent node's value to a pre-allocated list of [0, 0] which is
309 | 		// used later by the interpreter. It prevents additional allocations.
310 | 		return &Node{Type: NodeSlice, Offset: offset, Length: uint8(t.Offset + uint16(t.Length) - offset), Left: &Node{Type: NodeLiteral, Value: 0.0, Offset: offset}, Right: result, Value: []interface{}{0.0, 0.0}}, nil
311 | 	case TokenRightParen:
312 | 		return nil, NewError(t.Offset, t.Length, "unexpected right-paren")
313 | 	case TokenRightBracket:
314 | 		return nil, NewError(t.Offset, t.Length, "unexpected right-bracket")
315 | 	case TokenEOF:
316 | 		return nil, NewError(t.Offset, t.Length, "incomplete expression, EOF found")
317 | 	}
318 | 	return nil, nil
319 | }
320 | 
321 | // newNodeParseRight creates a new node with the right tree set to the
322 | // output of recursively parsing until a lower binding power is encountered.
323 | func (p *parser) newNodeParseRight(left *Node, t *Token, typ NodeType, bindingPower int) (*Node, Error) {
324 | 	offset := t.Offset
325 | 	right, err := p.parse(bindingPower)
326 | 	if err != nil {
327 | 		return nil, err
328 | 	}
329 | 	if right == nil {
330 | 		return nil, NewError(t.Offset, t.Length, "missing right operand")
331 | 	}
332 | 	return &Node{Type: typ, Offset: offset, Length: uint8(p.token.Offset + uint16(p.token.Length) - offset), Left: left, Right: right}, nil
333 | }
334 | 
335 | // led: left denotation. These tokens produce nodes that operate on two operands
336 | // a left and a right. Examples: addition, multiplication, etc.
337 | func (p *parser) led(t *Token, n *Node) (*Node, Error) {
338 | 	switch t.Type {
339 | 	case TokenAddSub, TokenMulDiv, TokenPower:
340 | 		var nodeType NodeType
341 | 		switch t.Value[0] {
342 | 		case '+':
343 | 			nodeType = NodeAdd
344 | 		case '-':
345 | 			nodeType = NodeSubtract
346 | 		case '*':
347 | 			nodeType = NodeMultiply
348 | 		case '/':
349 | 			nodeType = NodeDivide
350 | 		case '%':
351 | 			nodeType = NodeModulus
352 | 		case '^':
353 | 			nodeType = NodePower
354 | 		}
355 | 		offset := t.Offset
356 | 		binding := bindingPowers[t.Type]
357 | 		if t.Type == TokenPower {
358 | 			// Power operations should be right-associative, so we lower the binding
359 | 			// power slightly so it prefers going right.
360 | 			binding--
361 | 		}
362 | 		right, err := p.parse(binding)
363 | 		if err != nil {
364 | 			return nil, err
365 | 		}
366 | 		if right == nil {
367 | 			return nil, NewError(t.Offset, t.Length, "missing right operand")
368 | 		}
369 | 		if n.Type == NodeLiteral && right.Type == NodeLiteral {
370 | 			if !(isString(n.Value) || isString(right.Value)) {
371 | 				return precomputeLiterals(offset, nodeType, n, right)
372 | 			}
373 | 		}
374 | 		return &Node{Type: nodeType, Offset: offset, Length: uint8(t.Offset + uint16(t.Length) - offset), Left: n, Right: right, Value: 0.0}, nil
375 | 	case TokenComparison:
376 | 		var nodeType NodeType
377 | 		switch t.Value {
378 | 		case "==":
379 | 			nodeType = NodeEqual
380 | 		case "!=":
381 | 			nodeType = NodeNotEqual
382 | 		case "<":
383 | 			nodeType = NodeLessThan
384 | 		case "<=":
385 | 			nodeType = NodeLessThanEqual
386 | 		case ">":
387 | 			nodeType = NodeGreaterThan
388 | 		case ">=":
389 | 			nodeType = NodeGreaterThanEqual
390 | 		}
391 | 		return p.newNodeParseRight(n, t, nodeType, bindingPowers[t.Type])
392 | 	case TokenAnd:
393 | 		return p.newNodeParseRight(n, t, NodeAnd, bindingPowers[t.Type])
394 | 	case TokenOr:
395 | 		return p.newNodeParseRight(n, t, NodeOr, bindingPowers[t.Type])
396 | 	case TokenStringCompare:
397 | 		var nodeType NodeType
398 | 		switch t.Value {
399 | 		case "in":
400 | 			nodeType = NodeIn
401 | 		case "contains":
402 | 			nodeType = NodeContains
403 | 		case "startsWith":
404 | 			nodeType = NodeStartsWith
405 | 		case "endsWith":
406 | 			nodeType = NodeEndsWith
407 | 		case "before":
408 | 			nodeType = NodeBefore
409 | 		case "after":
410 | 			nodeType = NodeAfter
411 | 		}
412 | 		return p.newNodeParseRight(n, t, nodeType, bindingPowers[t.Type])
413 | 	case TokenWhere:
414 | 		return p.newNodeParseRight(n, t, NodeWhere, bindingPowers[t.Type])
415 | 	case TokenDot:
416 | 		return p.newNodeParseRight(n, t, NodeFieldSelect, bindingPowers[t.Type])
417 | 	case TokenLeftBracket:
418 | 		n, err := p.newNodeParseRight(n, t, NodeArrayIndex, 0)
419 | 		return p.ensure(n, err, TokenRightBracket)
420 | 	case TokenSlice:
421 | 		if p.token.Type == TokenRightBracket {
422 | 			// This sets the parent node's value to a pre-allocated list of [0, 0]
423 | 			// which is used later by the interpreter. It prevents additional
424 | 			// allocations.
425 | 			return &Node{Type: NodeSlice, Offset: t.Offset, Length: t.Length, Left: n, Right: &Node{Type: NodeLiteral, Offset: t.Offset, Value: -1.0}, Value: []interface{}{0.0, 0.0}}, nil
426 | 		}
427 | 		nn, err := p.newNodeParseRight(n, t, NodeSlice, bindingPowers[t.Type])
428 | 		if err != nil {
429 | 			return nil, err
430 | 		}
431 | 		nn.Value = []interface{}{0.0, 0.0}
432 | 		return nn, nil
433 | 	}
434 | 	return nil, NewError(t.Offset, t.Length, "unexpected token %s", t.Type)
435 | }
436 | 
437 | func (p *parser) Parse() (*Node, Error) {
438 | 	if err := p.advance(); err != nil {
439 | 		return nil, err
440 | 	}
441 | 	n, err := p.parse(0)
442 | 	return p.ensure(n, err, TokenEOF)
443 | }
444 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/0420946f64ef7f59fb34768f7918efff84357bad9e79d7a883cb8114e861e298:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string("+!")
3 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/0cdba2769206c77962ed40a6f6d36ca202c930f80a4b673d7b7382d886740414:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string("a[7]")
3 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/3381fab6d13d3fdf00437171af1c170de75caf45db43bddbaabc598f26032225:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string("a[:7]")
3 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/50f5b81a904c39d7c20f0fd09f1394bf6e60e30eea5bc3421f5848e1a477a3d2:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string("0%.1")
3 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/5fd9184d945ce47fad21a9481ccd6ee1aa512ac28c991d995fe41af19cef16e9:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string("0:00in:a")
3 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/624b0c32d8fc63d8fc610a2f3d363baa32d554da3fb5d3c6d2060021f80b36c6:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string("!")
3 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/9083a15555e8793ce5ed4a28fca588702d92ab32db80747ccef9322a1c9eb01d:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string(".͟")
3 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/ab53cd47184b4ce065b4bca4a5e5ba4f03987bd60882b02217fefd8e4eee127f:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string("A. 0%.1")
3 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/b074d9d373d06c31a9646f7f7756c283f624d1974d02bfdd9eb3018bb77badf2:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string("=")
3 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/d16cef97b88ea33670547ca16ba8c3f4f794dd9cda46b90a8bba5694230f32b6:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string("!%0")
3 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/dfddfa56e05613142f0130541c77d0f912e1c841e28da5669f403d95a12d98cb:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string(":0!=:0")
3 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/e30f687995b5521363052c9f1b107729957ec2cd41cf059b748ae6a923c55a32:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string("0%00")
3 | 


--------------------------------------------------------------------------------
/testdata/fuzz/FuzzMexpr/f7acdb7129425776d8863f092a9654328efe3ecdf265e38536c924866b72546f:
--------------------------------------------------------------------------------
1 | go test fuzz v1
2 | string("0%+0")
3 | 


--------------------------------------------------------------------------------
/typecheck.go:
--------------------------------------------------------------------------------
  1 | package mexpr
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"sort"
  6 | 	"strings"
  7 | )
  8 | 
// valueType is the human-readable name of a schema's type; it appears
// verbatim in type checker error messages.
type valueType string

// Type names the type checker distinguishes. typeUnknown is used for values
// whose Go type is not recognised by getSchema.
const (
	typeUnknown valueType = "unknown"
	typeBool    valueType = "boolean"
	typeNumber  valueType = "number"
	typeString  valueType = "string"
	typeArray   valueType = "array"
	typeObject  valueType = "object"
)
 19 | 
 20 | // mapKeys returns the keys of the map m.
 21 | // The keys will be in an indeterminate order.
 22 | func mapKeys[M ~map[K]V, K comparable, V any](m M) []K {
 23 | 	r := make([]K, 0, len(m))
 24 | 	for k := range m {
 25 | 		r = append(r, k)
 26 | 	}
 27 | 	return r
 28 | }
 29 | 
// schema describes the type of a value for the type checker.
type schema struct {
	typeName   valueType          // the kind of value described
	items      *schema            // element schema for arrays; left nil when the array was empty
	properties map[string]*schema // property schemas for objects, keyed by property name
}
 35 | 
 36 | func (s *schema) String() string {
 37 | 	if s.isArray() {
 38 | 		return fmt.Sprintf("%s[%s]", s.typeName, s.items)
 39 | 	}
 40 | 	if s.isObject() {
 41 | 		return fmt.Sprintf("%s{%v}", s.typeName, mapKeys(s.properties))
 42 | 	}
 43 | 	return string(s.typeName)
 44 | }
 45 | 
 46 | func (s *schema) isNumber() bool {
 47 | 	return s != nil && s.typeName == typeNumber
 48 | }
 49 | 
 50 | func (s *schema) isString() bool {
 51 | 	return s != nil && s.typeName == typeString
 52 | }
 53 | 
 54 | func (s *schema) isArray() bool {
 55 | 	return s != nil && s.typeName == typeArray
 56 | }
 57 | 
 58 | func (s *schema) isObject() bool {
 59 | 	return s != nil && s.typeName == typeObject
 60 | }
 61 | 
// Shared schema instances for the scalar types. getSchema and the type
// checker return these pointers directly instead of allocating a new schema
// for every boolean, number or string.
var (
	schemaBool   = newSchema(typeBool)
	schemaNumber = newSchema(typeNumber)
	schemaString = newSchema(typeString)
)
 67 | 
 68 | func newSchema(t valueType) *schema {
 69 | 	return &schema{typeName: t}
 70 | }
 71 | 
 72 | func getSchema(v any) *schema {
 73 | 	switch i := v.(type) {
 74 | 	case bool:
 75 | 		return schemaBool
 76 | 	case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64:
 77 | 		return schemaNumber
 78 | 	case string, []byte:
 79 | 		return schemaString
 80 | 	case []any:
 81 | 		s := newSchema(typeArray)
 82 | 		if len(i) > 0 {
 83 | 			s.items = getSchema(i[0])
 84 | 		}
 85 | 		return s
 86 | 	case map[string]any:
 87 | 		m := newSchema(typeObject)
 88 | 		m.properties = make(map[string]*schema, len(i))
 89 | 		for k, v := range i {
 90 | 			m.properties[k] = getSchema(v)
 91 | 		}
 92 | 		return m
 93 | 	case map[any]any:
 94 | 		m := newSchema(typeObject)
 95 | 		m.properties = make(map[string]*schema, len(i))
 96 | 		for k, v := range i {
 97 | 			m.properties[toString(k)] = getSchema(v)
 98 | 		}
 99 | 		return m
100 | 	}
101 | 	return newSchema(typeUnknown)
102 | }
103 | 
// TypeChecker checks to ensure types used for operations will work.
type TypeChecker interface {
	// Run type checks the expression against the given example value and
	// returns the first problem found, or nil if none was found.
	Run(value any) Error
}
108 | 
109 | // NewTypeChecker returns a type checker for the given AST.
110 | func NewTypeChecker(ast *Node, options ...InterpreterOption) TypeChecker {
111 | 	unquoted := false
112 | 
113 | 	for _, opt := range options {
114 | 		switch opt {
115 | 		case UnquotedStrings:
116 | 			unquoted = true
117 | 		}
118 | 	}
119 | 
120 | 	return &typeChecker{
121 | 		ast:      ast,
122 | 		unquoted: unquoted,
123 | 	}
124 | }
125 | 
// typeChecker is the TypeChecker implementation; it walks the AST and derives
// a schema for every node.
type typeChecker struct {
	ast             *Node // root of the expression to check
	prevFieldSelect bool  // set before running a node that follows a `.` or `where`; reset at the start of each run
	unquoted        bool  // when true, unknown identifiers not following a `.` are treated as strings
}
131 | 
132 | func (i *typeChecker) Run(value any) Error {
133 | 	_, err := i.run(i.ast, value)
134 | 	return err
135 | }
136 | 
137 | func (i *typeChecker) runBoth(ast *Node, value any) (*schema, *schema, Error) {
138 | 	leftType, err := i.run(ast.Left, value)
139 | 	if err != nil {
140 | 		return nil, nil, err
141 | 	}
142 | 	rightType, err := i.run(ast.Right, value)
143 | 	if err != nil {
144 | 		return nil, nil, err
145 | 	}
146 | 	return leftType, rightType, nil
147 | }
148 | 
149 | func (i *typeChecker) run(ast *Node, value any) (*schema, Error) {
150 | 	fromSelect := i.prevFieldSelect
151 | 	i.prevFieldSelect = false
152 | 
153 | 	switch ast.Type {
154 | 	case NodeIdentifier:
155 | 		switch ast.Value.(string) {
156 | 		case "@":
157 | 			if s, ok := value.(*schema); ok {
158 | 				return s, nil
159 | 			}
160 | 			return getSchema(value), nil
161 | 		case "length":
162 | 			return schemaNumber, nil
163 | 		case "lower", "upper":
164 | 			return schemaString, nil
165 | 		}
166 | 		errValue := value
167 | 		if s, ok := value.(*schema); ok {
168 | 			if v, ok := s.properties[ast.Value.(string)]; ok {
169 | 				return v, nil
170 | 			}
171 | 			keys := []string{}
172 | 			for k := range s.properties {
173 | 				keys = append(keys, k)
174 | 			}
175 | 			errValue = "map with keys [" + strings.Join(keys, ", ") + "]"
176 | 		}
177 | 		if m, ok := value.(map[string]any); ok {
178 | 			if v, ok := m[ast.Value.(string)]; ok {
179 | 				return getSchema(v), nil
180 | 			}
181 | 			keys := []string{}
182 | 			for k := range m {
183 | 				keys = append(keys, k)
184 | 			}
185 | 			errValue = "map with keys [" + strings.Join(keys, ", ") + "]"
186 | 		}
187 | 		if m, ok := value.(map[any]any); ok {
188 | 			if v, ok := m[ast.Value]; ok {
189 | 				return getSchema(v), nil
190 | 			}
191 | 			keys := []string{}
192 | 			for k := range m {
193 | 				keys = append(keys, toString(k))
194 | 			}
195 | 			errValue = "map with keys [" + strings.Join(keys, ", ") + "]"
196 | 		}
197 | 		if i.unquoted && !fromSelect {
198 | 			// Identifiers not found in the map are treated as strings, but only if
199 | 			// the previous item was not a `.` like `obj.field`.
200 | 			return schemaString, nil
201 | 		}
202 | 		return nil, NewError(ast.Offset, ast.Length, "no property %v in %v", ast.Value, errValue)
203 | 	case NodeFieldSelect:
204 | 		i.prevFieldSelect = true
205 | 		leftType, err := i.run(ast.Left, value)
206 | 		if err != nil {
207 | 			return nil, err
208 | 		}
209 | 		i.prevFieldSelect = true
210 | 		return i.run(ast.Right, leftType)
211 | 	case NodeArrayIndex:
212 | 		leftType, rightType, err := i.runBoth(ast, value)
213 | 		if err != nil {
214 | 			return nil, err
215 | 		}
216 | 		if !(leftType.isString() || leftType.isArray()) {
217 | 			return nil, NewError(ast.Offset, ast.Length, "can only index strings or arrays but got %v", leftType)
218 | 		}
219 | 		if rightType.isArray() {
220 | 			// This is a slice!
221 | 			return leftType, nil
222 | 		}
223 | 		if rightType.isNumber() {
224 | 			if leftType.isString() {
225 | 				return leftType, nil
226 | 			}
227 | 			return leftType.items, nil
228 | 		}
229 | 		return nil, NewError(ast.Offset, ast.Length, "array index must be number or slice but found %v", rightType)
230 | 	case NodeSlice:
231 | 		leftType, rightType, err := i.runBoth(ast, value)
232 | 		if err != nil {
233 | 			return nil, err
234 | 		}
235 | 		if !leftType.isNumber() {
236 | 			return nil, NewError(ast.Offset, ast.Length, "slice index must be a number but found %s", leftType)
237 | 		}
238 | 		if !rightType.isNumber() {
239 | 			return nil, NewError(ast.Offset, ast.Length, "slice index must be a number but found %s", rightType)
240 | 		}
241 | 		s := newSchema(typeArray)
242 | 		s.items = leftType
243 | 		return s, nil
244 | 	case NodeLiteral:
245 | 		return getSchema(ast.Value), nil
246 | 	case NodeSign:
247 | 		rightType, err := i.run(ast.Right, value)
248 | 		if err != nil {
249 | 			return nil, err
250 | 		}
251 | 		if !rightType.isNumber() {
252 | 			return nil, NewError(ast.Offset, ast.Length, "expected number but found %s", rightType)
253 | 		}
254 | 		return schemaNumber, nil
255 | 	case NodeAdd, NodeSubtract, NodeMultiply, NodeDivide, NodeModulus, NodePower:
256 | 		leftType, rightType, err := i.runBoth(ast, value)
257 | 		if err != nil {
258 | 			return nil, err
259 | 		}
260 | 		if ast.Type == NodeAdd {
261 | 			if leftType.isString() || rightType.isString() {
262 | 				return schemaString, nil
263 | 			}
264 | 			if leftType.isArray() && rightType.isArray() {
265 | 				if leftType.items.typeName != rightType.items.typeName {
266 | 					return nil, NewError(ast.Offset, ast.Length, "array item types don't match: %s vs %s", leftType.items, rightType.items)
267 | 				}
268 | 				return leftType, nil
269 | 			}
270 | 		}
271 | 		if leftType.isNumber() && rightType.isNumber() {
272 | 			return leftType, nil
273 | 		}
274 | 		return nil, NewError(ast.Offset, ast.Length, "cannot operate on incompatible types %v and %v", leftType.typeName, rightType.typeName)
275 | 	case NodeLessThan, NodeLessThanEqual, NodeGreaterThan, NodeGreaterThanEqual:
276 | 		leftType, rightType, err := i.runBoth(ast, value)
277 | 		if err != nil {
278 | 			return nil, err
279 | 		}
280 | 		if !leftType.isNumber() || !rightType.isNumber() {
281 | 			return nil, NewError(ast.Offset, ast.Length, "cannot compare %s with %s", leftType, rightType)
282 | 		}
283 | 		return schemaBool, nil
284 | 	case NodeEqual, NodeNotEqual, NodeAnd, NodeOr, NodeIn, NodeContains, NodeStartsWith, NodeEndsWith, NodeBefore, NodeAfter:
285 | 		_, _, err := i.runBoth(ast, value)
286 | 		if err != nil {
287 | 			return nil, err
288 | 		}
289 | 		return schemaBool, nil
290 | 	case NodeWhere:
291 | 		leftType, err := i.run(ast.Left, value)
292 | 		if err != nil {
293 | 			return nil, err
294 | 		}
295 | 		if leftType.isObject() {
296 | 			keys := mapKeys(leftType.properties)
297 | 			sort.Strings(keys)
298 | 			if len(keys) > 0 {
299 | 				// Pick the first prop as the representative item type.
300 | 				prop := leftType.properties[keys[0]]
301 | 				leftType = newSchema(typeArray)
302 | 				leftType.items = prop
303 | 			}
304 | 		}
305 | 		if !leftType.isArray() || leftType.items == nil {
306 | 			return nil, NewError(ast.Offset, ast.Length, "where clause requires a non-empty array or object, but found %s", leftType)
307 | 		}
308 | 		// In an unquoted string scenario it makes no sense for the first/only
309 | 		// token after a `where` clause to be treated as a string. Instead we
310 | 		// treat a `where` the same as a field select `.` in this scenario.
311 | 		i.prevFieldSelect = true
312 | 		_, err = i.run(ast.Right, leftType.items)
313 | 		if err != nil {
314 | 			return nil, err
315 | 		}
316 | 		return leftType, nil
317 | 	case NodeNot:
318 | 		_, err := i.run(ast.Right, value)
319 | 		if err != nil {
320 | 			return nil, err
321 | 		}
322 | 		return schemaBool, nil
323 | 	}
324 | 	return nil, NewError(ast.Offset, ast.Length, "unexpected node %v", ast)
325 | }
326 | 


--------------------------------------------------------------------------------