├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── aggr_func.go ├── aggregate_plan.go ├── checker.go ├── delete_plan.go ├── errors.go ├── examples └── memkv │ ├── go.mod │ ├── go.sum │ └── memkv.go ├── expression.go ├── expression_exec.go ├── expression_exec_test.go ├── expression_exec_vec.go ├── expression_optimizer.go ├── expression_test.go ├── filter_optimizer.go ├── filter_optimizer_test.go ├── func.go ├── go.mod ├── go.sum ├── kv.go ├── lexer.go ├── lexer_test.go ├── limit_plan.go ├── optimizer.go ├── optimizer_test.go ├── order_plan.go ├── parser.go ├── parser_fuzz_test.go ├── parser_test.go ├── plan.go ├── projection_plan.go ├── put_plan.go ├── remove_plan.go ├── scalar_func.go ├── scalar_func_vec.go ├── scan_plan.go ├── spec.md ├── statement.go ├── utils.go └── walker.go /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | 23 | testdata/* 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test fuzz 2 | 3 | test: 4 | go test -v 5 | 6 | fuzz: 7 | go test -fuzz FuzzSQLParser 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kvql 2 | 3 | [![GoDoc](https://pkg.go.dev/badge/github.com/c4pt0r/kvql?utm_source=godoc)](https://pkg.go.dev/github.com/c4pt0r/kvql) 4 | 5 | A SQL-like query language on general Key-Value DB 6 | 7 | ## Syntax 8 | 9 | Basic Types: 10 | 11 | ``` 12 | Number: number such as integer or float 13 | 14 | String: string around by ', ", \`, 15 | 16 | Boolean: true or false 17 | ``` 18 | 19 | Select Statement: 20 | 21 | ``` 22 | SelectStmt ::= "SELECT" Fields "WHERE" WhereConditions ("ORDER" "BY" OrderByFields)? ("GROUP" "BY" GroupByFields)? ("LIMIT" LimitParameter)? 23 | 24 | Fields ::= Field (, Field)* | 25 | "*" 26 | 27 | Field ::= Expression ("AS" FieldName)? 
28 | 29 | FieldName ::= String 30 | 31 | OrderByFields ::= OrderByField (, OrderByField)* 32 | 33 | OrderByField ::= FieldName ("ASC" | "DESC")* 34 | 35 | GroupByFields ::= FieldName (, FieldName)* 36 | 37 | LimitParameter ::= Number "," Number | 38 | Number 39 | 40 | WhereConditions ::= "!"? Expression 41 | 42 | Expression ::= "("? BinaryExpression | UnaryExpression ")"? 43 | 44 | UnaryExpression ::= KeyValueField | String | Number | Boolean | FunctionCall | FieldName 45 | 46 | BinaryExpression ::= Expression Operator Expression | 47 | Expression "BETWEEN" Expression "AND" Expression | 48 | Expression "IN" "(" Expression (, Expression)* ")" | 49 | Expression "IN" FunctionCall 50 | 51 | Operator ::= MathOperator | CompareOperator | AndOrOperator 52 | 53 | AndOrOperator ::= "&" | "|" | "AND" | "OR" 54 | 55 | MathOperator ::= "+" | "-" | "*" | "/" 56 | 57 | CompareOperator ::= "=" | "!=" | "^=" | "~=" | ">" | ">=" | "<" | "<=" 58 | 59 | KeyValueField ::= "KEY" | "VALUE" 60 | 61 | FunctionCall ::= FunctionName "(" FunctionArgs ")" | 62 | FunctionName "(" FunctionArgs ")" FieldAccessExpression* 63 | 64 | FunctionName ::= String 65 | 66 | FunctionArgs ::= FunctionArg ("," FunctionArg)* 67 | 68 | FunctionArg ::= Expression 69 | 70 | FieldAccessExpression ::= "[" String "]" | 71 | "[" Number "]" 72 | ``` 73 | 74 | Put Statement: 75 | 76 | ``` 77 | PutStmt ::= "PUT" KVPair (, KVPair)* 78 | KVPair ::= "(" Expression, Expression ")" 79 | ``` 80 | 81 | Delete Statement: 82 | 83 | ``` 84 | DeleteStmt ::= "DELETE" "WHERE" WhereConditions ("LIMIT" LimitParameter)? 85 | ``` 86 | 87 | Features: 88 | 89 | 1. Scan ranger optimize: EmptyResult, PrefixScan, RangeScan, MultiGet 90 | 2. Plan support Volcano model and Batch model 91 | 3. Expression constant folding 92 | 4. Support scalar function and aggregate function 93 | 5. Support hash aggregate plan 94 | 6. 
Support JSON and field access expression 95 | 96 | ## Known User 97 | 98 | - [c4pt0r/tcli](https://github.com/c4pt0r/tcli) CLI tool for TiKV 99 | 100 | ## Examples: 101 | 102 | ``` 103 | # Simple query, get all the key-value pairs with key prefix 'k' 104 | select * where key ^= 'k' 105 | 106 | # Projection and complex condition 107 | select key, int(value) + 1 where key in ('k1', 'k2', 'k3') & is_int(value) 108 | 109 | # Aggregation query 110 | select count(1), sum(int(value)) as sum, substr(key, 0, 2) as kprefix where key between 'k' and 'l' group by kprefix order by sum desc 111 | 112 | # JSON access 113 | select key, json(value)['x']['y'] where key ^= 'k' & int(json(value)['test']) >= 1 114 | select key, json(value)['list'][1] where key ^= 'k' 115 | 116 | # Filter by field name defined in select statement 117 | select key, int(value) as f1 where f1 > 10 118 | select key, split(value) as f1 where 'a' in f1 119 | select key, value, l2_distance(list(1,2,3,4), json(value)) as l2_dis where key ^= 'embedding_json' & l2_dis > 0.6 order by l2_dis desc limit 5 120 | 121 | # Put data 122 | put ('k1', 'v1'), ('k2', upper('v' + key)) 123 | 124 | # Delete data by filter and limit delete rows 125 | delete where key ^= 'prefix' and value ~= '^val_' limit 10 126 | delete where key in ('k1', 'k2', 'k3') 127 | ``` 128 | 129 | 130 | ## How to use this library 131 | 132 | A full example: 133 | 134 | [https://github.com/c4pt0r/kvql/blob/master/examples/memkv/memkv.go](https://github.com/c4pt0r/kvql/blob/master/examples/memkv/memkv.go) 135 | 136 | 137 | To get a better error report, you can convert the error to `QueryBinder` and set the original query as shown below: 138 | 139 | ```golang 140 | ... 141 | opt := kvql.NewOptimizer(query) 142 | plan, err := opt.BuildPlan(storage) 143 | if err != nil { 144 | if qerr, ok := err.(kvql.QueryBinder); ok { 145 | qerr.BindQuery(query) 146 | } 147 | fmt.Printf("Error: %s\n", err.Error()) 148 | } 149 | ... 
150 | ``` 151 | 152 | After binding the query to the error, the error output looks like this: 153 | 154 | ``` 155 | padding query 156 | v-----vv--------------------------------------------v 157 | Error: select * where key ^= 'asdf' and val ^= 'test' < query line 158 | ^-- < error position 159 | Syntax Error: ^= operator with invalid left expression < error message 160 | ``` 161 | 162 | About padding: you can set `kvql.DefaultErrorPadding` to change the default number of left padding spaces, or call the `kvql.QueryBinder.SetPadding` function to change the padding of a specific error. The default padding is 7 space characters (length of `Error: `). 163 | 164 | If you want to display the plan tree, like the `EXPLAIN` statement in SQL, the `kvql.FinalPlan.Explain` function returns the plan tree as a string list; you can use the code below to format the explain output: 165 | 166 | ```golang 167 | ... 168 | opt := kvql.NewOptimizer(query) 169 | plan, err := opt.BuildPlan(storage) 170 | if err != nil { 171 | fatal(err) 172 | } 173 | 174 | output := "" 175 | for i, plan := range plan.Explain() { 176 | padding := "" 177 | for x := 0; x < i*3; x++ { 178 | padding += " " 179 | } 180 | if i == 0 { 181 | output += fmt.Sprintf("%s%s\n", padding, plan) 182 | } else { 183 | output += fmt.Sprintf("%s`-%s\n", padding, plan) 184 | } 185 | } 186 | fmt.Println(output) 187 | ``` 188 | 189 | ## Operators and Functions 190 | 191 | ### Operators 192 | 193 | **Comparison operators** 194 | 195 | * `=`: bytes level equals 196 | * `!=`: bytes level not equals 197 | * `^=`: prefix match 198 | * `~=`: regexp match 199 | * `>`: number or string greater than 200 | * `>=`: number or string greater than or equal to 201 | * `<`: number or string less than 202 | * `<=`: number or string less than or equal to 203 | * `BETWEEN x AND y`: greater than or equal to `x` and less than or equal to `y` 204 | * `IN (...)`: in list followed by `in` operator 205 | 206 | **Logical operators** 207 | 208 | * `&`, `AND`: logical and 209 | * `|`, `OR`: 
logical or 210 | * `!`: logical not 211 | 212 | **Math operators** 213 | 214 | * `+`: number add or string concatenation 215 | * `-`: number subtraction 216 | * `*`: number multiply 217 | * `/`: number division 218 | 219 | ### Scalar Functions 220 | 221 | | Function | Description | 222 | | -------- | ----------- | 223 | | lower(value: str): str | convert value string into lower case | 224 | | upper(value: str): str | convert value string into upper case | 225 | | int(value: any): int | convert value into integer, if cannot convert to integer just return error | 226 | | float(value: any): float | convert value into float, if cannot convert to float just return error | 227 | | str(value: any): str | convert value into string | 228 | | strlen(value: any): int | convert value into string and then calculate string length | 229 | | is_int(value: any): bool | return whether value can be converted into integer | 230 | | is_float(value: any): bool | return whether value can be converted into float | 231 | | substr(value: str, start: int, end: int): str | return substring of value from `start` position to `end` position | 232 | | split(value: str, splitter: str): list | split value into a string list by splitter string | 233 | | list(elem1: any, elem2: any...): list | convert many elements into a list, list elements' type must be same, the list type support `int`, `str`, `float` types | 234 | | float_list(elem1: float, elem2: float...): list | convert many float elements into a list | 235 | | flist(elem1: float, elem2: float...): list | same as float_list | 236 | | int_list(elem1: int, elem2: int...): list | convert many integer elements into a list | 237 | | ilist(elem1: int, elem2: int...): list | same as int_list | 238 | | len(value: list): int | return value list length | 239 | | l2_distance(left: list, right: list): float | calculate l2 distance of two lists | 240 | | cosine_distance(left: list, right: list): float | calculate cosine distance of two lists | 241 | | json(value: str): json | 
parse string value into json type | 242 | | join(separator: str, val1: any, val2: any...): str | join values by separator | 243 | 244 | ### Aggregation Functions 245 | 246 | | Function | Description | 247 | | -------- | ----------- | 248 | | count(value: int): int | Count value by group | 249 | | sum(value: int): int | Sum value by group | 250 | | avg(value: int): float | Calculate average value by group | 251 | | min(value: int): int | Find the minimum value by group | 252 | | max(value: int): int | Find the maximum value by group | 253 | | quantile(value: float, percent: float): float | Calculate the Quantile by group | 254 | | json_arrayagg(value: any): string | Aggregate all values into a JSON array | 255 | | group_concat(value: any, separator: str): string | Join all values into a string by separator | 256 | -------------------------------------------------------------------------------- /aggr_func.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "encoding/json" 5 | "strconv" 6 | "strings" 7 | 8 | "github.com/beorn7/perks/quantile" 9 | ) 10 | 11 | var ( 12 | _ AggrFunction = (*aggrCountFunc)(nil) 13 | _ AggrFunction = (*aggrSumFunc)(nil) 14 | _ AggrFunction = (*aggrAvgFunc)(nil) 15 | _ AggrFunction = (*aggrMinFunc)(nil) 16 | _ AggrFunction = (*aggrMaxFunc)(nil) 17 | _ AggrFunction = (*aggrQuantileFunc)(nil) 18 | _ AggrFunction = (*aggrJsonArrayAggFunc)(nil) 19 | _ AggrFunction = (*aggrGroupConcatFunc)(nil) 20 | ) 21 | 22 | func convertToNumber(value any) (int64, float64, bool) { 23 | switch val := value.(type) { 24 | case string: 25 | ival, err := strconv.ParseInt(val, 10, 64) 26 | if err == nil { 27 | return ival, float64(ival), false 28 | } 29 | fval, err := strconv.ParseFloat(val, 64) 30 | if err == nil { 31 | return int64(fval), fval, true 32 | } 33 | case []byte: 34 | ival, err := strconv.ParseInt(string(val), 10, 64) 35 | if err == nil { 36 | return ival, float64(ival), false 37 | } 
38 | fval, err := strconv.ParseFloat(string(val), 64) 39 | if err == nil { 40 | return int64(fval), fval, true 41 | } 42 | case int8: 43 | return int64(val), float64(val), false 44 | case int16: 45 | return int64(val), float64(val), false 46 | case int: 47 | return int64(val), float64(val), false 48 | case int32: 49 | return int64(val), float64(val), false 50 | case int64: 51 | return val, float64(val), false 52 | case uint8: 53 | return int64(val), float64(val), false 54 | case uint16: 55 | return int64(val), float64(val), false 56 | case uint: 57 | return int64(val), float64(val), false 58 | case uint32: 59 | return int64(val), float64(val), false 60 | case uint64: 61 | return int64(val), float64(val), false 62 | case float32: 63 | return int64(val), float64(val), true 64 | case float64: 65 | return int64(val), val, true 66 | case bool: 67 | if val { 68 | return 1, 1.0, false 69 | } 70 | } 71 | return 0, 0.0, false 72 | } 73 | 74 | // Aggr Count 75 | type aggrCountFunc struct { 76 | args []Expression 77 | counter int64 78 | } 79 | 80 | func newAggrCountFunc(args []Expression) (AggrFunction, error) { 81 | return &aggrCountFunc{counter: 0}, nil 82 | } 83 | 84 | func (f *aggrCountFunc) Update(kv KVPair, args []Expression, ctx *ExecuteCtx) error { 85 | f.counter++ 86 | return nil 87 | } 88 | 89 | func (f *aggrCountFunc) Complete() (any, error) { 90 | return f.counter, nil 91 | } 92 | 93 | func (f *aggrCountFunc) Clone() AggrFunction { 94 | ret, _ := newAggrCountFunc(f.args) 95 | return ret 96 | } 97 | 98 | // Aggr Sum 99 | type aggrSumFunc struct { 100 | args []Expression 101 | isum int64 102 | fsum float64 103 | isFloat bool 104 | } 105 | 106 | func newAggrSumFunc(args []Expression) (AggrFunction, error) { 107 | return &aggrSumFunc{ 108 | args: args, 109 | isum: 0, 110 | fsum: 0.0, 111 | isFloat: false, 112 | }, nil 113 | } 114 | 115 | func (f *aggrSumFunc) Update(kv KVPair, args []Expression, ctx *ExecuteCtx) error { 116 | rarg, err := args[0].Execute(kv, ctx) 117 
| if err != nil { 118 | return err 119 | } 120 | ival, fval, isFloat := convertToNumber(rarg) 121 | f.isum += ival 122 | f.fsum += fval 123 | if !f.isFloat && isFloat { 124 | f.isFloat = true 125 | } 126 | return nil 127 | } 128 | 129 | func (f *aggrSumFunc) Complete() (any, error) { 130 | if f.isFloat { 131 | return f.fsum, nil 132 | } 133 | return f.isum, nil 134 | } 135 | 136 | func (f *aggrSumFunc) Clone() AggrFunction { 137 | ret, _ := newAggrSumFunc(f.args) 138 | return ret 139 | } 140 | 141 | // Aggr Avg 142 | type aggrAvgFunc struct { 143 | args []Expression 144 | isum int64 145 | fsum float64 146 | count int64 147 | isFloat bool 148 | } 149 | 150 | func newAggrAvgFunc(args []Expression) (AggrFunction, error) { 151 | return &aggrAvgFunc{ 152 | args: args, 153 | isum: 0, 154 | fsum: 0.0, 155 | count: 0, 156 | isFloat: false, 157 | }, nil 158 | } 159 | 160 | func (f *aggrAvgFunc) Update(kv KVPair, args []Expression, ctx *ExecuteCtx) error { 161 | rarg, err := args[0].Execute(kv, ctx) 162 | if err != nil { 163 | return err 164 | } 165 | ival, fval, isFloat := convertToNumber(rarg) 166 | f.isum += ival 167 | f.fsum += fval 168 | if !f.isFloat && isFloat { 169 | f.isFloat = true 170 | } 171 | f.count++ 172 | return nil 173 | } 174 | 175 | func (f *aggrAvgFunc) Complete() (any, error) { 176 | if f.isFloat { 177 | return f.fsum / float64(f.count), nil 178 | } 179 | return float64(f.isum) / float64(f.count), nil 180 | } 181 | 182 | func (f *aggrAvgFunc) Clone() AggrFunction { 183 | ret, _ := newAggrAvgFunc(f.args) 184 | return ret 185 | } 186 | 187 | // Aggr Min 188 | type aggrMinFunc struct { 189 | args []Expression 190 | imin int64 191 | fmin float64 192 | isFloat bool 193 | first bool 194 | } 195 | 196 | func newAggrMinFunc(args []Expression) (AggrFunction, error) { 197 | return &aggrMinFunc{ 198 | args: args, 199 | imin: 0, 200 | fmin: 0.0, 201 | isFloat: false, 202 | first: false, 203 | }, nil 204 | } 205 | 206 | func (f *aggrMinFunc) Update(kv KVPair, args 
[]Expression, ctx *ExecuteCtx) error { 207 | rarg, err := args[0].Execute(kv, ctx) 208 | if err != nil { 209 | return err 210 | } 211 | ival, fval, isFloat := convertToNumber(rarg) 212 | if !f.first { 213 | f.first = true 214 | f.imin = ival 215 | f.fmin = fval 216 | f.isFloat = isFloat 217 | return nil 218 | } 219 | if f.isFloat { 220 | if f.fmin > fval { 221 | f.imin = ival 222 | f.fmin = fval 223 | f.isFloat = isFloat 224 | } 225 | } else { 226 | if f.imin > ival { 227 | f.imin = ival 228 | f.fmin = fval 229 | f.isFloat = isFloat 230 | } 231 | } 232 | return nil 233 | } 234 | 235 | func (f *aggrMinFunc) Complete() (any, error) { 236 | if f.isFloat { 237 | return f.fmin, nil 238 | } 239 | return f.imin, nil 240 | } 241 | 242 | func (f *aggrMinFunc) Clone() AggrFunction { 243 | ret, _ := newAggrMinFunc(f.args) 244 | return ret 245 | } 246 | 247 | // Aggr Max 248 | type aggrMaxFunc struct { 249 | args []Expression 250 | imax int64 251 | fmax float64 252 | isFloat bool 253 | first bool 254 | } 255 | 256 | func newAggrMaxFunc(args []Expression) (AggrFunction, error) { 257 | return &aggrMaxFunc{ 258 | args: args, 259 | imax: 0, 260 | fmax: 0.0, 261 | isFloat: false, 262 | first: false, 263 | }, nil 264 | } 265 | 266 | func (f *aggrMaxFunc) Update(kv KVPair, args []Expression, ctx *ExecuteCtx) error { 267 | rarg, err := args[0].Execute(kv, ctx) 268 | if err != nil { 269 | return err 270 | } 271 | ival, fval, isFloat := convertToNumber(rarg) 272 | if !f.first { 273 | f.first = true 274 | f.imax = ival 275 | f.fmax = fval 276 | f.isFloat = isFloat 277 | return nil 278 | } 279 | if f.isFloat { 280 | if f.fmax < fval { 281 | f.imax = ival 282 | f.fmax = fval 283 | f.isFloat = isFloat 284 | } 285 | } else { 286 | if f.imax < ival { 287 | f.imax = ival 288 | f.fmax = fval 289 | f.isFloat = isFloat 290 | } 291 | } 292 | return nil 293 | } 294 | 295 | func (f *aggrMaxFunc) Complete() (any, error) { 296 | if f.isFloat { 297 | return f.fmax, nil 298 | } 299 | return f.imax, nil 
300 | } 301 | 302 | func (f *aggrMaxFunc) Clone() AggrFunction { 303 | ret, _ := newAggrMaxFunc(f.args) 304 | return ret 305 | } 306 | 307 | // Aggr Quantile 308 | type aggrQuantileFunc struct { 309 | args []Expression 310 | percent float64 311 | stream *quantile.Stream 312 | } 313 | 314 | func newAggrQuantileFunc(args []Expression) (AggrFunction, error) { 315 | if args[1].ReturnType() != TNUMBER { 316 | return nil, NewSyntaxError(args[1].GetPos(), "quantile function second parameter require number type") 317 | } 318 | 319 | pvar, err := args[1].Execute(NewKVP(nil, nil), nil) 320 | if err != nil { 321 | return nil, err 322 | } 323 | percent, ok := convertToFloat(pvar) 324 | if !ok { 325 | return nil, NewExecuteError(args[1].GetPos(), "quantile function second parameter type should be float") 326 | } 327 | if percent > 1.0 { 328 | return nil, NewExecuteError(args[1].GetPos(), "quantile function second parameter type should be less than 1") 329 | } 330 | stream := quantile.NewTargeted(map[float64]float64{ 331 | percent: 0.0001, 332 | }) 333 | return &aggrQuantileFunc{ 334 | percent: percent, 335 | stream: stream, 336 | }, nil 337 | } 338 | 339 | func (f *aggrQuantileFunc) Update(kv KVPair, args []Expression, ctx *ExecuteCtx) error { 340 | rarg, err := args[0].Execute(kv, ctx) 341 | if err != nil { 342 | return err 343 | } 344 | _, fval, _ := convertToNumber(rarg) 345 | f.stream.Insert(fval) 346 | return nil 347 | } 348 | 349 | func (f *aggrQuantileFunc) Complete() (any, error) { 350 | ret := f.stream.Query(f.percent) 351 | return ret, nil 352 | } 353 | 354 | func (f *aggrQuantileFunc) Clone() AggrFunction { 355 | percent := f.percent 356 | return &aggrQuantileFunc{ 357 | args: f.args, 358 | percent: percent, 359 | stream: quantile.NewTargeted(map[float64]float64{ 360 | percent: 0.0001, 361 | }), 362 | } 363 | } 364 | 365 | // Aggr json_arrayagg 366 | type aggrJsonArrayAggFunc struct { 367 | args []Expression 368 | items []any 369 | } 370 | 371 | func 
newAggrJsonArrayAggFunc(args []Expression) (AggrFunction, error) { 372 | return &aggrJsonArrayAggFunc{ 373 | args: args, 374 | items: make([]any, 0, 10), 375 | }, nil 376 | } 377 | 378 | func (f *aggrJsonArrayAggFunc) Clone() AggrFunction { 379 | ret, _ := newAggrJsonArrayAggFunc(f.args) 380 | return ret 381 | } 382 | 383 | func (f *aggrJsonArrayAggFunc) Complete() (any, error) { 384 | ret, err := json.Marshal(f.items) 385 | if err != nil { 386 | return nil, err 387 | } 388 | return string(ret), nil 389 | } 390 | 391 | func (f *aggrJsonArrayAggFunc) Update(kv KVPair, args []Expression, ctx *ExecuteCtx) error { 392 | rarg, err := args[0].Execute(kv, ctx) 393 | if err != nil { 394 | return err 395 | } 396 | switch val := rarg.(type) { 397 | case int8, int16, int, int32, int64, 398 | uint8, uint16, uint, uint32, uint64: 399 | f.items = append(f.items, val) 400 | case float32, float64: 401 | f.items = append(f.items, val) 402 | case []byte: 403 | f.items = append(f.items, string(val)) 404 | case bool: 405 | f.items = append(f.items, val) 406 | default: 407 | f.items = append(f.items, toString(val)) 408 | } 409 | return nil 410 | } 411 | 412 | // Aggr group_concat 413 | type aggrGroupConcatFunc struct { 414 | args []Expression 415 | sep string 416 | items []string 417 | } 418 | 419 | func newAggrGroupConcatFunc(args []Expression) (AggrFunction, error) { 420 | if args[1].ReturnType() != TSTR { 421 | return nil, NewSyntaxError(args[1].GetPos(), "group concat second parameter require string type") 422 | } 423 | svar, err := args[1].Execute(NewKVP(nil, nil), nil) 424 | if err != nil { 425 | return nil, err 426 | } 427 | return &aggrGroupConcatFunc{ 428 | args: args, 429 | sep: toString(svar), 430 | items: make([]string, 0, 10), 431 | }, nil 432 | } 433 | 434 | func (f *aggrGroupConcatFunc) Clone() AggrFunction { 435 | return &aggrGroupConcatFunc{ 436 | args: f.args, 437 | sep: f.sep, 438 | items: make([]string, 0, 10), 439 | } 440 | } 441 | 442 | func (f 
*aggrGroupConcatFunc) Complete() (any, error) { 443 | return strings.Join(f.items, f.sep), nil 444 | } 445 | 446 | func (f *aggrGroupConcatFunc) Update(kv KVPair, args []Expression, ctx *ExecuteCtx) error { 447 | rarg, err := args[0].Execute(kv, ctx) 448 | if err != nil { 449 | return err 450 | } 451 | sval := toString(rarg) 452 | f.items = append(f.items, sval) 453 | return nil 454 | } 455 | -------------------------------------------------------------------------------- /checker.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | func (e *BinaryOpExpr) Check(ctx *CheckCtx) error { 4 | if err := e.Left.Check(ctx); err != nil { 5 | return err 6 | } 7 | if err := e.Right.Check(ctx); err != nil { 8 | return err 9 | } 10 | e.tryRewriteExpr(ctx) 11 | switch e.Op { 12 | case And, Or: 13 | return e.checkWithAndOr(ctx) 14 | case Not: 15 | return NewSyntaxError(e.GetPos(), "Invalid operator !") 16 | case Add, Sub, Mul, Div: 17 | return e.checkWithMath(ctx) 18 | case In: 19 | return e.checkWithIn(ctx) 20 | case Between: 21 | return e.checkWithBetween(ctx) 22 | default: 23 | return e.checkWithCompares(ctx) 24 | } 25 | } 26 | 27 | func (e *BinaryOpExpr) tryRewriteExpr(ctx *CheckCtx) { 28 | switch lexp := e.Left.(type) { 29 | case *NameExpr: 30 | if nexpr, have := ctx.GetNamedExpr(lexp.Data); have { 31 | e.Left = &FieldReferenceExpr{ 32 | Name: lexp, 33 | FieldExpr: nexpr, 34 | } 35 | } 36 | } 37 | switch rexp := e.Right.(type) { 38 | case *NameExpr: 39 | if nexpr, have := ctx.GetNamedExpr(rexp.Data); have { 40 | e.Right = &FieldReferenceExpr{ 41 | Name: rexp, 42 | FieldExpr: nexpr, 43 | } 44 | } 45 | } 46 | } 47 | 48 | func (e *BinaryOpExpr) checkWithAndOr(ctx *CheckCtx) error { 49 | op := OperatorToString[e.Op] 50 | switch exp := e.Left.(type) { 51 | case *BinaryOpExpr, *FunctionCallExpr, *NotExpr, *FieldReferenceExpr: 52 | if e.Left.ReturnType() != TBOOL { 53 | return NewSyntaxError(e.Left.GetPos(), "%s operator has 
wrong type of left expression %s", op, exp) 54 | } 55 | default: 56 | return NewSyntaxError(e.Left.GetPos(), "%s operator with invalid left expression %s", op, exp) 57 | } 58 | 59 | switch exp := e.Right.(type) { 60 | case *BinaryOpExpr, *FunctionCallExpr, *NotExpr, *FieldReferenceExpr: 61 | if exp.ReturnType() != TBOOL { 62 | return NewSyntaxError(e.Right.GetPos(), "%s operator has wrong type of right expression %s", op, exp) 63 | } 64 | default: 65 | return NewSyntaxError(e.Right.GetPos(), "%s operator with invalid right expression %s", op, exp) 66 | } 67 | return nil 68 | } 69 | 70 | func (e *BinaryOpExpr) checkWithMath(ctx *CheckCtx) error { 71 | op := OperatorToString[e.Op] 72 | lstring := false 73 | rstring := false 74 | switch exp := e.Left.(type) { 75 | case *BinaryOpExpr, *FunctionCallExpr, *NumberExpr, *FloatExpr, *FieldReferenceExpr: 76 | if e.Left.ReturnType() != TNUMBER { 77 | if e.Left.ReturnType() == TSTR { 78 | lstring = true 79 | } else { 80 | return NewSyntaxError(e.Left.GetPos(), "%s operator has wrong type of left expression %s", op, exp) 81 | } 82 | } 83 | case *StringExpr, *FieldExpr, *FieldAccessExpr: 84 | lstring = true 85 | default: 86 | return NewSyntaxError(e.Left.GetPos(), "%s operator with invalid left expression %s", op, exp) 87 | } 88 | 89 | switch exp := e.Right.(type) { 90 | case *BinaryOpExpr, *FunctionCallExpr, *NumberExpr, *FloatExpr, *FieldReferenceExpr: 91 | if e.Right.ReturnType() != TNUMBER { 92 | if e.Right.ReturnType() == TSTR { 93 | rstring = true 94 | } else { 95 | return NewSyntaxError(e.Right.GetPos(), "%s operator has wrong type of right expression %s", op, exp) 96 | } 97 | } 98 | case *StringExpr, *FieldExpr, *FieldAccessExpr: 99 | rstring = true 100 | default: 101 | return NewSyntaxError(e.Right.GetPos(), "%s operator with invalid right expression %s", op, exp) 102 | } 103 | 104 | if op == "+" && lstring && rstring { 105 | } else { 106 | if lstring { 107 | return NewSyntaxError(e.Left.GetPos(), "%s operator with 
invalid left expression %s", op, e.Left) 108 | } 109 | if rstring { 110 | return NewSyntaxError(e.Right.GetPos(), "%s operator with invalid right expression %s", op, e.Left) 111 | } 112 | } 113 | if op == "/" { 114 | switch rval := e.Right.(type) { 115 | case *NumberExpr: 116 | if rval.Int == 0 { 117 | return NewSyntaxError(e.Right.GetPos(), "/ operator divide by zero") 118 | } 119 | case *FloatExpr: 120 | if rval.Float == 0.0 { 121 | return NewSyntaxError(e.Right.GetPos(), "/ operator divide by zero") 122 | } 123 | } 124 | } 125 | return nil 126 | } 127 | 128 | func (e *BinaryOpExpr) checkWithCompares(ctx *CheckCtx) error { 129 | var ( 130 | numKeyFieldExpr = 0 131 | numValueFieldExpr = 0 132 | numCallExpr = 0 133 | ) 134 | op := OperatorToString[e.Op] 135 | 136 | switch exp := e.Left.(type) { 137 | case *FieldExpr: 138 | switch exp.Field { 139 | case KeyKW: 140 | numKeyFieldExpr++ 141 | case ValueKW: 142 | numValueFieldExpr++ 143 | } 144 | case *FunctionCallExpr, *FieldReferenceExpr: 145 | numCallExpr++ 146 | case *StringExpr, *BoolExpr, *NumberExpr, *FloatExpr, *BinaryOpExpr, *FieldAccessExpr: 147 | default: 148 | return NewSyntaxError(e.Left.GetPos(), "%s operator with invalid left expression", op) 149 | } 150 | 151 | switch exp := e.Right.(type) { 152 | case *FieldExpr: 153 | switch exp.Field { 154 | case KeyKW: 155 | numKeyFieldExpr++ 156 | case ValueKW: 157 | numValueFieldExpr++ 158 | } 159 | case *FunctionCallExpr, *FieldReferenceExpr: 160 | numCallExpr++ 161 | case *StringExpr, *BoolExpr, *NumberExpr, *FloatExpr, *BinaryOpExpr, *FieldAccessExpr: 162 | default: 163 | return NewSyntaxError(e.Right.GetPos(), "%s operator with invalid right expression", op) 164 | } 165 | 166 | if numKeyFieldExpr == 2 || numValueFieldExpr == 2 { 167 | return NewSyntaxError(e.GetPos(), "%s operator with two same field", op) 168 | } 169 | 170 | ltype := e.Left.ReturnType() 171 | rtype := e.Right.ReturnType() 172 | if ltype != rtype { 173 | return NewSyntaxError(e.GetPos(), "%s 
operator left and right type not same", op) 174 | } 175 | switch e.Op { 176 | case Gt, Gte, Lt, Lte: 177 | if ltype != TNUMBER && ltype != TSTR { 178 | return NewSyntaxError(e.Left.GetPos(), "%s operator has wrong type of left expression", op) 179 | } 180 | case PrefixMatch, RegExpMatch: 181 | if ltype != TSTR { 182 | return NewSyntaxError(e.Left.GetPos(), "%s operator has wrong type of left expression", op) 183 | } 184 | } 185 | return nil 186 | } 187 | 188 | func (e *BinaryOpExpr) checkWithIn(ctx *CheckCtx) error { 189 | ltype := e.Left.ReturnType() 190 | switch r := e.Right.(type) { 191 | case *ListExpr: 192 | for _, expr := range r.List { 193 | if expr.ReturnType() != ltype { 194 | return NewSyntaxError(expr.GetPos(), "in operator element has wrong type") 195 | } 196 | } 197 | case *FunctionCallExpr, *FieldReferenceExpr: 198 | if r.ReturnType() != TLIST { 199 | return NewSyntaxError(r.GetPos(), "in operator element has wrong type") 200 | } 201 | default: 202 | return NewSyntaxError(e.Right.GetPos(), "in operator right expression must be list expression") 203 | } 204 | return nil 205 | } 206 | 207 | func (e *BinaryOpExpr) checkWithBetween(ctx *CheckCtx) error { 208 | ltype := e.Left.ReturnType() 209 | rlist, ok := e.Right.(*ListExpr) 210 | if !ok || len(rlist.List) != 2 { 211 | return NewSyntaxError(e.Right.GetPos(), "between operator invalid right expression") 212 | } 213 | 214 | switch ltype { 215 | case TSTR, TNUMBER: 216 | default: 217 | return NewSyntaxError(e.Left.GetPos(), "between operator only support string and number type") 218 | } 219 | 220 | lexpr := rlist.List[0] 221 | uexpr := rlist.List[1] 222 | if lexpr.ReturnType() != ltype || uexpr.ReturnType() != ltype { 223 | return NewSyntaxError(e.Right.GetPos(), "between operator right expression with wrong type") 224 | } 225 | return nil 226 | } 227 | 228 | func (e *FieldExpr) Check(ctx *CheckCtx) error { 229 | if e.Field == KeyKW && ctx.NotAllowKey { 230 | return NewSyntaxError(e.Pos, "not allow key 
keyword in expression") 231 | } 232 | if e.Field == ValueKW && ctx.NotAllowValue { 233 | return NewSyntaxError(e.Pos, "not allow value keyword in expression") 234 | } 235 | return nil 236 | } 237 | 238 | func (e *StringExpr) Check(ctx *CheckCtx) error { 239 | return nil 240 | } 241 | 242 | func (e *NotExpr) Check(ctx *CheckCtx) error { 243 | if e.Right.ReturnType() != TBOOL { 244 | return NewSyntaxError(e.Right.GetPos(), "! operator right expression has wrong type") 245 | } 246 | return nil 247 | } 248 | 249 | func (e *FunctionCallExpr) Check(ctx *CheckCtx) error { 250 | _, ok := e.Name.(*NameExpr) 251 | if !ok { 252 | return NewSyntaxError(e.Name.GetPos(), "Invalid function name") 253 | } 254 | if len(e.Args) > 0 { 255 | for i, a := range e.Args { 256 | a = e.tryRewriteExpr(i, ctx) 257 | if err := a.Check(ctx); err != nil { 258 | return err 259 | } 260 | } 261 | } 262 | return nil 263 | } 264 | 265 | func (e *FunctionCallExpr) tryRewriteExpr(idx int, ctx *CheckCtx) Expression { 266 | ret := e.Args[idx] 267 | switch aexp := ret.(type) { 268 | case *NameExpr: 269 | if nexpr, have := ctx.GetNamedExpr(aexp.Data); have { 270 | narg := &FieldReferenceExpr{ 271 | Name: aexp, 272 | FieldExpr: nexpr, 273 | } 274 | e.Args[idx] = narg 275 | return narg 276 | } 277 | } 278 | return ret 279 | } 280 | 281 | func (e *NameExpr) Check(ctx *CheckCtx) error { 282 | return nil 283 | } 284 | 285 | func (e *FloatExpr) Check(ctx *CheckCtx) error { 286 | return nil 287 | } 288 | 289 | func (e *NumberExpr) Check(ctx *CheckCtx) error { 290 | return nil 291 | } 292 | 293 | func (e *BoolExpr) Check(ctx *CheckCtx) error { 294 | return nil 295 | } 296 | 297 | func (e *ListExpr) Check(ctx *CheckCtx) error { 298 | if len(e.List) == 0 { 299 | return NewSyntaxError(e.GetPos(), "Empty list") 300 | } 301 | if len(e.List) > 1 { 302 | ftype := e.List[0].ReturnType() 303 | for i, item := range e.List[1:] { 304 | if item.ReturnType() != ftype { 305 | return NewSyntaxError(item.GetPos(), "List %d item 
has wrong type", i) 306 | } 307 | } 308 | } 309 | return nil 310 | } 311 | 312 | func (e *FieldAccessExpr) Check(ctx *CheckCtx) error { 313 | _, leftIsFAE := e.Left.(*FieldAccessExpr) 314 | lrType := e.Left.ReturnType() 315 | switch lrType { 316 | case TJSON, TLIST: 317 | default: 318 | if leftIsFAE { 319 | // Support cascade field access such as: 320 | // json(value)['x']['y'] 321 | return nil 322 | } 323 | return NewSyntaxError(e.Left.GetPos(), "Field access expression left require JSON or List type") 324 | } 325 | switch e.FieldName.(type) { 326 | case *StringExpr: 327 | if lrType == TJSON { 328 | return nil 329 | } else if leftIsFAE { 330 | // Support cascade array index access such as: 331 | // json(value)['list'][1] 332 | return nil 333 | } 334 | case *NumberExpr: 335 | if lrType == TLIST { 336 | return nil 337 | } else if leftIsFAE { 338 | // Support cascade array index access such as: 339 | // json(value)['list'][1] 340 | return nil 341 | } 342 | } 343 | return NewSyntaxError(e.FieldName.GetPos(), "Invalid field name") 344 | } 345 | 346 | func (e *FieldReferenceExpr) Check(ctx *CheckCtx) error { 347 | return nil 348 | } 349 | -------------------------------------------------------------------------------- /delete_plan.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import "fmt" 4 | 5 | type DeletePlan struct { 6 | Storage Storage 7 | ChildPlan Plan 8 | executed bool 9 | } 10 | 11 | func (p *DeletePlan) Init() error { 12 | p.executed = false 13 | return p.ChildPlan.Init() 14 | } 15 | 16 | func (p *DeletePlan) String() string { 17 | return fmt.Sprintf("DeletePlan{}") 18 | } 19 | 20 | func (p *DeletePlan) Explain() []string { 21 | ret := []string{p.String()} 22 | for _, plan := range p.ChildPlan.Explain() { 23 | ret = append(ret, plan) 24 | } 25 | return ret 26 | } 27 | 28 | func (p *DeletePlan) FieldNameList() []string { 29 | return []string{"Rows"} 30 | } 31 | 32 | func (p *DeletePlan) 
FieldTypeList() []Type { 33 | return []Type{TNUMBER} 34 | } 35 | 36 | func (p *DeletePlan) Next(ctx *ExecuteCtx) ([]Column, error) { 37 | if !p.executed { 38 | n, err := p.execute(ctx) 39 | p.executed = true 40 | return []Column{n}, err 41 | } 42 | return nil, nil 43 | } 44 | 45 | func (p *DeletePlan) Batch(ctx *ExecuteCtx) ([][]Column, error) { 46 | if !p.executed { 47 | n, err := p.execute(ctx) 48 | p.executed = true 49 | row := []Column{n} 50 | return [][]Column{row}, err 51 | } 52 | return nil, nil 53 | } 54 | 55 | func (p *DeletePlan) execute(ctx *ExecuteCtx) (int, error) { 56 | count := 0 57 | for { 58 | ctx.Clear() 59 | rows, err := p.ChildPlan.Batch(ctx) 60 | if err != nil { 61 | return count, err 62 | } 63 | nrows := len(rows) 64 | if nrows == 0 { 65 | return count, nil 66 | } 67 | keys := make([][]byte, nrows) 68 | for i, kv := range rows { 69 | keys[i] = kv.Key 70 | } 71 | err = p.Storage.BatchDelete(keys) 72 | if err != nil { 73 | return count, err 74 | } 75 | count += nrows 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /errors.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | var ( 9 | _ error = (*SyntaxError)(nil) 10 | _ error = (*ExecuteError)(nil) 11 | _ QueryBinder = (*SyntaxError)(nil) 12 | _ QueryBinder = (*ExecuteError)(nil) 13 | 14 | DefaultErrorPadding = 7 15 | ) 16 | 17 | type QueryBinder interface { 18 | BindQuery(query string) 19 | SetPadding(pad int) 20 | } 21 | 22 | type SyntaxError struct { 23 | Query string 24 | Message string 25 | Pos int 26 | Padding int 27 | } 28 | 29 | func NewSyntaxError(pos int, msg string, args ...any) error { 30 | return &SyntaxError{ 31 | Message: fmt.Sprintf(msg, args...), 32 | Pos: pos, 33 | Padding: DefaultErrorPadding, 34 | } 35 | } 36 | 37 | func (e *SyntaxError) BindQuery(query string) { 38 | e.Query = query 39 | } 40 | 41 | func (e *SyntaxError) 
// generatePads returns a string made of pad spaces. A zero or negative pad
// yields the empty string.
func generatePads(pad int) string {
	if pad <= 0 {
		// strings.Repeat panics on a negative count.
		return ""
	}
	return strings.Repeat(" ", pad)
}

// outputQueryAndErrPos renders the query on one line and a "^--" marker on the
// next line, placed under byte offset pos of the whitespace-trimmed query.
//
// pos == -1 means "end of query"; a pos beyond the end of the trimmed query is
// treated the same way. adjust is added to the marker's indentation. Queries
// longer than 70 bytes are cut down to a 70 byte window around pos, with
// "... " and " ..." marking the side(s) that were cut.
//
// The window is computed on bytes, so a multi-byte UTF-8 character at a cut
// point may be split.
func outputQueryAndErrPos(query string, pos int, adjust int) string {
	const (
		window = 70 // maximum number of query bytes shown
		lead   = 35 // bytes kept in front of pos when the left side is cut
	)

	tquery := strings.TrimSpace(query)
	qlen := len(tquery)
	// Clamp positions past the end: without this the window bounds below
	// invert and the slice expression panics once pos exceeds qlen+lead.
	if pos == -1 || pos > qlen {
		pos = qlen
	}

	trimLeft, trimRight := false, false
	if qlen > window {
		if pos <= lead {
			tquery = tquery[:window]
			trimRight = true
		} else {
			trimLeft = true
			start := pos - lead
			end := qlen
			if end-start > window {
				end = start + window
				trimRight = true
			}
			tquery = tquery[start:end]
			pos -= start
		}
	}

	errPos := pos + adjust
	var b strings.Builder
	if trimLeft {
		b.WriteString("... ")
		errPos += 4 // account for the "... " prefix
	}
	b.WriteString(tquery)
	if trimRight {
		b.WriteString(" ...")
	}
	b.WriteByte('\n')
	b.WriteString(generatePads(errPos))
	b.WriteString("^--\n")
	return b.String()
}
[]string 24 | index int 25 | data map[string][]byte 26 | } 27 | 28 | func NewMemKV() *MemKV { 29 | return &MemKV{ 30 | data: make(map[string][]byte), 31 | orderedKeys: make([]string, 0), 32 | } 33 | } 34 | 35 | var _ kvql.Storage = (*MemKV)(nil) 36 | var _ kvql.Cursor = (*MemKVCursor)(nil) 37 | 38 | func (m *MemKV) Get(key []byte) (value []byte, err error) { 39 | m.mu.RLock() 40 | defer m.mu.RUnlock() 41 | value, ok := m.data[string(key)] 42 | if !ok { 43 | return nil, nil // Return nil if the key does not exist 44 | } 45 | return value, nil 46 | } 47 | 48 | func (m *MemKV) Put(key []byte, value []byte) error { 49 | m.mu.Lock() 50 | defer m.mu.Unlock() 51 | strKey := string(key) 52 | if _, exists := m.data[strKey]; !exists { 53 | m.orderedKeys = append(m.orderedKeys, strKey) 54 | sort.Strings(m.orderedKeys) // Maintain order after insertion 55 | } 56 | m.data[strKey] = value 57 | return nil 58 | } 59 | 60 | func (m *MemKV) Delete(key []byte) error { 61 | m.mu.Lock() 62 | defer m.mu.Unlock() 63 | strKey := string(key) 64 | if _, exists := m.data[strKey]; exists { 65 | delete(m.data, strKey) 66 | i := sort.SearchStrings(m.orderedKeys, strKey) 67 | m.orderedKeys = append(m.orderedKeys[:i], m.orderedKeys[i+1:]...) 
68 | } 69 | return nil 70 | } 71 | 72 | func (m *MemKV) BatchPut(kvs []kvql.KVPair) error { 73 | for _, kv := range kvs { 74 | m.Put(kv.Key, kv.Value) 75 | } 76 | return nil 77 | } 78 | 79 | func (m *MemKV) BatchDelete(keys [][]byte) error { 80 | for _, key := range keys { 81 | m.Delete(key) 82 | } 83 | return nil 84 | } 85 | 86 | func (m *MemKV) Cursor() (cursor kvql.Cursor, err error) { 87 | m.mu.RLock() 88 | defer m.mu.RUnlock() 89 | return &MemKVCursor{data: m.data, keys: m.orderedKeys, index: -1}, nil 90 | } 91 | 92 | func (c *MemKVCursor) Seek(prefix []byte) error { 93 | c.index = sort.SearchStrings(c.keys, string(prefix)) 94 | if c.index < len(c.keys) && strings.HasPrefix(c.keys[c.index], string(prefix)) { 95 | return nil 96 | } 97 | c.index = len(c.keys) 98 | return nil 99 | } 100 | 101 | func (c *MemKVCursor) Next() (key []byte, value []byte, err error) { 102 | if c.index < 0 || c.index >= len(c.keys) { 103 | return nil, nil, nil 104 | } 105 | keyStr := c.keys[c.index] 106 | value = c.data[keyStr] 107 | c.index++ 108 | return []byte(keyStr), value, nil 109 | } 110 | 111 | func (c *MemKVCursor) Close() error { 112 | // No resources to release in this simple cursor 113 | return nil 114 | } 115 | 116 | func repl(storage kvql.Storage) { 117 | buf := bufio.NewReader(os.Stdin) 118 | for { 119 | fmt.Print("kvql> ") 120 | query, err := buf.ReadString('\n') 121 | if err != nil { 122 | fmt.Println("Error reading input:", err) 123 | continue 124 | } 125 | query = strings.TrimSpace(query) 126 | 127 | opt := kvql.NewOptimizer(query) 128 | plan, err := opt.BuildPlan(storage) 129 | if err != nil { 130 | fmt.Println("Error building plan:", err) 131 | continue 132 | } 133 | 134 | execCtx := kvql.NewExecuteCtx() 135 | for { 136 | rows, err := plan.Batch(execCtx) 137 | if err != nil { 138 | fmt.Println("Error executing plan:", err) 139 | break 140 | } 141 | if len(rows) == 0 { 142 | break 143 | } 144 | execCtx.Clear() 145 | for _, row := range rows { 146 | for _, col := 
range row { 147 | switch col := col.(type) { 148 | case int, int32, int64: 149 | fmt.Printf("%d ", col) 150 | case []byte: 151 | fmt.Printf("%s ", string(col)) 152 | default: 153 | fmt.Printf("%v ", col) 154 | } 155 | } 156 | fmt.Println() 157 | } 158 | } 159 | } 160 | } 161 | 162 | func main() { 163 | kv := NewMemKV() 164 | // put some test data 165 | kv.Put([]byte("a"), []byte("1")) 166 | kv.Put([]byte("a1"), []byte("2")) 167 | kv.Put([]byte("a2"), []byte("3")) 168 | kv.Put([]byte("a3"), []byte("4")) 169 | kv.Put([]byte("b"), []byte("2")) 170 | kv.Put([]byte("c"), []byte("3")) 171 | 172 | repl(kv) 173 | } 174 | -------------------------------------------------------------------------------- /expression.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | ) 8 | 9 | /* 10 | Query Examples: 11 | query 'where key ^= "test"' // key prefix match 12 | query 'where key ~= "[regexp]"' // key regexp match 13 | query 'where value ^= "test"' // value prefix match 14 | query 'where value ~= "[regexp]"' // value regexp match 15 | */ 16 | 17 | type KVKeyword byte 18 | type Operator byte 19 | type Type byte 20 | 21 | const ( 22 | KeyKW KVKeyword = 1 23 | ValueKW KVKeyword = 2 24 | 25 | Unknown Operator = 0 26 | And Operator = 1 27 | Or Operator = 2 28 | Not Operator = 3 29 | Eq Operator = 4 30 | NotEq Operator = 5 31 | PrefixMatch Operator = 6 32 | RegExpMatch Operator = 7 33 | Add Operator = 8 34 | Sub Operator = 9 35 | Mul Operator = 10 36 | Div Operator = 11 37 | Gt Operator = 12 38 | Gte Operator = 13 39 | Lt Operator = 14 40 | Lte Operator = 15 41 | In Operator = 16 42 | Between Operator = 17 43 | KWAnd Operator = 18 44 | KWOr Operator = 19 45 | 46 | TUNKNOWN Type = 0 47 | TBOOL Type = 1 48 | TSTR Type = 2 49 | TNUMBER Type = 3 50 | TIDENT Type = 4 51 | TLIST Type = 5 52 | TJSON Type = 6 53 | ) 54 | 55 | var ( 56 | KVKeywordToString = map[KVKeyword]string{ 57 | KeyKW: 
"KEY", 58 | ValueKW: "VALUE", 59 | } 60 | 61 | OperatorToString = map[Operator]string{ 62 | Eq: "=", 63 | NotEq: "!=", 64 | And: "&", 65 | Or: "|", 66 | Not: "!", 67 | PrefixMatch: "^=", 68 | RegExpMatch: "~=", 69 | Add: "+", 70 | Sub: "-", 71 | Mul: "*", 72 | Div: "/", 73 | Gt: ">", 74 | Gte: ">=", 75 | Lt: "<", 76 | Lte: "<=", 77 | In: "in", 78 | Between: "between", 79 | KWAnd: "and", 80 | KWOr: "or", 81 | } 82 | 83 | StringToOperator = map[string]Operator{ 84 | "=": Eq, 85 | "&": And, 86 | "|": Or, 87 | "!": Not, 88 | "^=": PrefixMatch, 89 | "~=": RegExpMatch, 90 | "!=": NotEq, 91 | "+": Add, 92 | "-": Sub, 93 | "*": Mul, 94 | "/": Div, 95 | ">": Gt, 96 | ">=": Gte, 97 | "<": Lt, 98 | "<=": Lte, 99 | "in": In, 100 | "between": Between, 101 | "and": KWAnd, 102 | "or": KWOr, 103 | } 104 | ) 105 | 106 | func BuildOp(pos int, op string) (Operator, error) { 107 | ret, have := StringToOperator[op] 108 | if !have { 109 | return Unknown, NewSyntaxError(pos, "Unknown operator") 110 | } 111 | return ret, nil 112 | } 113 | 114 | /* 115 | query: where key ^= "test" & value ~= "test" 116 | WhereStmt { 117 | Expr: BinaryOpExpr { 118 | Op: "&", 119 | Left: BinaryOpExpr { 120 | Op: "^=", 121 | Left: FieldExpr{Field: KEY}, 122 | Right: StringExpr{Data: "test"}, 123 | }, 124 | Right: BinaryOpExpr { 125 | Op: "~=", 126 | Left: FieldExpr{Field: VALUE}, 127 | Right: StringExpr{Data: "test"}, 128 | } 129 | }, 130 | } 131 | */ 132 | 133 | var ( 134 | _ Expression = (*BinaryOpExpr)(nil) 135 | _ Expression = (*FieldExpr)(nil) 136 | _ Expression = (*FieldReferenceExpr)(nil) 137 | _ Expression = (*StringExpr)(nil) 138 | _ Expression = (*NotExpr)(nil) 139 | _ Expression = (*FunctionCallExpr)(nil) 140 | _ Expression = (*NameExpr)(nil) 141 | _ Expression = (*NumberExpr)(nil) 142 | _ Expression = (*FloatExpr)(nil) 143 | _ Expression = (*BoolExpr)(nil) 144 | _ Expression = (*ListExpr)(nil) 145 | _ Expression = (*FieldAccessExpr)(nil) 146 | ) 147 | 148 | type CheckCtx struct { 149 | Fields 
[]Expression 150 | FieldNames []string 151 | FieldTypes []Type 152 | NotAllowKey bool 153 | NotAllowValue bool 154 | } 155 | 156 | func (c *CheckCtx) GetNamedExpr(name string) (Expression, bool) { 157 | for i, fname := range c.FieldNames { 158 | if fname == name { 159 | if len(c.Fields) > i { 160 | return c.Fields[i], true 161 | } 162 | } 163 | } 164 | return nil, false 165 | } 166 | 167 | type WalkCallback func(e Expression) bool 168 | 169 | type Expression interface { 170 | Check(ctx *CheckCtx) error 171 | String() string 172 | Execute(kv KVPair, ctx *ExecuteCtx) (any, error) 173 | ExecuteBatch(chunk []KVPair, ctx *ExecuteCtx) ([]any, error) 174 | ReturnType() Type 175 | GetPos() int 176 | Walk(cb WalkCallback) 177 | } 178 | 179 | type BinaryOpExpr struct { 180 | Pos int 181 | Op Operator 182 | Left Expression 183 | Right Expression 184 | } 185 | 186 | func (e *BinaryOpExpr) String() string { 187 | op := OperatorToString[e.Op] 188 | switch op { 189 | case "between": 190 | list, ok := e.Right.(*ListExpr) 191 | if !ok || len(list.List) != 2 { 192 | return fmt.Sprintf("(%s %s %s)", e.Left.String(), op, e.Right.String()) 193 | } 194 | return fmt.Sprintf("(%s BETWEEN %s AND %s)", e.Left.String(), list.List[0].String(), list.List[1].String()) 195 | default: 196 | return fmt.Sprintf("(%s %s %s)", e.Left.String(), op, e.Right.String()) 197 | } 198 | } 199 | 200 | func (e *BinaryOpExpr) GetPos() int { 201 | return e.Pos 202 | } 203 | 204 | func (e *BinaryOpExpr) ReturnType() Type { 205 | switch e.Op { 206 | case And, Or, Not, Eq, NotEq, PrefixMatch, RegExpMatch, Gt, Gte, Lt, Lte, In, Between, KWAnd, KWOr: 207 | return TBOOL 208 | case Sub, Mul, Div: 209 | return TNUMBER 210 | case Add: 211 | if e.Left.ReturnType() == TSTR { 212 | return TSTR 213 | } 214 | return TNUMBER 215 | } 216 | return TUNKNOWN 217 | } 218 | 219 | type FieldExpr struct { 220 | Pos int 221 | Field KVKeyword 222 | } 223 | 224 | func (e *FieldExpr) String() string { 225 | return fmt.Sprintf("%s", 
KVKeywordToString[e.Field]) 226 | } 227 | 228 | func (e *FieldExpr) ReturnType() Type { 229 | return TSTR 230 | } 231 | 232 | func (e *FieldExpr) GetPos() int { 233 | return e.Pos 234 | } 235 | 236 | type StringExpr struct { 237 | Pos int 238 | Data string 239 | } 240 | 241 | func (e *StringExpr) String() string { 242 | return fmt.Sprintf("'%s'", e.Data) 243 | } 244 | 245 | func (e *StringExpr) ReturnType() Type { 246 | return TSTR 247 | } 248 | 249 | func (e *StringExpr) GetPos() int { 250 | return e.Pos 251 | } 252 | 253 | type NotExpr struct { 254 | Pos int 255 | Right Expression 256 | } 257 | 258 | func (e *NotExpr) String() string { 259 | return fmt.Sprintf("!(%s)", e.Right.String()) 260 | } 261 | 262 | func (e *NotExpr) ReturnType() Type { 263 | return TBOOL 264 | } 265 | 266 | func (e *NotExpr) GetPos() int { 267 | return e.Pos 268 | } 269 | 270 | type FunctionCallExpr struct { 271 | Pos int 272 | Name Expression 273 | Args []Expression 274 | Result any 275 | } 276 | 277 | func (e *FunctionCallExpr) GetPos() int { 278 | return e.Pos 279 | } 280 | 281 | func (e *FunctionCallExpr) String() string { 282 | args := make([]string, len(e.Args)) 283 | for i, expr := range e.Args { 284 | args[i] = expr.String() 285 | } 286 | return fmt.Sprintf("%s(%s)", e.Name.String(), strings.Join(args, ", ")) 287 | } 288 | 289 | func (e *FunctionCallExpr) ReturnType() Type { 290 | fname, err := GetFuncNameFromExpr(e) 291 | if err != nil { 292 | return TUNKNOWN 293 | } 294 | 295 | if funcObj, have := GetScalarFunctionByName(fname); have { 296 | return funcObj.ReturnType 297 | } 298 | if funcObj, have := GetAggrFunctionByName(fname); have { 299 | return funcObj.ReturnType 300 | } 301 | return TUNKNOWN 302 | } 303 | 304 | type NameExpr struct { 305 | Pos int 306 | Data string 307 | } 308 | 309 | func (e *NameExpr) GetPos() int { 310 | return e.Pos 311 | } 312 | 313 | func (e *NameExpr) String() string { 314 | return fmt.Sprintf("%s", e.Data) 315 | } 316 | 317 | func (e *NameExpr) 
ReturnType() Type { 318 | return TIDENT 319 | } 320 | 321 | type FieldReferenceExpr struct { 322 | Name *NameExpr 323 | FieldExpr Expression 324 | } 325 | 326 | func (e *FieldReferenceExpr) GetPos() int { 327 | return e.Name.Pos 328 | } 329 | 330 | func (e *FieldReferenceExpr) String() string { 331 | return fmt.Sprintf("`%s`", e.Name.Data) 332 | } 333 | 334 | func (e *FieldReferenceExpr) ReturnType() Type { 335 | return e.FieldExpr.ReturnType() 336 | } 337 | 338 | type NumberExpr struct { 339 | Pos int 340 | Data string 341 | Int int64 342 | } 343 | 344 | func (e *NumberExpr) GetPos() int { 345 | return e.Pos 346 | } 347 | 348 | func newNumberExpr(pos int, data string) *NumberExpr { 349 | num, err := strconv.ParseInt(data, 10, 64) 350 | if err != nil { 351 | num = 0 352 | } 353 | return &NumberExpr{ 354 | Pos: pos, 355 | Data: data, 356 | Int: num, 357 | } 358 | } 359 | 360 | func (e *NumberExpr) String() string { 361 | return fmt.Sprintf("%s", e.Data) 362 | } 363 | 364 | func (e *NumberExpr) ReturnType() Type { 365 | return TNUMBER 366 | } 367 | 368 | type FloatExpr struct { 369 | Pos int 370 | Data string 371 | Float float64 372 | } 373 | 374 | func (e *FloatExpr) GetPos() int { 375 | return e.Pos 376 | } 377 | 378 | func newFloatExpr(pos int, data string) *FloatExpr { 379 | num, err := strconv.ParseFloat(data, 64) 380 | if err != nil { 381 | num = 0.0 382 | } 383 | return &FloatExpr{ 384 | Pos: pos, 385 | Data: data, 386 | Float: num, 387 | } 388 | } 389 | 390 | func (e *FloatExpr) String() string { 391 | return fmt.Sprintf("%s", e.Data) 392 | } 393 | 394 | func (e *FloatExpr) ReturnType() Type { 395 | return TNUMBER 396 | } 397 | 398 | type BoolExpr struct { 399 | Pos int 400 | Data string 401 | Bool bool 402 | } 403 | 404 | func (e *BoolExpr) String() string { 405 | return fmt.Sprintf("%s", e.Data) 406 | } 407 | 408 | func (e *BoolExpr) ReturnType() Type { 409 | return TBOOL 410 | } 411 | 412 | func (e *BoolExpr) GetPos() int { 413 | return e.Pos 414 | } 415 | 
416 | type ListExpr struct { 417 | Pos int 418 | List []Expression 419 | } 420 | 421 | func (e *ListExpr) GetPos() int { 422 | return e.Pos 423 | } 424 | 425 | func (e *ListExpr) String() string { 426 | ret := make([]string, len(e.List)) 427 | for i, item := range e.List { 428 | ret[i] = item.String() 429 | } 430 | return fmt.Sprintf("(%s)", strings.Join(ret, ", ")) 431 | } 432 | 433 | func (e *ListExpr) ReturnType() Type { 434 | return TLIST 435 | } 436 | 437 | type FieldAccessExpr struct { 438 | Pos int 439 | Left Expression 440 | FieldName Expression 441 | } 442 | 443 | func (e *FieldAccessExpr) GetPos() int { 444 | return e.Pos 445 | } 446 | 447 | func (e *FieldAccessExpr) String() string { 448 | left := e.Left.String() 449 | fname := e.FieldName.String() 450 | return fmt.Sprintf("%s[%s]", left, fname) 451 | } 452 | 453 | func (e *FieldAccessExpr) ReturnType() Type { 454 | return TSTR 455 | } 456 | -------------------------------------------------------------------------------- /expression_exec_test.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "testing" 7 | ) 8 | 9 | func TestExec1(t *testing.T) { 10 | ctx := NewExecuteCtx() 11 | query := "where key = 'test' & value = 'x'" 12 | _, exec, err := BuildExecutor(query) 13 | if err != nil { 14 | t.Fatal(err) 15 | } 16 | kv := NewKVPStr("test", "x") 17 | ok, err := exec.Filter(kv, ctx) 18 | if err != nil || !ok { 19 | t.Fatal(err) 20 | } 21 | fmt.Println(ok) 22 | ctx.Clear() 23 | kv = NewKVPStr("test", "z") 24 | ok, err = exec.Filter(kv, ctx) 25 | if err != nil || ok { 26 | t.Fatal(err) 27 | } 28 | fmt.Println(ok) 29 | } 30 | 31 | func TestExec2(t *testing.T) { 32 | ctx := NewExecuteCtx() 33 | query := "where key ^= 'test' & value ^= 'z'" 34 | kvs := []KVPair{ 35 | NewKVPStr("test1", "z1"), 36 | NewKVPStr("test2", "z2"), 37 | NewKVPStr("test3", "z3"), 38 | NewKVPStr("test4", "x1"), 39 | } 40 | _, exec, err := 
BuildExecutor(query) 41 | if err != nil { 42 | t.Fatal(err) 43 | } 44 | ret, err := exec.FilterBatch(kvs, ctx) 45 | if err != nil { 46 | t.Fatal(err) 47 | } 48 | fmt.Println(ret) 49 | if fmt.Sprintf("%v", ret) != "[true true true false]" { 50 | t.Fatalf("Return got wrong: %v", ret) 51 | } 52 | } 53 | 54 | func TestExec3(t *testing.T) { 55 | ctx := NewExecuteCtx() 56 | query := "where (key = 'test1' | key = 'test4') & value ^= 'z'" 57 | kvs := []KVPair{ 58 | NewKVPStr("test1", "z1"), 59 | NewKVPStr("test2", "z2"), 60 | NewKVPStr("test3", "z3"), 61 | NewKVPStr("test4", "x1"), 62 | } 63 | _, exec, err := BuildExecutor(query) 64 | if err != nil { 65 | t.Fatal(err) 66 | } 67 | ret, err := exec.FilterBatch(kvs, ctx) 68 | if err != nil { 69 | t.Fatal(err) 70 | } 71 | fmt.Println(ret) 72 | if fmt.Sprintf("%v", ret) != "[true false false false]" { 73 | t.Fatalf("Return got wrong: %v", ret) 74 | } 75 | } 76 | 77 | func TestExec4(t *testing.T) { 78 | ctx := NewExecuteCtx() 79 | query := "where key != 'test1' & value ^= 'z'" 80 | kvs := []KVPair{ 81 | NewKVPStr("test1", "z1"), 82 | NewKVPStr("test2", "z2"), 83 | NewKVPStr("test3", "z3"), 84 | NewKVPStr("test4", "x1"), 85 | } 86 | _, exec, err := BuildExecutor(query) 87 | if err != nil { 88 | t.Fatal(err) 89 | } 90 | ret, err := exec.FilterBatch(kvs, ctx) 91 | if err != nil { 92 | t.Fatal(err) 93 | } 94 | fmt.Println(ret) 95 | if fmt.Sprintf("%v", ret) != "[false true true false]" { 96 | t.Fatalf("Return got wrong: %v", ret) 97 | } 98 | } 99 | 100 | func TestExec5(t *testing.T) { 101 | ctx := NewExecuteCtx() 102 | query := "where key in ('test1', 'test2')" 103 | kvs := []KVPair{ 104 | NewKVPStr("test1", "z1"), 105 | NewKVPStr("test2", "z2"), 106 | NewKVPStr("test3", "z3"), 107 | NewKVPStr("test4", "x1"), 108 | } 109 | _, exec, err := BuildExecutor(query) 110 | if err != nil { 111 | t.Fatal(err) 112 | } 113 | ret, err := exec.FilterBatch(kvs, ctx) 114 | if err != nil { 115 | t.Fatal(err) 116 | } 117 | fmt.Println(ret) 118 | if 
fmt.Sprintf("%v", ret) != "[true true false false]" { 119 | t.Fatalf("Return got wrong: %v", ret) 120 | } 121 | } 122 | 123 | type mockStorage struct { 124 | data []KVPair 125 | } 126 | 127 | func newMockStorage(kvs []KVPair) Storage { 128 | return &mockStorage{ 129 | data: kvs, 130 | } 131 | } 132 | 133 | func (t *mockStorage) Get(key []byte) ([]byte, error) { 134 | for _, d := range t.data { 135 | if bytes.Equal(key, d.Key) { 136 | return d.Value, nil 137 | } 138 | } 139 | return nil, nil 140 | } 141 | 142 | func (t *mockStorage) Put(key []byte, value []byte) error { 143 | return nil 144 | } 145 | 146 | func (t *mockStorage) BatchPut(kvs []KVPair) error { 147 | return nil 148 | } 149 | 150 | func (t *mockStorage) Delete(key []byte) error { 151 | return nil 152 | } 153 | 154 | func (t *mockStorage) BatchDelete(key [][]byte) error { 155 | return nil 156 | } 157 | 158 | func (t *mockStorage) Cursor() (Cursor, error) { 159 | return &mockSmokeCursor{ 160 | storage: t, 161 | idx: 0, 162 | }, nil 163 | } 164 | 165 | type mockSmokeCursor struct { 166 | storage *mockStorage 167 | idx int 168 | } 169 | 170 | func (c *mockSmokeCursor) Seek(prefix []byte) error { 171 | return nil 172 | } 173 | 174 | func (c *mockSmokeCursor) Next() ([]byte, []byte, error) { 175 | if c.idx >= len(c.storage.data) { 176 | return nil, nil, nil 177 | } 178 | kvp := c.storage.data[c.idx] 179 | c.idx += 1 180 | return kvp.Key, kvp.Value, nil 181 | } 182 | 183 | func TestExec6(t *testing.T) { 184 | ctx := NewExecuteCtx() 185 | query := "select key, value, int(split(value, '_')[1]) as sv where key ^= 'k' & sv > 10 & sv < 50" 186 | kvs := []KVPair{} 187 | for i := 0; i < 100; i++ { 188 | gkey := fmt.Sprintf("k%d", i+1) 189 | gval := fmt.Sprintf("%s_%d", gkey, i+1) 190 | kvs = append(kvs, NewKVPStr(gkey, gval)) 191 | } 192 | txn := newMockStorage(kvs) 193 | opt := NewOptimizer(query) 194 | plan, err := opt.BuildPlan(txn) 195 | if err != nil { 196 | t.Fatal(err) 197 | } 198 | 199 | rows, err := 
plan.Batch(ctx) 200 | if err != nil { 201 | t.Fatal(err) 202 | } 203 | fmt.Println(rows) 204 | if len(rows) < PlanBatchSize { 205 | t.Fatal("Should more than PlanBatchSize") 206 | } 207 | if ctx.Hit < 1 { 208 | t.Fatal("Should has hits") 209 | } else { 210 | fmt.Println("Hits:", ctx.Hit) 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /expression_optimizer.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import "fmt" 4 | 5 | type ExpressionOptimizer struct { 6 | Root Expression 7 | parent Expression 8 | } 9 | 10 | func (o *ExpressionOptimizer) Optimize() Expression { 11 | // Optimize twice will fully evaluate constant 12 | newRoot := o.optimize(o.Root) 13 | newRoot = o.optimize(newRoot) 14 | return newRoot 15 | } 16 | 17 | func (o *ExpressionOptimizer) optimize(expr Expression) Expression { 18 | switch e := expr.(type) { 19 | case *BinaryOpExpr: 20 | o.tryReorderBinaryOp(e) 21 | nexpr, _ := o.tryOptimizeBinaryOpExecute(e) 22 | nexpr, _ = o.tryOptimizeAndOr(nexpr) 23 | return nexpr 24 | case *FunctionCallExpr: 25 | nexpr, _ := o.tryOptimizeFunctionCall(e) 26 | return nexpr 27 | } 28 | return expr 29 | } 30 | 31 | func (o *ExpressionOptimizer) tryReorderBinaryOp(e *BinaryOpExpr) { 32 | var ( 33 | leftIsValue = false 34 | rightIsValue = false 35 | leftIsOp = false 36 | rightIsOp = false 37 | leftOpExpr *BinaryOpExpr 38 | ) 39 | 40 | switch left := e.Left.(type) { 41 | case *BinaryOpExpr: 42 | o.tryReorderBinaryOp(left) 43 | leftOpExpr = left 44 | leftIsOp = true 45 | case *StringExpr, *NumberExpr, *FloatExpr: 46 | leftIsValue = true 47 | } 48 | 49 | switch right := e.Right.(type) { 50 | case *BinaryOpExpr: 51 | o.tryReorderBinaryOp(right) 52 | rightIsOp = true 53 | case *StringExpr, *NumberExpr, *FloatExpr: 54 | rightIsValue = true 55 | } 56 | 57 | if e.Op != Add && e.Op != Mul { 58 | return 59 | } 60 | 61 | if !leftIsValue && leftIsOp && rightIsValue && 
!rightIsOp { 62 | // fmt.Println("DEBUG:", e) 63 | if leftOpExpr.Op == e.Op { 64 | switch rexpr := leftOpExpr.Right.(type) { 65 | case *StringExpr, *NumberExpr, *FloatExpr: 66 | // (ANY op VALUE) op VALUE 67 | e.Left = leftOpExpr.Left 68 | e.Right = &BinaryOpExpr{Pos: e.GetPos(), Op: e.Op, Left: leftOpExpr.Right, Right: e.Right} 69 | case *BinaryOpExpr: 70 | if isBinaryOpExprAllValue(rexpr, e.Op) { 71 | e.Left = leftOpExpr.Left 72 | e.Right = &BinaryOpExpr{Pos: e.GetPos(), Op: e.Op, Left: leftOpExpr.Right, Right: e.Right} 73 | } 74 | } 75 | } 76 | // fmt.Println("DEBUG:", e) 77 | } 78 | return 79 | } 80 | 81 | func isBinaryOpExprAllValue(expr *BinaryOpExpr, op Operator) bool { 82 | if expr.Op != op { 83 | return false 84 | } 85 | lIsValue := false 86 | rIsValue := false 87 | switch le := expr.Left.(type) { 88 | case *StringExpr, *NumberExpr, *FloatExpr: 89 | lIsValue = true 90 | case *BinaryOpExpr: 91 | lIsValue = isBinaryOpExprAllValue(le, op) 92 | } 93 | 94 | switch re := expr.Right.(type) { 95 | case *StringExpr, *NumberExpr, *FloatExpr: 96 | rIsValue = true 97 | case *BinaryOpExpr: 98 | rIsValue = isBinaryOpExprAllValue(re, op) 99 | } 100 | return lIsValue && rIsValue 101 | } 102 | 103 | func (o *ExpressionOptimizer) tryOptimizeBinaryOpExecute(e *BinaryOpExpr) (Expression, bool) { 104 | leftIsValue := false 105 | rightIsValue := false 106 | switch left := e.Left.(type) { 107 | case *BinaryOpExpr: 108 | e.Left, leftIsValue = o.tryOptimizeBinaryOpExecute(left) 109 | case *FunctionCallExpr: 110 | e.Left, leftIsValue = o.tryOptimizeFunctionCall(left) 111 | case *StringExpr, *NumberExpr, *FloatExpr, *BoolExpr: 112 | leftIsValue = true 113 | } 114 | 115 | switch right := e.Right.(type) { 116 | case *BinaryOpExpr: 117 | e.Right, rightIsValue = o.tryOptimizeBinaryOpExecute(right) 118 | case *FunctionCallExpr: 119 | e.Right, rightIsValue = o.tryOptimizeFunctionCall(right) 120 | case *StringExpr, *NumberExpr, *FloatExpr, *BoolExpr: 121 | rightIsValue = true 122 | } 123 | 
// Not value 124 | if !(leftIsValue && rightIsValue) { 125 | return e, false 126 | } 127 | leftPos := e.Left.GetPos() 128 | switch e.Op { 129 | case Add, Sub, Mul, Div: 130 | ret, err := e.Execute(NewKVP(nil, nil), nil) 131 | if err == nil { 132 | switch e.Left.(type) { 133 | case *StringExpr: 134 | return &StringExpr{Pos: leftPos, Data: ret.(string)}, true 135 | case *NumberExpr: 136 | switch cret := ret.(type) { 137 | case int64: 138 | return &NumberExpr{Pos: leftPos, Data: fmt.Sprintf("%v", cret), Int: cret}, true 139 | case float64: 140 | return &NumberExpr{Pos: leftPos, Data: fmt.Sprintf("%v", int64(cret)), Int: int64(cret)}, true 141 | } 142 | case *FloatExpr: 143 | switch cret := ret.(type) { 144 | case int64: 145 | return &FloatExpr{Pos: leftPos, Data: fmt.Sprintf("%v", float64(cret)), Float: float64(cret)}, true 146 | case float64: 147 | return &FloatExpr{Pos: leftPos, Data: fmt.Sprintf("%v", cret), Float: cret}, true 148 | } 149 | } 150 | } 151 | case And, Or: 152 | ret, err := e.Execute(NewKVP(nil, nil), nil) 153 | if err == nil { 154 | return &BoolExpr{Pos: leftPos, Data: fmt.Sprintf("%v", ret), Bool: ret.(bool)}, true 155 | } 156 | case Eq, NotEq, Gt, Gte, Lt, Lte: 157 | ret, err := e.Execute(NewKVP(nil, nil), nil) 158 | if err == nil { 159 | return &BoolExpr{Pos: leftPos, Data: fmt.Sprintf("%v", ret), Bool: ret.(bool)}, true 160 | } 161 | } 162 | return e, false 163 | } 164 | 165 | func (o *ExpressionOptimizer) tryOptimizeAndOr(expr Expression) (Expression, bool) { 166 | var ( 167 | leftVal bool 168 | rightVal bool 169 | leftIsValue = false 170 | rightIsValue = false 171 | ) 172 | 173 | e, ok := expr.(*BinaryOpExpr) 174 | if !ok { 175 | return expr, false 176 | } 177 | if e.Op != And && e.Op != Or { 178 | return e, false 179 | } 180 | switch left := e.Left.(type) { 181 | case *BoolExpr: 182 | leftIsValue = true 183 | leftVal = left.Bool 184 | } 185 | 186 | switch right := e.Right.(type) { 187 | case *BoolExpr: 188 | rightIsValue = true 189 | rightVal 
= right.Bool 190 | } 191 | 192 | if leftIsValue && !rightIsValue { 193 | switch e.Op { 194 | case And: 195 | if leftVal { 196 | // true & Expr => Expr 197 | return e.Right, true 198 | } else { 199 | // false & Expr => false 200 | return &BoolExpr{Pos: e.Left.GetPos(), Data: "false", Bool: false}, true 201 | } 202 | case Or: 203 | if leftVal { 204 | // true | Expr => true 205 | return &BoolExpr{Pos: e.Left.GetPos(), Data: "true", Bool: true}, true 206 | } else { 207 | // false | Expr => Expr 208 | return e.Right, true 209 | } 210 | } 211 | } 212 | 213 | if rightIsValue && !leftIsValue { 214 | switch e.Op { 215 | case And: 216 | if rightVal { 217 | // Expr & true => Expr 218 | return e.Left, true 219 | } else { 220 | // Expr & false => false 221 | return &BoolExpr{Pos: e.Right.GetPos(), Data: "false", Bool: false}, true 222 | } 223 | case Or: 224 | if rightVal { 225 | // Expr | true => true 226 | return &BoolExpr{Pos: e.Right.GetPos(), Data: "true", Bool: true}, true 227 | } else { 228 | // Expr | false => Expr 229 | return e.Left, true 230 | } 231 | } 232 | } 233 | 234 | if rightIsValue && leftIsValue { 235 | switch e.Op { 236 | case And: 237 | if leftVal && rightVal { 238 | return &BoolExpr{Pos: e.Left.GetPos(), Data: "true", Bool: true}, true 239 | } 240 | return &BoolExpr{Pos: e.Left.GetPos(), Data: "false", Bool: false}, true 241 | case Or: 242 | if leftVal || rightVal { 243 | return &BoolExpr{Pos: e.Left.GetPos(), Data: "true", Bool: true}, true 244 | } 245 | return &BoolExpr{Pos: e.Left.GetPos(), Data: "false", Bool: false}, true 246 | } 247 | } 248 | 249 | return e, false 250 | } 251 | 252 | func (o *ExpressionOptimizer) tryOptimizeFunctionCall(e *FunctionCallExpr) (Expression, bool) { 253 | allIsValue := true 254 | for i, arg := range e.Args { 255 | nexpr := o.optimize(arg) 256 | e.Args[i] = nexpr 257 | switch nexpr.(type) { 258 | case *StringExpr, *NumberExpr, *FloatExpr, *BoolExpr: 259 | // Value 260 | default: 261 | allIsValue = false 262 | } 263 | } 264 
| 265 | if !(allIsValue && IsScalarFuncExpr(e)) { 266 | return e, false 267 | } 268 | 269 | retTp := e.ReturnType() 270 | switch retTp { 271 | case TJSON: 272 | return e, false 273 | } 274 | ret, err := e.Execute(NewKVP(nil, nil), nil) 275 | if err == nil { 276 | switch retTp { 277 | case TSTR: 278 | return &StringExpr{Pos: e.GetPos(), Data: ret.(string)}, true 279 | case TNUMBER: 280 | iret, ok := ret.(int64) 281 | if ok { 282 | return &NumberExpr{Pos: e.GetPos(), Data: fmt.Sprintf("%v", ret), Int: iret}, true 283 | } 284 | fret, ok := ret.(float64) 285 | if ok { 286 | return &FloatExpr{Pos: e.GetPos(), Data: fmt.Sprintf("%v", ret), Float: fret}, true 287 | } 288 | case TBOOL: 289 | if ret.(bool) { 290 | return &BoolExpr{Pos: e.GetPos(), Data: "true", Bool: true}, true 291 | } 292 | return &BoolExpr{Pos: e.GetPos(), Data: "false", Bool: false}, true 293 | } 294 | } 295 | return e, false 296 | } 297 | -------------------------------------------------------------------------------- /expression_test.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "sort" 7 | "testing" 8 | ) 9 | 10 | var benchmarkChunkSize = 32 11 | 12 | type mockQueryStorage struct { 13 | data []KVPair 14 | } 15 | 16 | func newMockQueryStorage(data []KVPair) *mockQueryStorage { 17 | sort.Slice(data, func(i, j int) bool { 18 | return bytes.Compare(data[i].Key, data[j].Key) < 0 19 | }) 20 | return &mockQueryStorage{ 21 | data: data, 22 | } 23 | } 24 | 25 | func (t *mockQueryStorage) Get(key []byte) ([]byte, error) { 26 | for _, kvp := range t.data { 27 | if bytes.Equal(kvp.Key, key) { 28 | return kvp.Value, nil 29 | } 30 | } 31 | return nil, nil 32 | } 33 | 34 | func (t *mockQueryStorage) Put(key []byte, value []byte) error { 35 | return nil 36 | } 37 | 38 | func (t *mockQueryStorage) BatchPut(kvs []KVPair) error { 39 | return nil 40 | } 41 | 42 | func (t *mockQueryStorage) Delete(key []byte) error { 43 | 
return nil 44 | } 45 | 46 | func (t *mockQueryStorage) BatchDelete(key [][]byte) error { 47 | return nil 48 | } 49 | 50 | func (t *mockQueryStorage) Cursor() (Cursor, error) { 51 | return &mockCursor{ 52 | data: t.data, 53 | idx: 0, 54 | length: len(t.data), 55 | }, nil 56 | } 57 | 58 | type mockCursor struct { 59 | data []KVPair 60 | idx int 61 | length int 62 | } 63 | 64 | func (c *mockCursor) Seek(key []byte) error { 65 | for c.idx < c.length { 66 | row := c.data[c.idx] 67 | if bytes.Compare(row.Key, key) >= 0 { 68 | break 69 | } 70 | c.idx++ 71 | } 72 | return nil 73 | } 74 | 75 | func (c *mockCursor) Next() (key []byte, val []byte, err error) { 76 | if c.idx >= c.length { 77 | return nil, nil, nil 78 | } 79 | ret := c.data[c.idx] 80 | c.idx++ 81 | return ret.Key, ret.Value, nil 82 | } 83 | 84 | func generateChunk(size int) []KVPair { 85 | ret := make([]KVPair, size) 86 | for i := 0; i < size; i++ { 87 | key := fmt.Sprintf("key-%d", i) 88 | val := fmt.Sprintf("%d", i) 89 | ret[i] = NewKVPStr(key, val) 90 | } 91 | return ret 92 | } 93 | 94 | func BenchmarkExpressionEvalVec(b *testing.B) { 95 | chunk := generateChunk(benchmarkChunkSize) 96 | query := "where key ^= 'key-1' & int(value) + int(value) * 8 > 10" 97 | _, exec, err := BuildExecutor(query) 98 | if err != nil { 99 | b.Fatal(err) 100 | } 101 | ctx := NewExecuteCtx() 102 | b.ResetTimer() 103 | for n := 0; n < b.N; n++ { 104 | _, err = exec.filterChunk(chunk, ctx) 105 | if err != nil { 106 | b.Fatal(err) 107 | } 108 | ctx.Clear() 109 | } 110 | } 111 | 112 | func BenchmarkExpressionEval(b *testing.B) { 113 | chunk := generateChunk(benchmarkChunkSize) 114 | query := "where key ^= 'key-1' & int(value) + int(value) * 8 > 10" 115 | _, exec, err := BuildExecutor(query) 116 | if err != nil { 117 | b.Fatal(err) 118 | } 119 | ctx := NewExecuteCtx() 120 | b.ResetTimer() 121 | for n := 0; n < b.N; n++ { 122 | for i := 0; i < len(chunk); i++ { 123 | _, err = exec.Filter(chunk[i], ctx) 124 | if err != nil { 125 | 
b.Fatal(err) 126 | } 127 | ctx.Clear() 128 | } 129 | } 130 | } 131 | 132 | func BenchmarkExpressionEvalHalfVec(b *testing.B) { 133 | chunk := generateChunk(benchmarkChunkSize) 134 | query := "where key ^= 'key-1' & int(value) + int(value) * 8 > 10" 135 | _, exec, err := BuildExecutor(query) 136 | if err != nil { 137 | b.Fatal(err) 138 | } 139 | ctx := NewExecuteCtx() 140 | b.ResetTimer() 141 | for n := 0; n < b.N; n++ { 142 | _, err = exec.filterBatch(chunk, ctx) 143 | if err != nil { 144 | b.Fatal(err) 145 | } 146 | ctx.Clear() 147 | } 148 | } 149 | 150 | func BenchmarkQuery(b *testing.B) { 151 | query := "select sum(int(value)) * 2, key + '_' + 'end' as kk, int(value) as ival where key between 'k' and 'l' group by kk, ival order by ival desc" 152 | data := generateChunk(1000) 153 | qtxn := newMockQueryStorage(data) 154 | 155 | b.ResetTimer() 156 | for n := 0; n < b.N; n++ { 157 | opt := NewOptimizer(query) 158 | plan, err := opt.BuildPlan(qtxn) 159 | if err != nil { 160 | b.Fatal(err) 161 | } 162 | err = getRows(plan) 163 | if err != nil { 164 | b.Fatal(err) 165 | } 166 | } 167 | } 168 | 169 | func BenchmarkQueryBatch(b *testing.B) { 170 | query := "select sum(int(value)) * 2, key + '_' + 'end' as kk, int(value) as ival where key between 'k' and 'l' group by kk, ival order by ival desc" 171 | data := generateChunk(1000) 172 | qtxn := newMockQueryStorage(data) 173 | 174 | b.ResetTimer() 175 | for n := 0; n < b.N; n++ { 176 | opt := NewOptimizer(query) 177 | plan, err := opt.BuildPlan(qtxn) 178 | if err != nil { 179 | b.Fatal(err) 180 | } 181 | err = getRowsBatch(plan) 182 | if err != nil { 183 | b.Fatal(err) 184 | } 185 | } 186 | } 187 | 188 | func BenchmarkQuerySimple(b *testing.B) { 189 | query := "select int(value) * 2, key + '_' + 'end' as kk, int(value) as ival where key between 'k' and 'l' limit 100" 190 | data := generateChunk(1000) 191 | qtxn := newMockQueryStorage(data) 192 | 193 | b.ResetTimer() 194 | for n := 0; n < b.N; n++ { 195 | opt := 
NewOptimizer(query) 196 | plan, err := opt.BuildPlan(qtxn) 197 | if err != nil { 198 | b.Fatal(err) 199 | } 200 | err = getRows(plan) 201 | if err != nil { 202 | b.Fatal(err) 203 | } 204 | } 205 | } 206 | 207 | func BenchmarkQuerySimpleBatch(b *testing.B) { 208 | query := "select int(value) * 2, key + '_' + 'end' as kk, int(value) as ival where key between 'k' and 'l' limit 100" 209 | data := generateChunk(1000) 210 | qtxn := newMockQueryStorage(data) 211 | 212 | b.ResetTimer() 213 | for n := 0; n < b.N; n++ { 214 | opt := NewOptimizer(query) 215 | plan, err := opt.BuildPlan(qtxn) 216 | if err != nil { 217 | b.Fatal(err) 218 | } 219 | err = getRowsBatch(plan) 220 | if err != nil { 221 | b.Fatal(err) 222 | } 223 | } 224 | } 225 | 226 | func getRows(plan FinalPlan) error { 227 | ctx := NewExecuteCtx() 228 | for { 229 | cols, err := plan.Next(ctx) 230 | if err != nil { 231 | return err 232 | } 233 | if cols == nil { 234 | break 235 | } 236 | ctx.Clear() 237 | } 238 | return nil 239 | } 240 | 241 | func getRowsBatch(plan FinalPlan) error { 242 | ctx := NewExecuteCtx() 243 | for { 244 | rows, err := plan.Batch(ctx) 245 | if err != nil { 246 | return err 247 | } 248 | if len(rows) == 0 { 249 | break 250 | } 251 | ctx.Clear() 252 | } 253 | return nil 254 | } 255 | -------------------------------------------------------------------------------- /filter_optimizer_test.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "testing" 7 | ) 8 | 9 | func optimizeQuery(query string) (*ScanType, error) { 10 | p := NewParser(query) 11 | gstmt, err := p.Parse() 12 | if err != nil { 13 | return nil, err 14 | } 15 | stmt := gstmt.(*SelectStmt) 16 | o := NewFilterOptimizer(stmt.Where, nil, nil) 17 | ret := o.optimizeExpr(stmt.Where.Expr) 18 | return ret, nil 19 | } 20 | 21 | func assertScanType(t *testing.T, st *ScanType, tp byte, keys []string) { 22 | assertScanTypeWithID(0, t, st, tp, keys) 23 | 
} 24 | 25 | func assertScanTypeWithID(id int, t *testing.T, st *ScanType, tp byte, keys []string) { 26 | if tp != st.scanTp { 27 | t.Errorf("[%d] Scan Type expect %v but got %v", id, ScanTypeToString(tp), ScanTypeToString(st.scanTp)) 28 | return 29 | } 30 | kstrs := make([]string, len(st.keys)) 31 | for i, k := range st.keys { 32 | kstrs[i] = string(k) 33 | } 34 | 35 | if len(keys) != len(kstrs) { 36 | t.Errorf("[%d] Scan Type keys expect %v but got %v", id, keys, kstrs) 37 | return 38 | } 39 | 40 | if st.scanTp == MGET { 41 | sort.Strings(kstrs) 42 | } 43 | for i, k := range keys { 44 | if k != kstrs[i] { 45 | t.Errorf("[%d] Scan Type keys expect %v but got %v", id, keys, kstrs) 46 | } 47 | } 48 | } 49 | 50 | type optTData struct { 51 | query string 52 | scanTp byte 53 | keys []string 54 | } 55 | 56 | func TestOptimizers(t *testing.T) { 57 | tdata := []optTData{ 58 | // PREFIX & MGET 59 | optTData{ 60 | "select * where key ^= 'k' & (key = 'k1' | key = 'm1')", 61 | MGET, []string{"k1"}, 62 | }, 63 | optTData{ 64 | "select * where key ^= 'k' & (key = 'm1' | key = 'm2')", 65 | EMPTY, nil, 66 | }, 67 | // PREFIX | MGET 68 | optTData{ 69 | "select * where key ^= 'k' | (key = 'k1' | key = 'k2')", 70 | PREFIX, []string{"k"}, 71 | }, 72 | optTData{ 73 | "select * where key ^= 'k' | (key in ('k1', 'k2'))", 74 | PREFIX, []string{"k"}, 75 | }, 76 | 77 | optTData{ 78 | "select * where key ^= 'k' | (key = 'k1' | key = 'm2')", 79 | FULL, nil, 80 | }, 81 | // PREFIX & RANGE 82 | optTData{ 83 | "select * where key ^= 'k' & (key > 'k1' & key < 'k8')", 84 | RANGE, []string{"k1", "k8"}, 85 | }, 86 | optTData{ 87 | "select * where key ^= 'k' & (key > 'j1' & key < 'l8')", 88 | PREFIX, []string{"k"}, 89 | }, 90 | optTData{ 91 | "select * where key ^= 'k' & (key > 'k1' & key < 'l8')", 92 | RANGE, []string{"k1", "l8"}, 93 | }, 94 | optTData{ 95 | "select * where key ^= 'l' & (key > 'k1' & key < 'l8')", 96 | RANGE, []string{"l", "l8"}, 97 | }, 98 | optTData{ 99 | "select * where key ^= 
'j' & (key > 'k1' & key < 'l8')", 100 | EMPTY, nil, 101 | }, 102 | optTData{ 103 | "select * where key ^= 'm' & (key > 'k1' & key < 'l8')", 104 | EMPTY, nil, 105 | }, 106 | optTData{ 107 | "select * where key ^= 'm' & (key > 'k1' & key < 'm')", 108 | MGET, []string{"m"}, 109 | }, 110 | // PREFIX | RANGE 111 | optTData{ 112 | "select * where key ^= 'k' | (key > 'k1' & key < 'k8')", 113 | PREFIX, []string{"k"}, 114 | }, 115 | optTData{ 116 | "select * where key ^= 'k' | (key > 'j1' & key < 'l8')", 117 | RANGE, []string{"j1", "l8"}, 118 | }, 119 | optTData{ 120 | "select * where key ^= 'k' | (key > 'k1' & key < 'l8')", 121 | RANGE, []string{"k", "l8"}, 122 | }, 123 | optTData{ 124 | "select * where key ^= 'l' | (key > 'k1' & key < 'l8')", 125 | RANGE, []string{"k1", ""}, 126 | }, 127 | optTData{ 128 | "select * where key ^= 'j' | (key > 'k1' & key < 'l8')", 129 | RANGE, []string{"j", "l8"}, 130 | }, 131 | optTData{ 132 | "select * where key ^= 'm' | (key > 'k1' & key < 'l8')", 133 | RANGE, []string{"k1", ""}, 134 | }, 135 | optTData{ 136 | "select * where key ^= 'm' | (key > 'k1' & key < 'm')", 137 | RANGE, []string{"k1", ""}, 138 | }, 139 | // RANGE & RANGE 140 | optTData{ 141 | "select * where (key > 'k01' & key < 'k10') & (key > 'k05' & key < 'k12')", 142 | RANGE, []string{"k05", "k10"}, 143 | }, 144 | // RANGE | RANGE 145 | optTData{ 146 | "select * where (key > 'k1' & key < 'k5') | (key > 'k4' & key < 'k9')", 147 | RANGE, []string{"k1", "k9"}, 148 | }, 149 | optTData{ 150 | "select * where (key > 'k1' & key < 'k5') | (key > 'k8' & key < 'k9')", 151 | RANGE, []string{"k1", "k9"}, 152 | }, 153 | // RANGE & MGET 154 | optTData{ 155 | "select * where (key > 'i' & key <= 'k') & (key = 'j1' | key = 'k' | key = 'k1')", 156 | MGET, []string{"j1", "k"}, 157 | }, 158 | optTData{ 159 | "select * where (key > 'i' & key <= 'k') & (key = 'l1' | key = 'l2' | key = 'l3')", 160 | EMPTY, nil, 161 | }, 162 | // RANGE | MGET 163 | optTData{ 164 | "select * where (key > 'k1' & key < 
'k9') | (key = 'k5')", 165 | RANGE, []string{"k1", "k9"}, 166 | }, 167 | optTData{ 168 | "select * where (key > 'k1' & key < 'k9') | (key = 'l1')", 169 | RANGE, []string{"k1", "l1"}, 170 | }, 171 | optTData{ 172 | "select * where (key > 'k1' & key < 'k9') | (key = 'j1')", 173 | RANGE, []string{"j1", "k9"}, 174 | }, 175 | optTData{ 176 | "select * where (key > 'k1' & key < 'k9') | (key = 'j1' | key = 'm1')", 177 | FULL, nil, 178 | }, 179 | // NOT RANGE 180 | optTData{ 181 | "select * where !(key > 'k1' & key < 'k9')", 182 | FULL, nil, 183 | }, 184 | // Just MGET 185 | optTData{ 186 | "select * where key in ('k1', 'k2')", 187 | MGET, []string{"k1", "k2"}, 188 | }, 189 | // Just RANGE use between and 190 | optTData{ 191 | "select * where key between 'k1' and 'k2'", 192 | RANGE, []string{"k1", "k2"}, 193 | }, 194 | // and operator 195 | optTData{ 196 | "select * where key = 'k1' and key = 'k2'", 197 | EMPTY, nil, 198 | }, 199 | // or operator 200 | optTData{ 201 | "select * where key = 'k1' or key between 'k2' and 'k3'", 202 | RANGE, []string{"k1", "k3"}, 203 | }, 204 | optTData{ 205 | "select * where key = 'k1' or key ='k2'", 206 | MGET, []string{"k1", "k2"}, 207 | }, 208 | // Mix and, or keywords 209 | optTData{ 210 | "select * where (key > 'k1' and key < 'k9') or (key = 'j1' or key = 'm1')", 211 | FULL, nil, 212 | }, 213 | } 214 | 215 | for i, item := range tdata { 216 | st, err := optimizeQuery(item.query) 217 | if err != nil { 218 | t.Errorf("[%d] %v", i, err) 219 | } 220 | fmt.Printf("[%d] %s\n `- %s\n\n", i, item.query, st.String()) 221 | assertScanTypeWithID(i, t, st, item.scanTp, item.keys) 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /func.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | ) 8 | 9 | var ( 10 | funcMap = map[string]*Function{ 11 | "lower": &Function{"lower", 1, false, TSTR, 
funcToLower, funcToLowerVec}, 12 | "upper": &Function{"upper", 1, false, TSTR, funcToUpper, funcToUpperVec}, 13 | "int": &Function{"int", 1, false, TNUMBER, funcToInt, funcToIntVec}, 14 | "float": &Function{"float", 1, false, TNUMBER, funcToFloat, funcToFloatVec}, 15 | "str": &Function{"str", 1, false, TSTR, funcToString, funcToStringVec}, 16 | "is_int": &Function{"is_int", 1, false, TBOOL, funcIsInt, funcIsIntVec}, 17 | "is_float": &Function{"is_float", 1, false, TBOOL, funcIsFloat, funcIsFloatVec}, 18 | "substr": &Function{"substr", 3, false, TSTR, funcSubStr, funcSubStrVec}, 19 | "json": &Function{"json", 1, false, TJSON, funcJson, funcJsonVec}, 20 | "split": &Function{"split", 2, false, TLIST, funcSplit, funcSplitVec}, 21 | "list": &Function{"list", 1, true, TLIST, funcToList, funcToListVec}, 22 | "float_list": &Function{"float_list", 1, true, TLIST, funcFloatList, funcFloatListVec}, 23 | "int_list": &Function{"int_list", 1, true, TLIST, funcIntList, funcIntListVec}, 24 | "flist": &Function{"flist", 1, true, TLIST, funcFloatList, funcFloatListVec}, 25 | "ilist": &Function{"ilist", 1, true, TLIST, funcIntList, funcIntListVec}, 26 | "len": &Function{"len", 1, false, TNUMBER, funcLen, funcLenVec}, 27 | "join": &Function{"join", 2, true, TSTR, funcJoin, funcJoinVec}, 28 | "strlen": &Function{"strlen", 1, false, TNUMBER, funcStrlen, funcStrlenVec}, 29 | 30 | "cosine_distance": &Function{"cosine_distance", 2, false, TNUMBER, funcCosineDistance, funcCosineDistanceVec}, 31 | "l2_distance": &Function{"l2_distance", 2, false, TNUMBER, funcL2Distance, funcL2DistanceVec}, 32 | } 33 | 34 | aggrFuncMap = map[string]*AggrFunc{ 35 | "count": &AggrFunc{"count", 1, false, TNUMBER, newAggrCountFunc}, 36 | "sum": &AggrFunc{"sum", 1, false, TNUMBER, newAggrSumFunc}, 37 | "avg": &AggrFunc{"avg", 1, false, TNUMBER, newAggrAvgFunc}, 38 | "min": &AggrFunc{"min", 1, false, TNUMBER, newAggrMinFunc}, 39 | "max": &AggrFunc{"max", 1, false, TNUMBER, newAggrMaxFunc}, 40 | "quantile": 
&AggrFunc{"quantile", 2, false, TNUMBER, newAggrQuantileFunc}, 41 | "json_arrayagg": &AggrFunc{"json_arrayagg", 1, false, TSTR, newAggrJsonArrayAggFunc}, 42 | "group_concat": &AggrFunc{"group_concat", 2, false, TSTR, newAggrGroupConcatFunc}, 43 | } 44 | ) 45 | 46 | type FunctionBody func(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) 47 | type VectorFunctionBody func(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) 48 | 49 | type Function struct { 50 | Name string 51 | NumArgs int 52 | VarArgs bool 53 | ReturnType Type 54 | Body FunctionBody 55 | BodyVec VectorFunctionBody 56 | } 57 | 58 | type AggrFunc struct { 59 | Name string 60 | NumArgs int 61 | VarArgs bool 62 | ReturnType Type 63 | Body AggrFunctor 64 | } 65 | 66 | type AggrFunctor func(args []Expression) (AggrFunction, error) 67 | 68 | type AggrFunction interface { 69 | Update(kv KVPair, args []Expression, ctx *ExecuteCtx) error 70 | Complete() (any, error) 71 | Clone() AggrFunction 72 | } 73 | 74 | func GetFuncNameFromExpr(expr Expression) (string, error) { 75 | fc, ok := expr.(*FunctionCallExpr) 76 | if !ok { 77 | return "", NewSyntaxError(expr.GetPos(), "Not function call expression") 78 | } 79 | rfname, err := fc.Name.Execute(NewKVP(nil, nil), nil) 80 | if err != nil { 81 | return "", err 82 | } 83 | fname, ok := rfname.(string) 84 | if !ok { 85 | return "", NewSyntaxError(expr.GetPos(), "Invalid function name") 86 | } 87 | return strings.ToLower(fname), nil 88 | } 89 | 90 | func GetScalarFunction(expr Expression) (*Function, error) { 91 | fname, err := GetFuncNameFromExpr(expr) 92 | if err != nil { 93 | return nil, err 94 | } 95 | fobj, have := funcMap[fname] 96 | if !have { 97 | return nil, NewSyntaxError(expr.GetPos(), "Cannot find function %s", fname) 98 | } 99 | return fobj, nil 100 | } 101 | 102 | func GetScalarFunctionByName(name string) (*Function, bool) { 103 | fobj, have := funcMap[name] 104 | return fobj, have 105 | } 106 | 107 | func GetAggrFunctionByName(name 
string) (*AggrFunc, bool) { 108 | fobj, have := aggrFuncMap[name] 109 | return fobj, have 110 | } 111 | 112 | func AddScalarFunction(f *Function) { 113 | fname := strings.ToLower(f.Name) 114 | funcMap[fname] = f 115 | } 116 | 117 | func AddAggrFunction(f *AggrFunc) { 118 | fname := strings.ToLower(f.Name) 119 | aggrFuncMap[fname] = f 120 | } 121 | 122 | func IsScalarFuncExpr(expr Expression) bool { 123 | fname, err := GetFuncNameFromExpr(expr) 124 | if err != nil { 125 | return false 126 | } 127 | if _, have := funcMap[fname]; have { 128 | return true 129 | } 130 | return false 131 | } 132 | 133 | func IsAggrFuncExpr(expr Expression) bool { 134 | fname, err := GetFuncNameFromExpr(expr) 135 | if err != nil { 136 | return false 137 | } 138 | if _, have := aggrFuncMap[fname]; have { 139 | return true 140 | } 141 | return false 142 | } 143 | 144 | func IsAggrFunc(fname string) bool { 145 | _, have := aggrFuncMap[fname] 146 | return have 147 | } 148 | 149 | func toString(value any) string { 150 | switch val := value.(type) { 151 | case string: 152 | return val 153 | case []byte: 154 | return string(val) 155 | case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64: 156 | return fmt.Sprintf("%d", val) 157 | case float32, float64: 158 | return fmt.Sprintf("%f", val) 159 | case bool: 160 | if val { 161 | return "true" 162 | } 163 | return "false" 164 | default: 165 | if val == nil { 166 | return "" 167 | } 168 | return "" 169 | } 170 | } 171 | 172 | func toInt(value any, defVal int64) int64 { 173 | switch val := value.(type) { 174 | case string: 175 | if ret, err := strconv.ParseInt(val, 10, 64); err == nil { 176 | return ret 177 | } else { 178 | if ret, err := strconv.ParseFloat(val, 64); err == nil { 179 | return int64(ret) 180 | } 181 | return defVal 182 | } 183 | case []byte: 184 | if ret, err := strconv.ParseInt(string(val), 10, 64); err == nil { 185 | return ret 186 | } else { 187 | if ret, err := strconv.ParseFloat(string(val), 64); err == nil { 188 
| return int64(ret) 189 | } 190 | return defVal 191 | } 192 | case int8, int16, uint8, uint16: 193 | if ret, err := strconv.ParseInt(fmt.Sprintf("%d", val), 10, 64); err == nil { 194 | return ret 195 | } else { 196 | return defVal 197 | } 198 | case int: 199 | return int64(val) 200 | case uint: 201 | return int64(val) 202 | case int32: 203 | return int64(val) 204 | case uint32: 205 | return int64(val) 206 | case int64: 207 | return val 208 | case uint64: 209 | return int64(val) 210 | case float32: 211 | return int64(val) 212 | case float64: 213 | return int64(val) 214 | default: 215 | return defVal 216 | } 217 | } 218 | 219 | func toFloat(value any, defVal float64) float64 { 220 | switch val := value.(type) { 221 | case string: 222 | if ret, err := strconv.ParseFloat(val, 64); err == nil { 223 | return ret 224 | } else { 225 | return defVal 226 | } 227 | case []byte: 228 | if ret, err := strconv.ParseFloat(string(val), 64); err == nil { 229 | return ret 230 | } else { 231 | return defVal 232 | } 233 | case int8, int16, uint8, uint16: 234 | if ret, err := strconv.ParseFloat(fmt.Sprintf("%d", val), 64); err == nil { 235 | return ret 236 | } else { 237 | return defVal 238 | } 239 | case int: 240 | return float64(val) 241 | case uint: 242 | return float64(val) 243 | case int32: 244 | return float64(val) 245 | case uint32: 246 | return float64(val) 247 | case int64: 248 | return float64(val) 249 | case uint64: 250 | return float64(val) 251 | case float32: 252 | return float64(val) 253 | case float64: 254 | return val 255 | default: 256 | return defVal 257 | } 258 | } 259 | 260 | func toFloatList(value any) ([]float64, error) { 261 | switch val := value.(type) { 262 | case []string: 263 | ret := make([]float64, len(val)) 264 | for i := 0; i < len(val); i++ { 265 | fval, err := strconv.ParseFloat(val[i], 64) 266 | if err != nil { 267 | return nil, err 268 | } 269 | ret[i] = fval 270 | } 271 | return ret, nil 272 | case [][]byte: 273 | ret := make([]float64, len(val)) 274 
// toIntList converts a slice of a supported element type into a []int64.
//
// Text elements ([]string, [][]byte) are parsed as base-10 integers; the first
// element that fails to parse aborts the conversion and its error is returned.
// Numeric elements are converted with a plain int64 conversion, so floating
// point values lose their fractional part. A []int64 argument is returned
// as-is (the same slice, not a copy). Any other type yields an error.
func toIntList(value any) ([]int64, error) {
	switch val := value.(type) {
	case []string:
		ret := make([]int64, len(val))
		for i, s := range val {
			n, err := strconv.ParseInt(s, 10, 64)
			if err != nil {
				return nil, err
			}
			ret[i] = n
		}
		return ret, nil
	case [][]byte:
		ret := make([]int64, len(val))
		for i, b := range val {
			n, err := strconv.ParseInt(string(b), 10, 64)
			if err != nil {
				return nil, err
			}
			ret[i] = n
		}
		return ret, nil
	case []int:
		ret := make([]int64, len(val))
		for i, v := range val {
			ret[i] = int64(v)
		}
		return ret, nil
	case []uint:
		ret := make([]int64, len(val))
		for i, v := range val {
			ret[i] = int64(v)
		}
		return ret, nil
	case []int32:
		ret := make([]int64, len(val))
		for i, v := range val {
			ret[i] = int64(v)
		}
		return ret, nil
	case []uint32:
		ret := make([]int64, len(val))
		for i, v := range val {
			ret[i] = int64(v)
		}
		return ret, nil
	case []int64:
		return val, nil
	case []uint64:
		ret := make([]int64, len(val))
		for i, v := range val {
			ret[i] = int64(v)
		}
		return ret, nil
	case []float32:
		ret := make([]int64, len(val))
		for i, v := range val {
			ret[i] = int64(v)
		}
		return ret, nil
	case []float64:
		ret := make([]int64, len(val))
		for i, v := range val {
			ret[i] = int64(v)
		}
		return ret, nil
	default:
		// The message previously said "float list", copied from toFloatList.
		return nil, fmt.Errorf("Cannot convert to int list")
	}
}
// Storage is the key-value backend handed to the query plans in this package.
// Implementations are supplied by the caller; the exact semantics of each
// method (for example what Get reports for a missing key) are not defined
// here — TODO confirm against the concrete implementations.
type Storage interface {
	// Get presumably returns the value stored under key.
	Get(key []byte) (value []byte, err error)
	// Put presumably stores value under key.
	Put(key []byte, value []byte) error
	// BatchPut is the multi-pair counterpart of Put.
	BatchPut(kvs []KVPair) error
	// Delete presumably removes the entry stored under key.
	Delete(key []byte) error
	// BatchDelete is the multi-key counterpart of Delete.
	BatchDelete(keys [][]byte) error
	// Cursor returns a Cursor for iterating over the store.
	Cursor() (cursor Cursor, err error)
}

// Cursor iterates over key-value pairs of a Storage.
// NOTE(review): the end-of-data convention for Next is not visible in this
// file — verify against the scan plans that consume it.
type Cursor interface {
	// Seek presumably positions the cursor at the first key matching prefix.
	Seek(prefix []byte) error
	// Next returns the next key-value pair.
	Next() (key []byte, value []byte, err error)
}

// KVPair is a single key with its associated value.
type KVPair struct {
	Key   []byte
	Value []byte
}

// NewKVP builds a KVPair from the given byte slices. The slices are stored
// as-is, not copied.
func NewKVP(key []byte, val []byte) KVPair {
	return KVPair{
		Key:   key,
		Value: val,
	}
}

// NewKVPStr builds a KVPair from strings by converting both to byte slices.
func NewKVPStr(key string, val string) KVPair {
	return KVPair{
		Key:   []byte(key),
		Value: []byte(val),
	}
}
48 | WHERE: "WHERE", 49 | KEY: "KEY", 50 | VALUE: "VALUE", 51 | OPERATOR: "OP", 52 | STRING: "STR", 53 | LPAREN: "(", 54 | RPAREN: ")", 55 | NAME: "NAME", 56 | SEP: "SEP", 57 | NUMBER: "NUM", 58 | FLOAT: "FLOAT", 59 | LIMIT: "LIMIT", 60 | ORDER: "ORDER", 61 | BY: "BY", 62 | ASC: "ASC", 63 | DESC: "DESC", 64 | TRUE: "true", 65 | FALSE: "false", 66 | AS: "AS", 67 | GROUP: "GROUP", 68 | IN: "IN", 69 | BETWEEN: "BETWEEN", 70 | AND: "AND", 71 | LBRACK: "[", 72 | RBRACK: "]", 73 | PUT: "PUT", 74 | REMOVE: "REMOVE", 75 | SEMI: "SEMI", 76 | OR: "OR", 77 | DELETE: "DELETE", 78 | } 79 | ) 80 | 81 | const ( 82 | LowestPrec = 0 // non-operators 83 | UnaryPrec = 6 84 | HighestPrec = 7 85 | ) 86 | 87 | type Token struct { 88 | Tp TokenType 89 | Data string 90 | Pos int 91 | } 92 | 93 | func (t *Token) String() string { 94 | tp := TokenTypeToString[t.Tp] 95 | return fmt.Sprintf("Tp: %6s Data: %10s Pos: %d", tp, t.Data, t.Pos) 96 | } 97 | 98 | func (t *Token) Precedence() int { 99 | switch t.Tp { 100 | case OPERATOR: 101 | switch t.Data { 102 | case "|", "or": 103 | return 1 104 | case "&", "and": 105 | return 2 106 | case "=", "!=", "^=", "~=", ">", ">=", "<", "<=", "in", "between": 107 | return 3 108 | case "+", "-": 109 | return 4 110 | case "*", "/": 111 | return 5 112 | } 113 | } 114 | return LowestPrec 115 | } 116 | 117 | type Lexer struct { 118 | Query string 119 | Length int 120 | } 121 | 122 | func NewLexer(query string) *Lexer { 123 | return &Lexer{ 124 | Query: query, 125 | Length: len(query), 126 | } 127 | } 128 | 129 | func (l *Lexer) Split() []*Token { 130 | var ( 131 | curr string 132 | prev byte 133 | next byte 134 | ret []*Token 135 | strStart bool = false 136 | strStartChar byte = 0 137 | tokStart int = 0 138 | tokLen int = 0 139 | tokStartPos int 140 | ) 141 | for i := 0; i < l.Length; i++ { 142 | char := l.Query[i] 143 | if i < l.Length-1 { 144 | next = l.Query[i+1] 145 | } else { 146 | next = 0 147 | } 148 | switch char { 149 | case ' ': 150 | if strStart { 
151 | tokLen++ 152 | break 153 | } 154 | curr = l.Query[tokStart : tokStart+min(tokLen, l.Length-tokStart)] 155 | if token := buildToken(curr, tokStartPos); token != nil { 156 | ret = append(ret, token) 157 | } 158 | tokLen = 0 159 | tokStartPos = i + 1 160 | tokStart = i + 1 161 | case '"', '\'': 162 | if !strStart { 163 | strStart = true 164 | strStartChar = char 165 | tokStartPos = i 166 | tokStart = i + 1 167 | } else if strStartChar == char { 168 | strStart = false 169 | curr = l.Query[tokStart : tokStart+min(tokLen, l.Length-tokStart)] 170 | token := &Token{ 171 | Tp: STRING, 172 | Data: curr, 173 | Pos: tokStartPos, 174 | } 175 | ret = append(ret, token) 176 | tokLen = 0 177 | } else { 178 | tokLen++ 179 | } 180 | case '`': 181 | if !strStart { 182 | strStart = true 183 | strStartChar = char 184 | tokStartPos = i 185 | tokStart = i + 1 186 | } else if strStartChar == char { 187 | strStart = false 188 | curr = l.Query[tokStart : tokStart+min(tokLen, l.Length-tokStart)] 189 | token := &Token{ 190 | Tp: NAME, 191 | Data: curr, 192 | Pos: tokStartPos, 193 | } 194 | ret = append(ret, token) 195 | tokLen = 0 196 | } else { 197 | tokLen++ 198 | } 199 | case '~', '^', '=', '!', '*', '+', '-', '/', '>', '<': 200 | if strStart { 201 | tokLen++ 202 | break 203 | } 204 | curr = l.Query[tokStart : tokStart+min(tokLen, l.Length-tokStart)] 205 | if token := buildToken(curr, tokStartPos); token != nil { 206 | ret = append(ret, token) 207 | } 208 | tokLen = 0 209 | var token *Token = nil 210 | 211 | if next != '=' { 212 | switch char { 213 | case '!', '*', '+', '-', '/': 214 | token = &Token{ 215 | Tp: OPERATOR, 216 | Data: string(char), 217 | Pos: i, 218 | } 219 | case '>', '<': 220 | token = &Token{ 221 | Tp: OPERATOR, 222 | Data: string(char), 223 | Pos: i, 224 | } 225 | } 226 | } 227 | if token != nil { 228 | ret = append(ret, token) 229 | tokStartPos = i + 1 230 | tokStart = i + 1 231 | break 232 | } 233 | 234 | if char == '=' { 235 | switch prev { 236 | case '^': 237 | 
token = &Token{ 238 | Tp: OPERATOR, 239 | Data: "^=", 240 | Pos: i - 1, 241 | } 242 | case '~': 243 | token = &Token{ 244 | Tp: OPERATOR, 245 | Data: "~=", 246 | Pos: i - 1, 247 | } 248 | case '!': 249 | token = &Token{ 250 | Tp: OPERATOR, 251 | Data: "!=", 252 | Pos: i - 1, 253 | } 254 | case '<': 255 | token = &Token{ 256 | Tp: OPERATOR, 257 | Data: "<=", 258 | Pos: i - 1, 259 | } 260 | case '>': 261 | token = &Token{ 262 | Tp: OPERATOR, 263 | Data: ">=", 264 | Pos: i - 1, 265 | } 266 | default: 267 | token = &Token{ 268 | Tp: OPERATOR, 269 | Data: "=", 270 | Pos: i, 271 | } 272 | } 273 | if token != nil { 274 | ret = append(ret, token) 275 | } 276 | } 277 | tokStartPos = i + 1 278 | tokStart = i + 1 279 | case '&', '|', '(', ')', '[', ']': 280 | if strStart { 281 | tokLen++ 282 | break 283 | } 284 | curr = l.Query[tokStart : tokStart+min(tokLen, l.Length-tokStart)] 285 | token := buildToken(curr, tokStartPos) 286 | if token != nil { 287 | ret = append(ret, token) 288 | } 289 | switch char { 290 | case '(': 291 | token = &Token{ 292 | Tp: LPAREN, 293 | Data: string(char), 294 | Pos: i, 295 | } 296 | case ')': 297 | token = &Token{ 298 | Tp: RPAREN, 299 | Data: string(char), 300 | Pos: i, 301 | } 302 | case '[': 303 | token = &Token{ 304 | Tp: LBRACK, 305 | Data: string(char), 306 | Pos: i, 307 | } 308 | case ']': 309 | token = &Token{ 310 | Tp: RBRACK, 311 | Data: string(char), 312 | Pos: i, 313 | } 314 | default: 315 | token = &Token{ 316 | Tp: OPERATOR, 317 | Data: string(char), 318 | Pos: i, 319 | } 320 | } 321 | ret = append(ret, token) 322 | tokLen = 0 323 | tokStartPos = i + 1 324 | tokStart = i + 1 325 | case ',', ';': 326 | if strStart { 327 | tokLen++ 328 | break 329 | } 330 | curr = l.Query[tokStart : tokStart+min(tokLen, l.Length-tokStart)] 331 | token := buildToken(curr, tokStartPos) 332 | if token != nil { 333 | ret = append(ret, token) 334 | } 335 | switch char { 336 | case ',': 337 | token = &Token{ 338 | Tp: SEP, 339 | Data: string(char), 340 | 
Pos: i, 341 | } 342 | case ';': 343 | token = &Token{ 344 | Tp: SEMI, 345 | Data: string(char), 346 | Pos: i, 347 | } 348 | } 349 | ret = append(ret, token) 350 | tokLen = 0 351 | tokStartPos = i + 1 352 | tokStart = i + 1 353 | default: 354 | tokLen++ 355 | } 356 | prev = char 357 | } 358 | if tokLen > 0 { 359 | curr = l.Query[tokStart : tokStart+min(tokLen, l.Length-tokStart)] 360 | if token := buildToken(curr, tokStartPos); token != nil { 361 | ret = append(ret, token) 362 | } 363 | } 364 | return ret 365 | } 366 | 367 | func isNumber(val string) bool { 368 | if _, err := strconv.ParseInt(val, 10, 64); err == nil { 369 | return true 370 | } 371 | return false 372 | } 373 | 374 | func isFloat(val string) bool { 375 | if _, err := strconv.ParseFloat(val, 64); err == nil { 376 | return true 377 | } 378 | return false 379 | } 380 | 381 | func buildToken(curr string, pos int) *Token { 382 | curr = strings.ToLower(strings.TrimSpace(curr)) 383 | if len(curr) == 0 { 384 | return nil 385 | } 386 | token := &Token{ 387 | Data: curr, 388 | Pos: pos, 389 | } 390 | switch curr { 391 | case "select": 392 | token.Tp = SELECT 393 | return token 394 | case "where": 395 | token.Tp = WHERE 396 | return token 397 | case "key": 398 | token.Tp = KEY 399 | return token 400 | case "value": 401 | token.Tp = VALUE 402 | return token 403 | case "limit": 404 | token.Tp = LIMIT 405 | return token 406 | case "order": 407 | token.Tp = ORDER 408 | return token 409 | case "by": 410 | token.Tp = BY 411 | return token 412 | case "asc": 413 | token.Tp = ASC 414 | return token 415 | case "desc": 416 | token.Tp = DESC 417 | return token 418 | case "true": 419 | token.Tp = TRUE 420 | return token 421 | case "false": 422 | token.Tp = FALSE 423 | return token 424 | case "as": 425 | token.Tp = AS 426 | return token 427 | case "group": 428 | token.Tp = GROUP 429 | return token 430 | case "in": 431 | token.Tp = OPERATOR 432 | return token 433 | case "between": 434 | token.Tp = OPERATOR 435 | return token 
436 | case "put": 437 | token.Tp = PUT 438 | return token 439 | case "remove": 440 | token.Tp = REMOVE 441 | return token 442 | case "and": 443 | token.Tp = OPERATOR 444 | return token 445 | case "or": 446 | token.Tp = OPERATOR 447 | return token 448 | case "delete": 449 | token.Tp = DELETE 450 | return token 451 | default: 452 | if isNumber(curr) { 453 | token.Tp = NUMBER 454 | return token 455 | } else if isFloat(curr) { 456 | token.Tp = FLOAT 457 | return token 458 | } 459 | token.Tp = NAME 460 | return token 461 | } 462 | } 463 | -------------------------------------------------------------------------------- /lexer_test.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestLexer1(t *testing.T) { 9 | query := "where key = 'test' & value = 'value'" 10 | l := NewLexer(query) 11 | toks := l.Split() 12 | for _, t := range toks { 13 | fmt.Printf("%s\n", t.String()) 14 | } 15 | } 16 | 17 | func TestLexer2(t *testing.T) { 18 | query := "where key ^= 'test' | key ~= 'value' & value = 'test'" 19 | l := NewLexer(query) 20 | toks := l.Split() 21 | for _, t := range toks { 22 | fmt.Printf("%s\n", t.String()) 23 | } 24 | } 25 | 26 | func TestLexer3(t *testing.T) { 27 | query := "where key^='test'|key~='value'&value='test' " 28 | l := NewLexer(query) 29 | toks := l.Split() 30 | for _, t := range toks { 31 | fmt.Printf("%s\n", t.String()) 32 | } 33 | } 34 | 35 | func TestLexer4(t *testing.T) { 36 | query := "where key^='test'|(key~='value'&value='test')" 37 | l := NewLexer(query) 38 | toks := l.Split() 39 | for _, t := range toks { 40 | fmt.Printf("%s\n", t.String()) 41 | } 42 | } 43 | 44 | func TestLexer5(t *testing.T) { 45 | query := "where !(key^='test')" 46 | l := NewLexer(query) 47 | toks := l.Split() 48 | for _, t := range toks { 49 | fmt.Printf("%s\n", t.String()) 50 | } 51 | } 52 | 53 | func TestLexer6(t *testing.T) { 54 | query := "where func_name(key, 
'test')" 55 | l := NewLexer(query) 56 | toks := l.Split() 57 | for _, t := range toks { 58 | fmt.Printf("%s\n", t.String()) 59 | } 60 | } 61 | 62 | func TestLexer7(t *testing.T) { 63 | query := "select * where func_name(key, 'test')" 64 | l := NewLexer(query) 65 | toks := l.Split() 66 | for _, t := range toks { 67 | fmt.Printf("%s\n", t.String()) 68 | } 69 | } 70 | 71 | func TestLexer8(t *testing.T) { 72 | query := "select * where int(key) + 10" 73 | l := NewLexer(query) 74 | toks := l.Split() 75 | for _, t := range toks { 76 | fmt.Printf("%s\n", t.String()) 77 | } 78 | } 79 | 80 | func TestLexer9(t *testing.T) { 81 | query := "select * where int(key) + 10.5" 82 | l := NewLexer(query) 83 | toks := l.Split() 84 | for _, t := range toks { 85 | fmt.Printf("%s\n", t.String()) 86 | } 87 | } 88 | 89 | func TestLexer10(t *testing.T) { 90 | query := "select * where int(key) + 10.5.7" 91 | l := NewLexer(query) 92 | toks := l.Split() 93 | for _, t := range toks { 94 | fmt.Printf("%s\n", t.String()) 95 | } 96 | } 97 | 98 | func TestLexer11(t *testing.T) { 99 | query := "select * where int(key) + 10 > 5 & int(value) - 10 < 8" 100 | l := NewLexer(query) 101 | toks := l.Split() 102 | for _, t := range toks { 103 | fmt.Printf("%s\n", t.String()) 104 | } 105 | } 106 | 107 | func TestLexer12(t *testing.T) { 108 | query := "select * where key ^= 'asdf\"jkl'" 109 | l := NewLexer(query) 110 | toks := l.Split() 111 | for _, t := range toks { 112 | fmt.Printf("%s\n", t.String()) 113 | } 114 | } 115 | 116 | func TestLexer13(t *testing.T) { 117 | query := "select * where key ^= 'asdf' order by key" 118 | l := NewLexer(query) 119 | toks := l.Split() 120 | for _, t := range toks { 121 | fmt.Printf("%s\n", t.String()) 122 | } 123 | } 124 | 125 | func TestLexer14(t *testing.T) { 126 | query := "select * where key ^= 'asdf' order by key asc" 127 | l := NewLexer(query) 128 | toks := l.Split() 129 | for _, t := range toks { 130 | fmt.Printf("%s\n", t.String()) 131 | } 132 | } 133 | 134 | func 
// FinalLimitPlan applies an offset and a row limit to the rows produced by a
// child FinalPlan.
type FinalLimitPlan struct {
	// Storage is the backing store; none of the visible FinalLimitPlan methods
	// read it.
	Storage Storage
	// Start is the number of leading child rows to discard before any row is
	// returned.
	Start int
	// Count is the maximum number of rows returned after the skip.
	Count int
	// current counts rows returned so far; reset to 0 by Init.
	current int
	// skips counts child rows discarded so far; reset to 0 by Init.
	skips int
	// ChildPlan is the plan whose rows are being limited.
	ChildPlan FinalPlan
	// FieldNames is what FieldNameList returns.
	FieldNames []string
	// FieldTypes is what FieldTypeList returns.
	FieldTypes []Type
}
p.Count { 34 | return nil, nil 35 | } 36 | cols, err := p.ChildPlan.Next(ctx) 37 | if err != nil { 38 | return nil, err 39 | } 40 | if cols == nil && err == nil { 41 | return nil, nil 42 | } 43 | 44 | p.current++ 45 | return cols, nil 46 | 47 | } 48 | 49 | func (p *FinalLimitPlan) Batch(ctx *ExecuteCtx) ([][]Column, error) { 50 | var ( 51 | rows [][]Column 52 | err error 53 | finish = false 54 | count = 0 55 | ret = make([][]Column, 0, PlanBatchSize) 56 | ) 57 | for p.skips < p.Start { 58 | restSkips := p.Start - p.skips 59 | rows, err = p.ChildPlan.Batch(ctx) 60 | if err != nil { 61 | return nil, err 62 | } 63 | nrows := len(rows) 64 | if nrows == 0 { 65 | return nil, nil 66 | } 67 | if nrows <= restSkips { 68 | p.skips += nrows 69 | } else { 70 | p.skips += restSkips 71 | rows = rows[restSkips:] 72 | // Skip finish break is OK 73 | break 74 | } 75 | } 76 | if len(rows) > 0 { 77 | for _, row := range rows { 78 | if p.current >= p.Count { 79 | break 80 | } 81 | ret = append(ret, row) 82 | count++ 83 | p.current++ 84 | } 85 | } 86 | if p.current >= p.Count { 87 | return ret, nil 88 | } 89 | for !finish { 90 | rows, err = p.ChildPlan.Batch(ctx) 91 | if err != nil { 92 | return nil, err 93 | } 94 | if len(rows) == 0 { 95 | finish = true 96 | break 97 | } 98 | for _, row := range rows { 99 | ret = append(ret, row) 100 | count++ 101 | p.current++ 102 | if p.current >= p.Count { 103 | finish = true 104 | break 105 | } 106 | } 107 | if count >= PlanBatchSize { 108 | finish = true 109 | break 110 | } 111 | } 112 | return ret, nil 113 | } 114 | 115 | func (p *FinalLimitPlan) String() string { 116 | return fmt.Sprintf("LimitPlan{Start = %d, Count = %d}", p.Start, p.Count) 117 | } 118 | 119 | func (p *FinalLimitPlan) Explain() []string { 120 | ret := []string{p.String()} 121 | for _, plan := range p.ChildPlan.Explain() { 122 | ret = append(ret, plan) 123 | } 124 | return ret 125 | } 126 | 127 | func (p *FinalLimitPlan) FieldNameList() []string { 128 | return p.FieldNames 129 
| } 130 | 131 | func (p *FinalLimitPlan) FieldTypeList() []Type { 132 | return p.FieldTypes 133 | } 134 | 135 | type LimitPlan struct { 136 | Storage Storage 137 | Start int 138 | Count int 139 | current int 140 | skips int 141 | ChildPlan Plan 142 | } 143 | 144 | func (p *LimitPlan) Init() error { 145 | p.current = 0 146 | p.skips = 0 147 | return p.ChildPlan.Init() 148 | } 149 | 150 | func (p *LimitPlan) String() string { 151 | return fmt.Sprintf("LimitPlan{Start = %d, Count = %d}", p.Start, p.Count) 152 | } 153 | 154 | func (p *LimitPlan) Explain() []string { 155 | ret := []string{p.String()} 156 | for _, plan := range p.ChildPlan.Explain() { 157 | ret = append(ret, plan) 158 | } 159 | return ret 160 | } 161 | 162 | func (p *LimitPlan) Next(ctx *ExecuteCtx) ([]byte, []byte, error) { 163 | for p.skips < p.Start { 164 | key, value, err := p.ChildPlan.Next(ctx) 165 | if err != nil { 166 | return nil, nil, err 167 | } 168 | if key == nil && value == nil && err == nil { 169 | return nil, nil, nil 170 | } 171 | p.skips++ 172 | } 173 | if p.current >= p.Count { 174 | return nil, nil, nil 175 | } 176 | k, v, err := p.ChildPlan.Next(ctx) 177 | if err != nil { 178 | return nil, nil, err 179 | } 180 | if k == nil && v == nil && err == nil { 181 | return nil, nil, nil 182 | } 183 | p.current++ 184 | return k, v, nil 185 | } 186 | 187 | func (p *LimitPlan) Batch(ctx *ExecuteCtx) ([]KVPair, error) { 188 | var ( 189 | rows []KVPair 190 | ret = make([]KVPair, 0, PlanBatchSize) 191 | err error 192 | finish = false 193 | count = 0 194 | ) 195 | for p.skips < p.Start { 196 | restSkips := p.Start - p.skips 197 | rows, err = p.ChildPlan.Batch(ctx) 198 | if err != nil { 199 | return nil, err 200 | } 201 | nrows := len(rows) 202 | if nrows == 0 { 203 | return nil, nil 204 | } 205 | if nrows <= restSkips { 206 | p.skips += nrows 207 | } else { 208 | p.skips += restSkips 209 | rows = rows[restSkips:] 210 | // Skip finish break it OK 211 | break 212 | } 213 | } 214 | if len(rows) > 0 { 
215 | for _, row := range rows { 216 | if p.current >= p.Count { 217 | break 218 | } 219 | ret = append(ret, row) 220 | count++ 221 | p.current++ 222 | } 223 | } 224 | if p.current >= p.Count { 225 | return ret, nil 226 | } 227 | for !finish { 228 | rows, err = p.ChildPlan.Batch(ctx) 229 | if err != nil { 230 | return nil, err 231 | } 232 | if len(rows) == 0 { 233 | finish = true 234 | break 235 | } 236 | for _, row := range rows { 237 | ret = append(ret, row) 238 | count++ 239 | p.current++ 240 | if p.current >= p.Count { 241 | finish = true 242 | break 243 | } 244 | } 245 | if count >= PlanBatchSize { 246 | finish = true 247 | break 248 | } 249 | } 250 | return ret, nil 251 | } 252 | -------------------------------------------------------------------------------- /optimizer.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import "fmt" 4 | 5 | type Optimizer struct { 6 | Query string 7 | stmt Statement 8 | filter *FilterExec 9 | } 10 | 11 | func NewOptimizer(query string) *Optimizer { 12 | return &Optimizer{ 13 | Query: query, 14 | } 15 | } 16 | 17 | func (o *Optimizer) init() error { 18 | p := NewParser(o.Query) 19 | stmt, err := p.Parse() 20 | if err != nil { 21 | return err 22 | } 23 | o.stmt = stmt 24 | switch vstmt := stmt.(type) { 25 | case *SelectStmt: 26 | o.optimizeSelectExpressions(vstmt) 27 | o.filter = &FilterExec{ 28 | Ast: vstmt.Where, 29 | } 30 | case *DeleteStmt: 31 | o.optimizeDeleteExpressions(vstmt) 32 | o.filter = &FilterExec{ 33 | Ast: vstmt.Where, 34 | } 35 | } 36 | return nil 37 | } 38 | 39 | func (o *Optimizer) optimizeDeleteExpressions(stmt *DeleteStmt) { 40 | eo := ExpressionOptimizer{ 41 | Root: stmt.Where.Expr, 42 | } 43 | stmt.Where.Expr = eo.Optimize() 44 | } 45 | 46 | func (o *Optimizer) optimizeSelectExpressions(stmt *SelectStmt) { 47 | eo := ExpressionOptimizer{ 48 | Root: stmt.Where.Expr, 49 | } 50 | stmt.Where.Expr = eo.Optimize() 51 | for i, field := range stmt.Fields 
{ 52 | // fmt.Println("Before opt", field) 53 | eo.Root = field 54 | stmt.Fields[i] = eo.Optimize() 55 | // fmt.Println("After opt", o.stmt.Fields[i]) 56 | } 57 | } 58 | 59 | func (o *Optimizer) findAggrFunc(expr Expression) bool { 60 | switch e := expr.(type) { 61 | case *BinaryOpExpr: 62 | if o.findAggrFunc(e.Left) { 63 | return true 64 | } 65 | if o.findAggrFunc(e.Right) { 66 | return true 67 | } 68 | case *FunctionCallExpr: 69 | return IsAggrFuncExpr(expr) 70 | } 71 | return false 72 | } 73 | 74 | func (o *Optimizer) buildFinalPlan(s Storage, fp Plan, stmt *SelectStmt) (FinalPlan, error) { 75 | hasAggr := false 76 | aggrFields := 0 77 | aggrAll := true 78 | for _, field := range stmt.Fields { 79 | if o.findAggrFunc(field) { 80 | hasAggr = true 81 | aggrFields++ 82 | } 83 | } 84 | if stmt.GroupBy != nil && len(stmt.Fields) == len(stmt.GroupBy.Fields) { 85 | allInSelect := true 86 | for _, gf := range stmt.GroupBy.Fields { 87 | gfNameInSelect := false 88 | for _, fn := range stmt.FieldNames { 89 | if fn == gf.Name { 90 | gfNameInSelect = true 91 | break 92 | } 93 | } 94 | if !gfNameInSelect { 95 | allInSelect = false 96 | break 97 | } 98 | } 99 | hasAggr = allInSelect 100 | } 101 | var ffp FinalPlan 102 | if !hasAggr && stmt.GroupBy != nil && len(stmt.GroupBy.Fields) > 0 { 103 | return nil, NewSyntaxError(stmt.Pos, "No aggregate fields in select statement") 104 | } 105 | if !hasAggr { 106 | ffp = &ProjectionPlan{ 107 | Storage: s, 108 | ChildPlan: fp, 109 | AllFields: stmt.AllFields, 110 | FieldNames: stmt.FieldNames, 111 | FieldTypes: stmt.FieldTypes, 112 | Fields: stmt.Fields, 113 | } 114 | 115 | // Build order 116 | if stmt.Order != nil { 117 | ffp = o.buildFinalOrderPlan(s, ffp, false, stmt) 118 | } 119 | 120 | // Build limit 121 | if stmt.Limit != nil { 122 | ffp = o.buildFinalLimitPlan(s, ffp, stmt) 123 | } 124 | 125 | return ffp, nil 126 | } 127 | 128 | // Update limit 129 | limit := -1 130 | start := 0 131 | doNotBuildLimit := false 132 | // no order by 
only has limit 133 | if stmt.Limit != nil && stmt.Order == nil { 134 | doNotBuildLimit = true 135 | start = stmt.Limit.Start 136 | limit = stmt.Limit.Count 137 | } 138 | var groupByFields []GroupByField = nil 139 | if stmt.GroupBy != nil { 140 | groupByFields = stmt.GroupBy.Fields 141 | aggrAll = false 142 | } else { 143 | aggrAll = true 144 | } 145 | 146 | if aggrFields == 0 && len(groupByFields) > 0 { 147 | return nil, NewSyntaxError(stmt.Pos, "No aggregate fields in select statement") 148 | } 149 | 150 | if aggrFields+len(groupByFields) < len(stmt.Fields) { 151 | if stmt.GroupBy != nil { 152 | return nil, NewSyntaxError(stmt.GroupBy.Pos, "Missing aggregate fields in group by statement") 153 | } else { 154 | return nil, NewSyntaxError(-1, "Missing group by statement") 155 | } 156 | } 157 | 158 | ffp = &AggregatePlan{ 159 | Storage: s, 160 | ChildPlan: fp, 161 | AggrAll: aggrAll, 162 | FieldNames: stmt.FieldNames, 163 | FieldTypes: stmt.FieldTypes, 164 | Fields: stmt.Fields, 165 | GroupByFields: groupByFields, 166 | Limit: limit, 167 | Start: start, 168 | } 169 | 170 | if stmt.Order != nil { 171 | ffp = o.buildFinalOrderPlan(s, ffp, true, stmt) 172 | } 173 | 174 | if stmt.Limit != nil && !doNotBuildLimit { 175 | ffp = o.buildFinalLimitPlan(s, ffp, stmt) 176 | } 177 | return ffp, nil 178 | } 179 | 180 | func (o *Optimizer) buildPlan(s Storage) (FinalPlan, error) { 181 | err := o.init() 182 | if err != nil { 183 | return nil, err 184 | } 185 | switch stmt := o.stmt.(type) { 186 | case *SelectStmt: 187 | return o.buildSelectPlan(s, stmt) 188 | case *PutStmt: 189 | return o.buildPutPlan(s, stmt) 190 | case *RemoveStmt: 191 | return o.buildRemovePlan(s, stmt) 192 | case *DeleteStmt: 193 | return o.buildDeletePlan(s, stmt) 194 | default: 195 | return nil, fmt.Errorf("Cannot build query plan without a select statement") 196 | } 197 | } 198 | 199 | func (o *Optimizer) buildPutPlan(s Storage, stmt *PutStmt) (FinalPlan, error) { 200 | plan := &PutPlan{ 201 | Storage: s, 202 
| KVPairs: stmt.KVPairs, 203 | } 204 | err := plan.Init() 205 | if err != nil { 206 | return nil, err 207 | } 208 | return plan, nil 209 | } 210 | 211 | func (o *Optimizer) buildRemovePlan(s Storage, stmt *RemoveStmt) (FinalPlan, error) { 212 | plan := &RemovePlan{ 213 | Storage: s, 214 | Keys: stmt.Keys, 215 | } 216 | err := plan.Init() 217 | if err != nil { 218 | return nil, err 219 | } 220 | return plan, nil 221 | } 222 | 223 | func (o *Optimizer) optimizeDeletePlanToRemovePlan(s Storage, mgPlan *MultiGetPlan) (FinalPlan, error) { 224 | keys := make([]Expression, len(mgPlan.Keys)) 225 | for i, key := range mgPlan.Keys { 226 | kexpr := &StringExpr{ 227 | Pos: 0, 228 | Data: key, 229 | } 230 | keys[i] = kexpr 231 | } 232 | 233 | removePlan := &RemovePlan{ 234 | Storage: s, 235 | Keys: keys, 236 | } 237 | err := removePlan.Init() 238 | return removePlan, err 239 | } 240 | 241 | func (o *Optimizer) canOptimizeDeletePlanToRemovePlan(mgPlan *MultiGetPlan) bool { 242 | if mgPlan.Filter.Ast == nil || mgPlan.Filter.Ast.Expr == nil { 243 | return false 244 | } 245 | fexpr := mgPlan.Filter.Ast.Expr 246 | hasAndOp := false 247 | fexpr.Walk(func(e Expression) bool { 248 | switch eval := e.(type) { 249 | case *BinaryOpExpr: 250 | if eval.Op == And || eval.Op == KWAnd { 251 | hasAndOp = true 252 | return false 253 | } 254 | } 255 | return true 256 | }) 257 | // For safety, if filter expressions has `and` operator it should not optimize to remove plan 258 | if hasAndOp { 259 | return false 260 | } 261 | return true 262 | } 263 | 264 | func (o *Optimizer) buildDeletePlan(s Storage, stmt *DeleteStmt) (FinalPlan, error) { 265 | var err error 266 | // Build Scan 267 | fp := o.buildScanPlan(s) 268 | 269 | // Just build an empyt result plan so we can 270 | // ignore limit plan just return the delete plan 271 | // with empty result plan directly 272 | if _, ok := fp.(*EmptyResultPlan); ok { 273 | delPlan := &DeletePlan{ 274 | Storage: s, 275 | ChildPlan: fp, 276 | } 277 | err = 
delPlan.Init() 278 | if err != nil { 279 | return nil, err 280 | } 281 | return delPlan, nil 282 | } 283 | 284 | if mgPlan, ok := fp.(*MultiGetPlan); ok && stmt.Limit == nil { 285 | // Only multi get plan and no limit statement can be optimize to remove plan 286 | if o.canOptimizeDeletePlanToRemovePlan(mgPlan) { 287 | return o.optimizeDeletePlanToRemovePlan(s, mgPlan) 288 | } 289 | } 290 | 291 | delPlan := &DeletePlan{ 292 | Storage: s, 293 | ChildPlan: fp, 294 | } 295 | 296 | if stmt.Limit != nil { 297 | limitPlan := &LimitPlan{ 298 | Storage: s, 299 | Start: stmt.Limit.Start, 300 | Count: stmt.Limit.Count, 301 | ChildPlan: fp, 302 | } 303 | delPlan.ChildPlan = limitPlan 304 | } 305 | err = delPlan.Init() 306 | if err != nil { 307 | return nil, err 308 | } 309 | return delPlan, nil 310 | } 311 | 312 | func (o *Optimizer) buildSelectPlan(s Storage, stmt *SelectStmt) (FinalPlan, error) { 313 | // Build Scan 314 | fp := o.buildScanPlan(s) 315 | 316 | // Just build an empty result plan so we can 317 | // ignore order and limit plan just return 318 | // the projection plan with empty result plan 319 | if _, ok := fp.(*EmptyResultPlan); ok { 320 | ret, err := o.buildFinalPlan(s, fp, stmt) 321 | if err != nil { 322 | return nil, err 323 | } 324 | err = ret.Init() 325 | if err != nil { 326 | return nil, err 327 | } 328 | return ret, nil 329 | } 330 | 331 | ret, err := o.buildFinalPlan(s, fp, stmt) 332 | if err != nil { 333 | return nil, err 334 | } 335 | err = ret.Init() 336 | if err != nil { 337 | return nil, err 338 | } 339 | return ret, nil 340 | } 341 | 342 | func (o *Optimizer) BuildPlan(s Storage) (FinalPlan, error) { 343 | ret, err := o.buildPlan(s) 344 | if err != nil { 345 | return nil, err 346 | } 347 | err = ret.Init() 348 | if err != nil { 349 | return nil, err 350 | } 351 | return ret, nil 352 | } 353 | 354 | func (o *Optimizer) buildFinalLimitPlan(s Storage, ffp FinalPlan, stmt *SelectStmt) FinalPlan { 355 | return &FinalLimitPlan{ 356 | Storage: s, 357 | 
Start: stmt.Limit.Start, 358 | Count: stmt.Limit.Count, 359 | FieldNames: ffp.FieldNameList(), 360 | FieldTypes: ffp.FieldTypeList(), 361 | ChildPlan: ffp, 362 | } 363 | } 364 | 365 | func (o *Optimizer) buildFinalOrderPlan(s Storage, ffp FinalPlan, hasAggr bool, stmt *SelectStmt) FinalPlan { 366 | if !hasAggr && len(stmt.Order.Orders) == 1 { 367 | order := stmt.Order.Orders[0] 368 | switch expr := order.Field.(type) { 369 | case *FieldExpr: 370 | // If order by key asc just ignore it 371 | if expr.Field == KeyKW && order.Order == ASC { 372 | return ffp 373 | } 374 | } 375 | } 376 | return &FinalOrderPlan{ 377 | Storage: s, 378 | Orders: stmt.Order.Orders, 379 | FieldNames: ffp.FieldNameList(), 380 | FieldTypes: ffp.FieldTypeList(), 381 | ChildPlan: ffp, 382 | } 383 | } 384 | 385 | func (o *Optimizer) buildScanPlan(s Storage) Plan { 386 | fopt := NewFilterOptimizer(o.filter.Ast, s, o.filter) 387 | return fopt.Optimize() 388 | } 389 | -------------------------------------------------------------------------------- /optimizer_test.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import "testing" 4 | 5 | type builderTest struct { 6 | query string 7 | needError bool 8 | } 9 | 10 | func TestPlanBuilder(t *testing.T) { 11 | tdata := []builderTest{ 12 | builderTest{ 13 | "select key, sum(int(value)) where true", true, 14 | }, 15 | builderTest{ 16 | "select key, value", true, 17 | }, 18 | builderTest{ 19 | "select key, value where true;", false, 20 | }, 21 | } 22 | 23 | txn := &fuzzQueryStorage{} 24 | for i, item := range tdata { 25 | opt := NewOptimizer(item.query) 26 | _, err := opt.buildPlan(txn) 27 | if berr, ok := err.(QueryBinder); ok { 28 | berr.BindQuery(item.query) 29 | berr.SetPadding(0) 30 | } 31 | if err == nil && item.needError { 32 | t.Errorf("[%d] query: `%s` need error, but got nil", i, item.query) 33 | } else if err != nil && !item.needError { 34 | t.Errorf("[%d] query: `%s` should not return 
error but got:\n%s", i, item.query, err.Error()) 35 | } 36 | } 37 | } 38 | 39 | func buildPlan(query string) (FinalPlan, error) { 40 | txn := &fuzzQueryStorage{} 41 | opt := NewOptimizer(query) 42 | return opt.buildPlan(txn) 43 | } 44 | 45 | func TestOptimizeDelete(t *testing.T) { 46 | queries := []string{ 47 | "delete where key in ('k1', 'k2')", 48 | "delete where key = 'k1' | key = 'k2'", 49 | } 50 | for _, query := range queries { 51 | plan, err := buildPlan(query) 52 | if err != nil { 53 | t.Fatal(err) 54 | } 55 | if p, ok := plan.(*RemovePlan); ok { 56 | if len(p.Keys) != 2 { 57 | t.Fatal("Remove plan should contains 2 keys") 58 | } 59 | } else { 60 | t.Fatal("Should optimize as remove plan") 61 | } 62 | } 63 | } 64 | 65 | func TestOptimizeDelete2(t *testing.T) { 66 | queries := []string{ 67 | "delete where key in ('k1', 'k2') and upper(key) = 'K1'", 68 | } 69 | for _, query := range queries { 70 | plan, err := buildPlan(query) 71 | if err != nil { 72 | t.Fatal(err) 73 | } 74 | if p, ok := plan.(*DeletePlan); ok { 75 | cp := p.ChildPlan 76 | if _, ok := cp.(*MultiGetPlan); !ok { 77 | t.Fatal("Should optimize as multi get plan") 78 | } 79 | } else { 80 | t.Fatal("Should optimize as delete plan") 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /order_plan.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "bytes" 5 | "container/heap" 6 | "fmt" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | type FinalOrderPlan struct { 12 | Storage Storage 13 | Orders []OrderField 14 | FieldNames []string 15 | FieldTypes []Type 16 | ChildPlan FinalPlan 17 | pos int 18 | total int 19 | sorted *orderColumnsRowHeap 20 | orderPos []int 21 | orderTypes []Type 22 | } 23 | 24 | func (p *FinalOrderPlan) findOrderIdx(o OrderField) (int, error) { 25 | fname := o.Name 26 | for i, fn := range p.FieldNames { 27 | if fname == fn { 28 | return i, nil 29 | } 30 | } 31 | 
return 0, NewSyntaxError(o.Field.GetPos(), "Cannot find field: %s", fname) 32 | } 33 | 34 | func (p *FinalOrderPlan) Init() error { 35 | p.pos = 0 36 | p.total = 0 37 | p.orderPos = []int{} 38 | p.orderTypes = []Type{} 39 | for _, o := range p.Orders { 40 | idx, err := p.findOrderIdx(o) 41 | if err != nil { 42 | return err 43 | } 44 | p.orderPos = append(p.orderPos, idx) 45 | p.orderTypes = append(p.orderTypes, p.FieldTypes[idx]) 46 | } 47 | p.sorted = &orderColumnsRowHeap{} 48 | heap.Init(p.sorted) 49 | return p.ChildPlan.Init() 50 | } 51 | 52 | func (p *FinalOrderPlan) FieldNameList() []string { 53 | return p.FieldNames 54 | } 55 | 56 | func (p *FinalOrderPlan) FieldTypeList() []Type { 57 | return p.FieldTypes 58 | } 59 | 60 | func (p *FinalOrderPlan) String() string { 61 | fields := []string{} 62 | for _, f := range p.Orders { 63 | orderStr := " ASC" 64 | if f.Order == DESC { 65 | orderStr = " DESC" 66 | } 67 | fields = append(fields, f.Name+orderStr) 68 | } 69 | return fmt.Sprintf("OrderPlan{Fields = <%s>}", strings.Join(fields, ", ")) 70 | } 71 | 72 | func (p *FinalOrderPlan) Explain() []string { 73 | ret := []string{p.String()} 74 | for _, plan := range p.ChildPlan.Explain() { 75 | ret = append(ret, plan) 76 | } 77 | return ret 78 | } 79 | 80 | func (p *FinalOrderPlan) Next(ctx *ExecuteCtx) ([]Column, error) { 81 | if p.total == 0 { 82 | if err := p.prepare(ctx); err != nil { 83 | return nil, err 84 | } 85 | } 86 | if p.pos < p.total { 87 | rrow := heap.Pop(p.sorted) 88 | row := rrow.(*orderColumnsRow) 89 | p.pos++ 90 | return row.cols, nil 91 | } 92 | return nil, nil 93 | } 94 | 95 | func (p *FinalOrderPlan) Batch(ctx *ExecuteCtx) ([][]Column, error) { 96 | if p.total == 0 { 97 | if err := p.prepareBatch(ctx); err != nil { 98 | return nil, err 99 | } 100 | } 101 | var ( 102 | ret = make([][]Column, 0, PlanBatchSize) 103 | count = 0 104 | ) 105 | for p.pos < p.total { 106 | rrow := heap.Pop(p.sorted) 107 | row := rrow.(*orderColumnsRow) 108 | ret = 
append(ret, row.cols) 109 | p.pos++ 110 | count++ 111 | if count >= PlanBatchSize { 112 | break 113 | } 114 | } 115 | return ret, nil 116 | } 117 | 118 | func (p *FinalOrderPlan) prepare(ctx *ExecuteCtx) error { 119 | for { 120 | col, err := p.ChildPlan.Next(ctx) 121 | if err != nil { 122 | return err 123 | } 124 | if col == nil && err == nil { 125 | break 126 | } 127 | row := &orderColumnsRow{ 128 | cols: col, 129 | orders: p.Orders, 130 | orderPos: p.orderPos, 131 | orderTypes: p.orderTypes, 132 | } 133 | heap.Push(p.sorted, row) 134 | p.total++ 135 | } 136 | return nil 137 | } 138 | 139 | func (p *FinalOrderPlan) prepareBatch(ctx *ExecuteCtx) error { 140 | for { 141 | rows, err := p.ChildPlan.Batch(ctx) 142 | if err != nil { 143 | return err 144 | } 145 | if len(rows) == 0 { 146 | break 147 | } 148 | for _, cols := range rows { 149 | row := &orderColumnsRow{ 150 | cols: cols, 151 | orders: p.Orders, 152 | orderPos: p.orderPos, 153 | orderTypes: p.orderTypes, 154 | } 155 | heap.Push(p.sorted, row) 156 | p.total++ 157 | } 158 | } 159 | return nil 160 | } 161 | 162 | type orderColumnsRow struct { 163 | cols []Column 164 | orders []OrderField 165 | orderPos []int 166 | orderTypes []Type 167 | } 168 | 169 | func (l *orderColumnsRow) Less(r *orderColumnsRow) bool { 170 | for i, o := range l.orders { 171 | oidx := l.orderPos[i] 172 | desc := o.Order == DESC 173 | lval := l.cols[oidx] 174 | rval := r.cols[oidx] 175 | compare := l.compare(l.orderTypes[i], lval, rval, desc) 176 | if compare < 0 { 177 | return true 178 | } else if compare > 0 { 179 | return false 180 | } 181 | } 182 | return false 183 | } 184 | 185 | func (l *orderColumnsRow) compare(tp Type, lval, rval Column, reverse bool) int { 186 | switch tp { 187 | case TSTR: 188 | return l.compareBytes(lval, rval, reverse) 189 | case TNUMBER: 190 | return l.compareNumber(lval, rval, reverse) 191 | case TBOOL: 192 | return l.compareBool(lval, rval, reverse) 193 | default: 194 | return 0 195 | } 196 | } 197 | 198 | 
func (l *orderColumnsRow) compareBytes(lval, rval Column, reverse bool) int { 199 | var ( 200 | lbval []byte 201 | rbval []byte 202 | ) 203 | switch lval.(type) { 204 | case []byte: 205 | lbval = lval.([]byte) 206 | rbval = rval.([]byte) 207 | case string: 208 | lbval = []byte(lval.(string)) 209 | rbval = []byte(rval.(string)) 210 | default: 211 | return 0 212 | } 213 | if reverse { 214 | return 0 - bytes.Compare(lbval, rbval) 215 | } 216 | return bytes.Compare(lbval, rbval) 217 | } 218 | 219 | func (l *orderColumnsRow) compareBool(lval, rval Column, reverse bool) int { 220 | var ( 221 | lbool bool 222 | rbool bool 223 | ) 224 | switch lval.(type) { 225 | case bool: 226 | lbool = lval.(bool) 227 | rbool = rval.(bool) 228 | case string: 229 | lbool = lval.(string) == "true" 230 | rbool = rval.(string) == "true" 231 | case []byte: 232 | lbool = bytes.Equal(lval.([]byte), []byte("true")) 233 | rbool = bytes.Equal(rval.([]byte), []byte("true")) 234 | default: 235 | return 0 236 | } 237 | lint := 0 238 | rint := 0 239 | if lbool { 240 | lint = 1 241 | } 242 | if rbool { 243 | rint = 1 244 | } 245 | if lint == rint { 246 | return 0 247 | } 248 | if reverse { 249 | if lint > rint { 250 | return -1 251 | } else { 252 | return 1 253 | } 254 | } 255 | if lint < rint { 256 | return -1 257 | } else { 258 | return 1 259 | } 260 | } 261 | 262 | func (l *orderColumnsRow) compareNumber(lval, rval Column, reverse bool) int { 263 | var ( 264 | lint, rint int64 265 | lfloat, rfloat float64 266 | err error 267 | isFloat bool = false 268 | ) 269 | switch lval.(type) { 270 | case int: 271 | lint = int64(lval.(int)) 272 | rint = int64(rval.(int)) 273 | case int16: 274 | lint = int64(lval.(int16)) 275 | rint = int64(rval.(int16)) 276 | case int32: 277 | lint = int64(lval.(int32)) 278 | rint = int64(rval.(int32)) 279 | case int64: 280 | lint = lval.(int64) 281 | rint = rval.(int64) 282 | case uint: 283 | lint = int64(lval.(uint)) 284 | rint = int64(rval.(uint)) 285 | case uint16: 286 | 
lint = int64(lval.(uint16)) 287 | rint = int64(rval.(uint16)) 288 | case uint32: 289 | lint = int64(lval.(uint32)) 290 | rint = int64(rval.(uint32)) 291 | case uint64: 292 | lint = int64(lval.(uint64)) 293 | rint = int64(rval.(uint64)) 294 | case float32: 295 | lfloat = float64(lval.(float32)) 296 | rfloat = float64(rval.(float32)) 297 | isFloat = true 298 | case float64: 299 | lfloat = lval.(float64) 300 | rfloat = rval.(float64) 301 | isFloat = true 302 | case []byte: 303 | if lint, err = strconv.ParseInt(string(lval.([]byte)), 10, 64); err == nil { 304 | if rint, err = strconv.ParseInt(string(rval.([]byte)), 10, 64); err == nil { 305 | return l.compareInt(lint, rint, reverse) 306 | } 307 | } 308 | if lfloat, err = strconv.ParseFloat(string(lval.([]byte)), 64); err == nil { 309 | if rfloat, err = strconv.ParseFloat(string(rval.([]byte)), 64); err == nil { 310 | return l.compareFloat(lfloat, rfloat, reverse) 311 | } 312 | } 313 | return 0 314 | case string: 315 | if lint, err = strconv.ParseInt(lval.(string), 10, 64); err == nil { 316 | if rint, err = strconv.ParseInt(rval.(string), 10, 64); err == nil { 317 | return l.compareInt(lint, rint, reverse) 318 | } 319 | } 320 | if lfloat, err = strconv.ParseFloat(lval.(string), 64); err == nil { 321 | if rfloat, err = strconv.ParseFloat(rval.(string), 64); err == nil { 322 | return l.compareFloat(lfloat, rfloat, reverse) 323 | } 324 | } 325 | return 0 326 | } 327 | 328 | if isFloat { 329 | return l.compareFloat(lfloat, rfloat, reverse) 330 | } 331 | return l.compareInt(lint, rint, reverse) 332 | } 333 | 334 | func (l *orderColumnsRow) compareInt(lval, rval int64, reverse bool) int { 335 | if lval == rval { 336 | return 0 337 | } 338 | if reverse { 339 | if lval > rval { 340 | return -1 341 | } else { 342 | return 1 343 | } 344 | } 345 | if lval < rval { 346 | return -1 347 | } else { 348 | return 1 349 | } 350 | } 351 | 352 | func (l *orderColumnsRow) compareFloat(lval, rval float64, reverse bool) int { 353 | if lval == 
rval { 354 | return 0 355 | } 356 | if reverse { 357 | if lval > rval { 358 | return -1 359 | } else { 360 | return 1 361 | } 362 | } 363 | if lval < rval { 364 | return -1 365 | } else { 366 | return 1 367 | } 368 | } 369 | 370 | type orderColumnsRowHeap []*orderColumnsRow 371 | 372 | func (h orderColumnsRowHeap) Len() int { 373 | return len(h) 374 | } 375 | 376 | func (h orderColumnsRowHeap) Swap(i, j int) { 377 | h[i], h[j] = h[j], h[i] 378 | } 379 | 380 | func (h *orderColumnsRowHeap) Push(x any) { 381 | *h = append(*h, x.(*orderColumnsRow)) 382 | } 383 | 384 | func (h *orderColumnsRowHeap) Pop() any { 385 | old := *h 386 | n := len(old) 387 | x := old[n-1] 388 | *h = old[0 : n-1] 389 | return x 390 | } 391 | 392 | func (h orderColumnsRowHeap) Less(i, j int) bool { 393 | l := h[i] 394 | r := h[j] 395 | return l.Less(r) 396 | } 397 | -------------------------------------------------------------------------------- /parser_fuzz_test.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import "testing" 4 | 5 | type fuzzQueryStorage struct{} 6 | 7 | func (t *fuzzQueryStorage) Get(key []byte) ([]byte, error) { 8 | return nil, nil 9 | } 10 | 11 | func (t *fuzzQueryStorage) Put(key []byte, value []byte) error { 12 | return nil 13 | } 14 | 15 | func (t *fuzzQueryStorage) BatchPut(kvs []KVPair) error { 16 | return nil 17 | } 18 | 19 | func (t *fuzzQueryStorage) Delete(key []byte) error { 20 | return nil 21 | } 22 | 23 | func (t *fuzzQueryStorage) BatchDelete(key [][]byte) error { 24 | return nil 25 | } 26 | 27 | func (t *fuzzQueryStorage) Cursor() (Cursor, error) { 28 | return &fuzzQueryCursor{}, nil 29 | } 30 | 31 | type fuzzQueryCursor struct{} 32 | 33 | func (c *fuzzQueryCursor) Seek(key []byte) error { 34 | return nil 35 | } 36 | 37 | func (c *fuzzQueryCursor) Next() ([]byte, []byte, error) { 38 | return nil, nil, nil 39 | } 40 | 41 | func FuzzSQLParser(f *testing.F) { 42 | tests := []string{ 43 | "select key, 
int(value) where int(key) + 1 >= 1 & (int(value) - 1 > 10 | int(value) <= 20)", 44 | "select key, int(value) where key ^= 'key' order by key limit 20, 10", 45 | "select * where key in ('k1', 'k2', 'k3')", 46 | "select * where (key between 'k1' and 'k3') & int(value) between 1 and 10", 47 | "select key, json(value)['test'] where key ^= 'k' & json(value)['test'][1] = 'v1'", 48 | "put ('k1', 'v1'), ('k1', 'V_' + upper(key)), ('k3', lower('V3'))", 49 | "remove 'k1', 'k2'", 50 | "delete where key ^='prefix' and value = 'v2'", 51 | "delete where key in ('k1', 'k2')", 52 | "delete where (key = 'k1' | key = 'k2') and key ^= 'k'", 53 | } 54 | 55 | for _, t := range tests { 56 | f.Add(t) 57 | } 58 | txn := &fuzzQueryStorage{} 59 | f.Fuzz(func(t *testing.T, query string) { 60 | o := NewOptimizer(query) 61 | o.buildPlan(txn) 62 | }) 63 | } 64 | -------------------------------------------------------------------------------- /parser_test.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func parseQuery(query string) (*SelectStmt, error) { 9 | p := NewParser(query) 10 | expr, err := p.Parse() 11 | if expr != nil { 12 | return expr.(*SelectStmt), err 13 | } 14 | return nil, err 15 | } 16 | 17 | func parsePutQuery(query string) (*PutStmt, error) { 18 | p := NewParser(query) 19 | expr, err := p.Parse() 20 | if expr != nil { 21 | return expr.(*PutStmt), err 22 | } 23 | return nil, err 24 | } 25 | 26 | func parseRemoveQuery(query string) (*RemoveStmt, error) { 27 | p := NewParser(query) 28 | expr, err := p.Parse() 29 | if expr != nil { 30 | return expr.(*RemoveStmt), err 31 | } 32 | return nil, err 33 | } 34 | 35 | func parseDeleteQuery(query string) (*DeleteStmt, error) { 36 | p := NewParser(query) 37 | expr, err := p.Parse() 38 | if expr != nil { 39 | return expr.(*DeleteStmt), err 40 | } 41 | return nil, err 42 | } 43 | 44 | func TestParser1(t *testing.T) { 45 | query := 
"where key = 'test' & value = 'value'" 46 | expr, err := parseQuery(query) 47 | if err != nil { 48 | t.Fatal(err) 49 | } 50 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 51 | } 52 | 53 | func TestParser2(t *testing.T) { 54 | query := "where key ^= 'test'" 55 | expr, err := parseQuery(query) 56 | if err != nil { 57 | t.Fatal(err) 58 | } 59 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 60 | } 61 | 62 | func TestParser3(t *testing.T) { 63 | query := "where key ^= 'test' value = 'xxx'" 64 | expr, err := parseQuery(query) 65 | if err == nil { 66 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 67 | t.Fatal("Should get syntax error") 68 | } 69 | fmt.Printf("%+v\n", err) 70 | } 71 | 72 | func TestParser4(t *testing.T) { 73 | query := "where (key ^= 'test' | key ^= 'bar') & value = 'xxx'" 74 | expr, err := parseQuery(query) 75 | if err != nil { 76 | t.Fatal(err) 77 | } 78 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 79 | } 80 | 81 | func TestParser5(t *testing.T) { 82 | query := "where (key ^= 'test' | (key ^= 'bar' & key ^= 'foo')) & value = 'xxx'" 83 | expr, err := parseQuery(query) 84 | if err != nil { 85 | t.Fatal(err) 86 | } 87 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 88 | } 89 | 90 | func TestParser6(t *testing.T) { 91 | query := "where !(key ^= 'test' | !(key ^= 'bar' & key ^= 'foo')) & value = 'xxx'" 92 | expr, err := parseQuery(query) 93 | if err != nil { 94 | t.Fatal(err) 95 | } 96 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 97 | } 98 | 99 | func TestParser7(t *testing.T) { 100 | funcMap["func_name"] = &Function{"func_name", 2, false, TBOOL, nil, nil} 101 | query := "where func_name(key, 'test')" 102 | expr, err := parseQuery(query) 103 | if err != nil { 104 | t.Fatal(err) 105 | } 106 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 107 | } 108 | 109 | func TestParser8(t *testing.T) { 110 | funcMap["func_name"] = &Function{"func_name", 2, false, TSTR, nil, nil} 111 | query := "where func_name(key, 'test') ^= 'name'" 112 | expr, err := 
parseQuery(query) 113 | if err != nil { 114 | t.Fatal(err) 115 | } 116 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 117 | } 118 | 119 | func TestParser9(t *testing.T) { 120 | funcMap["func_name"] = &Function{"func_name", 2, false, TSTR, nil, nil} 121 | funcMap["func_name2"] = &Function{"func_name2", 1, false, TBOOL, nil, nil} 122 | query := "where (func_name(key, 'test') ^= 'name') & (func_name2(value) | value ^= 't')" 123 | expr, err := parseQuery(query) 124 | if err != nil { 125 | fmt.Println(err) 126 | t.Fatal(err) 127 | } 128 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 129 | } 130 | 131 | func TestParser10(t *testing.T) { 132 | funcMap["func1"] = &Function{"func1", 2, false, TBOOL, nil, nil} 133 | query := "where func1(func2(key), '')" 134 | expr, err := parseQuery(query) 135 | if err != nil { 136 | t.Fatal(err) 137 | } 138 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 139 | } 140 | 141 | func TestParser11(t *testing.T) { 142 | funcMap["func1"] = &Function{"func1", 2, false, TBOOL, nil, nil} 143 | query := "where func1(func2(key), '', func3(func4('1', '2'), '5'))" 144 | expr, err := parseQuery(query) 145 | if err != nil { 146 | t.Fatal(err) 147 | } 148 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 149 | } 150 | 151 | func TestParser12(t *testing.T) { 152 | funcMap["func1"] = &Function{"func1", 2, false, TBOOL, nil, nil} 153 | query := "where func1(func2(key), func3(func4('1', '2'), '5'), func5())" 154 | expr, err := parseQuery(query) 155 | if err != nil { 156 | t.Fatal(err) 157 | } 158 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 159 | } 160 | 161 | func TestParser13(t *testing.T) { 162 | funcMap["func1"] = &Function{"func1", 2, false, TBOOL, nil, nil} 163 | query := "where func1(key, func2(), (key = 'test'))" 164 | expr, err := parseQuery(query) 165 | if err != nil { 166 | t.Fatal(err) 167 | } 168 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 169 | } 170 | 171 | func TestParser14(t *testing.T) { 172 | query := "select * where key = '1'" 173 | 
expr, err := parseQuery(query) 174 | if err != nil { 175 | t.Fatal(err) 176 | } 177 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 178 | fmt.Printf("%+v\n", *expr) 179 | } 180 | 181 | func TestParser15(t *testing.T) { 182 | query := "select key, int(value) where str(int(key) + 1) = '1'" 183 | expr, err := parseQuery(query) 184 | if err != nil { 185 | t.Fatal(err) 186 | } 187 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 188 | fmt.Printf("%+v\n", *expr) 189 | } 190 | 191 | func TestParser16(t *testing.T) { 192 | query := "select key, int(value) where int(key) + 1 >= 1 & (int(value) - 1 > 10 | int(value) <= 20)" 193 | expr, err := parseQuery(query) 194 | if err != nil { 195 | t.Fatal(err) 196 | } 197 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 198 | fmt.Printf("%+v\n", *expr) 199 | } 200 | 201 | func TestParser17(t *testing.T) { 202 | query := "select key, int(value) where key ^= 'key' limit 10" 203 | expr, err := parseQuery(query) 204 | if err != nil { 205 | t.Fatal(err) 206 | } 207 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 208 | fmt.Printf("%+v\n", *expr.Limit) 209 | fmt.Printf("%+v\n", *expr) 210 | } 211 | 212 | func TestParser18(t *testing.T) { 213 | query := "select key, int(value) where key ^= 'key' limit 20, 10" 214 | expr, err := parseQuery(query) 215 | if err != nil { 216 | t.Fatal(err) 217 | } 218 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 219 | fmt.Printf("%+v\n", *expr.Limit) 220 | fmt.Printf("%+v\n", *expr) 221 | } 222 | 223 | func TestParser19(t *testing.T) { 224 | query := "select key, int(value) where key ^= 'key' order by key limit 20, 10" 225 | expr, err := parseQuery(query) 226 | if err != nil { 227 | t.Fatal(err) 228 | } 229 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 230 | fmt.Printf("%+v\n", *expr.Order) 231 | fmt.Printf("%+v\n", *expr.Limit) 232 | fmt.Printf("%+v\n", *expr) 233 | } 234 | 235 | func TestParser20(t *testing.T) { 236 | query := "select key, int(value), value where key ^= 'key' order by key, value desc limit 
20, 10" 237 | expr, err := parseQuery(query) 238 | if err != nil { 239 | t.Fatal(err) 240 | } 241 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 242 | fmt.Printf("%+v\n", *expr.Order) 243 | fmt.Printf("%+v\n", *expr.Limit) 244 | fmt.Printf("%+v\n", *expr) 245 | } 246 | 247 | func TestParser21(t *testing.T) { 248 | query := "select * where key in ('k1', 'k2', 'k3')" 249 | expr, err := parseQuery(query) 250 | if err != nil { 251 | t.Fatal(err) 252 | } 253 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 254 | } 255 | 256 | func TestParser22(t *testing.T) { 257 | query := "select * where key between 'k1' and 'k3'" 258 | expr, err := parseQuery(query) 259 | if err != nil { 260 | t.Fatal(err) 261 | } 262 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 263 | } 264 | 265 | func TestParser23(t *testing.T) { 266 | query := "select * where key between 'k1' and 'k3' & int(value) between 1 and 10" 267 | expr, err := parseQuery(query) 268 | if err != nil { 269 | t.Fatal(err) 270 | } 271 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 272 | 273 | } 274 | 275 | func TestParser24(t *testing.T) { 276 | query := "select * where (key between 'k1' and 'k3') & int(value) between 1 and 10" 277 | expr, err := parseQuery(query) 278 | if err != nil { 279 | t.Fatal(err) 280 | } 281 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 282 | } 283 | 284 | func TestParser25(t *testing.T) { 285 | query := "select key, json(value)['test'] where key ^= 'k' & json(value)['test'] = 'v1'" 286 | expr, err := parseQuery(query) 287 | if err != nil { 288 | t.Fatal(err) 289 | } 290 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 291 | } 292 | 293 | func TestParser26(t *testing.T) { 294 | query := "select key, json(value)['test'] where key ^= 'k' & json(value)['test'][1] = 'v1'" 295 | expr, err := parseQuery(query) 296 | if err != nil { 297 | t.Fatal(err) 298 | } 299 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 300 | } 301 | 302 | func TestParser27(t *testing.T) { 303 | query := "select key, json(value)[1] 
where key ^= 'k' & json(value)['test'][1] = 'v1'" 304 | expr, err := parseQuery(query) 305 | if err == nil { 306 | t.Fatal("Require error") 307 | } 308 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 309 | } 310 | 311 | func TestParser28(t *testing.T) { 312 | query := "select key, split(key, '_')[1], split(key, '_')[2] where key ^= 'k' & json(value)['test'][1] = 'v1'" 313 | expr, err := parseQuery(query) 314 | if err != nil { 315 | t.Fatal(err) 316 | } 317 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 318 | } 319 | 320 | func TestParser29(t *testing.T) { 321 | query := "select key, int(split(key, '_')[1]) as f2, split(key, '_')[2] as f3 where key ^= 'k' & f2 > 10" 322 | expr, err := parseQuery(query) 323 | if err != nil { 324 | t.Fatal(err) 325 | } 326 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 327 | } 328 | 329 | func TestParser30(t *testing.T) { 330 | query := "put ('k1', 'v1')" 331 | expr, err := parsePutQuery(query) 332 | if err != nil { 333 | t.Fatal(err) 334 | } 335 | if len(expr.KVPairs) != 1 { 336 | t.Fatal("Require 1 KV pair") 337 | } 338 | for i, kvp := range expr.KVPairs { 339 | fmt.Printf("[%d] %v: %v\n", i, kvp.Key.String(), kvp.Value.String()) 340 | } 341 | } 342 | 343 | func TestParser31(t *testing.T) { 344 | query := "put ('k1', 'v1'), ('k2', 'v2')" 345 | expr, err := parsePutQuery(query) 346 | if err != nil { 347 | t.Fatal(err) 348 | } 349 | if len(expr.KVPairs) != 2 { 350 | t.Fatal("Require 2 KV pair") 351 | } 352 | for i, kvp := range expr.KVPairs { 353 | fmt.Printf("[%d] %v: %v\n", i, kvp.Key.String(), kvp.Value.String()) 354 | } 355 | } 356 | 357 | func TestParser32(t *testing.T) { 358 | query := "put ('k1', 'v1'), ('k2', 'v2'), ('k3', upper('value3'))" 359 | expr, err := parsePutQuery(query) 360 | if err != nil { 361 | t.Fatal(err) 362 | } 363 | if len(expr.KVPairs) != 3 { 364 | t.Fatal("Require 3 KV pair") 365 | } 366 | for i, kvp := range expr.KVPairs { 367 | fmt.Printf("[%d] %v: %v\n", i, kvp.Key.String(), kvp.Value.String()) 368 | } 
369 | } 370 | 371 | func TestParser33(t *testing.T) { 372 | query := "put ('k1', value), ('k2', 'v2'), ('k3', upper('value3'))" 373 | _, err := parsePutQuery(query) 374 | if err == nil { 375 | t.Fatal("Require error") 376 | } 377 | fmt.Println(err) 378 | } 379 | 380 | func TestParser34(t *testing.T) { 381 | query := "put ('k1', key + 'test'), ('k2', 'v2'), ('k3', upper('value3'))" 382 | expr, err := parsePutQuery(query) 383 | if err != nil { 384 | t.Fatal(err) 385 | } 386 | for i, kvp := range expr.KVPairs { 387 | fmt.Printf("[%d] %v: %v\n", i, kvp.Key.String(), kvp.Value.String()) 388 | } 389 | } 390 | 391 | func TestParser35(t *testing.T) { 392 | query := "put ('k1', is_number(key + 'test')), ('k2', 'v2'), ('k3', upper('value3'))" 393 | expr, err := parsePutQuery(query) 394 | if err == nil { 395 | t.Fatal("Require error") 396 | } 397 | fmt.Println(err) 398 | for i, kvp := range expr.KVPairs { 399 | fmt.Printf("[%d] %v: %v\n", i, kvp.Key.String(), kvp.Value.String()) 400 | } 401 | } 402 | 403 | func TestParser36(t *testing.T) { 404 | query := "remove 'k1', 'k2'" 405 | expr, err := parseRemoveQuery(query) 406 | if err != nil { 407 | t.Fatal(err) 408 | } 409 | for i, k := range expr.Keys { 410 | fmt.Printf("[%d] %v\n", i, k.String()) 411 | } 412 | } 413 | 414 | func TestParser37(t *testing.T) { 415 | query := "remove 'k1', key, value" 416 | expr, err := parseRemoveQuery(query) 417 | if err == nil { 418 | t.Fatal("Require error") 419 | } 420 | fmt.Println(err) 421 | for i, k := range expr.Keys { 422 | fmt.Printf("[%d] %v\n", i, k.String()) 423 | } 424 | } 425 | 426 | func TestParser38(t *testing.T) { 427 | query := "select * where key = 'k1' and key = 'k2'" 428 | expr, err := parseQuery(query) 429 | if err != nil { 430 | t.Fatal(err) 431 | } 432 | fmt.Printf("%+v\n", expr.Where.Expr.String()) 433 | } 434 | 435 | func TestParser39(t *testing.T) { 436 | query := "select * where key = 'k1' or key between 'k3' and 'k4'" 437 | expr, err := parseQuery(query) 438 | if err 
var (
	// PlanBatchSize is the batch size plans use for Batch calls.
	PlanBatchSize = 32
	// EnableFieldCache is the default for ExecuteCtx.EnableCache. Setting the
	// environment variable DISABLE_FCACHE=1 turns it off at start-up.
	EnableFieldCache = true
)

func init() {
	if os.Getenv("DISABLE_FCACHE") == "1" {
		EnableFieldCache = false
	}
}

// ExecuteCtx carries per-execution caches of already evaluated field results.
//
// All methods accept a nil receiver and treat it like a context whose cache
// is disabled, so plans that tolerate a nil context can call them directly.
type ExecuteCtx struct {
	// Hit counts how many times a cached result was reused.
	Hit int
	// EnableCache turns every cache below on or off.
	EnableCache bool
	// FieldCaches holds single-row results keyed by field name.
	FieldCaches map[string]any
	// FieldChunkKeyCaches holds chunk results keyed by field name plus key.
	FieldChunkKeyCaches map[string][]any
	// FieldChunkCaches holds, per field name, the concatenation of every
	// chunk stored for that field.
	FieldChunkCaches map[string][]any
}

// NewExecuteCtx returns a context with empty caches whose EnableCache follows
// EnableFieldCache.
func NewExecuteCtx() *ExecuteCtx {
	return &ExecuteCtx{
		Hit:                 0,
		EnableCache:         EnableFieldCache,
		FieldCaches:         make(map[string]any),
		FieldChunkKeyCaches: make(map[string][]any),
		FieldChunkCaches:    make(map[string][]any),
	}
}

// cacheEnabled reports whether c is usable as a cache.
func (c *ExecuteCtx) cacheEnabled() bool {
	return c != nil && c.EnableCache
}

// chunkCacheKey builds the FieldChunkKeyCaches key for (name, key). The name
// is length-prefixed so two different pairs can never produce the same
// string, whatever bytes the name or the key contain.
func chunkCacheKey(name string, key []byte) string {
	return fmt.Sprintf("%d:%s%s", len(name), name, key)
}

// GetFieldResult returns the cached single-row result for name, if any.
func (c *ExecuteCtx) GetFieldResult(name string) (any, bool) {
	if !c.cacheEnabled() {
		return nil, false
	}
	val, have := c.FieldCaches[name]
	if !have {
		return nil, false
	}
	return val, true
}

// SetFieldResult caches value as the single-row result for name.
func (c *ExecuteCtx) SetFieldResult(name string, value any) {
	if !c.cacheEnabled() {
		return
	}
	c.FieldCaches[name] = value
}

// GetChunkFieldResult returns the chunk cached for (name, key), if any.
func (c *ExecuteCtx) GetChunkFieldResult(name string, key []byte) ([]any, bool) {
	if !c.cacheEnabled() {
		return nil, false
	}
	chunk, have := c.FieldChunkKeyCaches[chunkCacheKey(name, key)]
	if !have {
		return nil, false
	}
	return chunk, true
}

// AppendChunkFieldResult appends chunk to the accumulated results of name.
// The values are copied, so the cache never shares storage with chunk.
func (c *ExecuteCtx) AppendChunkFieldResult(name string, chunk []any) {
	if !c.cacheEnabled() {
		return
	}
	cdata, have := c.FieldChunkCaches[name]
	if !have {
		cdata = make([]any, 0, len(chunk))
	}
	c.FieldChunkCaches[name] = append(cdata, chunk...)
}

// SetChunkFieldResult caches chunk for (name, key) and appends it to the
// accumulated results of name. A pair that is already cached is left as is.
func (c *ExecuteCtx) SetChunkFieldResult(name string, key []byte, chunk []any) {
	if !c.cacheEnabled() {
		return
	}
	ckey := chunkCacheKey(name, key)
	if _, have := c.FieldChunkKeyCaches[ckey]; have {
		return
	}
	c.FieldChunkKeyCaches[ckey] = chunk
	c.AppendChunkFieldResult(name, chunk)
}

// GetChunkFieldFinalResult returns the accumulated chunk results of name.
func (c *ExecuteCtx) GetChunkFieldFinalResult(name string) ([]any, bool) {
	if !c.cacheEnabled() {
		return nil, false
	}
	val, have := c.FieldChunkCaches[name]
	return val, have
}

// UpdateHit records one cache hit.
func (c *ExecuteCtx) UpdateHit() {
	if c == nil {
		return
	}
	c.Hit++
}

// Clear empties every cache; Hit is left untouched.
func (c *ExecuteCtx) Clear() {
	if !c.cacheEnabled() {
		return
	}
	clear(c.FieldCaches)
	clear(c.FieldChunkCaches)
	clear(c.FieldChunkKeyCaches)
}

// AdjustChunkCache keeps, for every accumulated chunk result, only the
// entries whose position is listed in chooseIdxes. Kept entries stay in their
// original order.
func (c *ExecuteCtx) AdjustChunkCache(chooseIdxes []int) {
	if !c.cacheEnabled() {
		return
	}
	keep := make(map[int]struct{}, len(chooseIdxes))
	for _, idx := range chooseIdxes {
		keep[idx] = struct{}{}
	}
	for name, values := range c.FieldChunkCaches {
		kept := make([]any, 0, len(chooseIdxes))
		for i, item := range values {
			if _, ok := keep[i]; ok {
				kept = append(kept, item)
			}
		}
		c.FieldChunkCaches[name] = kept
	}
}
String() string 144 | Explain() []string 145 | Init() error 146 | Next(ctx *ExecuteCtx) (key []byte, value []byte, err error) 147 | Batch(ctx *ExecuteCtx) (rows []KVPair, err error) 148 | } 149 | 150 | var ( 151 | _ Plan = (*FullScanPlan)(nil) 152 | _ Plan = (*EmptyResultPlan)(nil) 153 | _ Plan = (*RangeScanPlan)(nil) 154 | _ Plan = (*PrefixScanPlan)(nil) 155 | _ Plan = (*MultiGetPlan)(nil) 156 | _ Plan = (*LimitPlan)(nil) 157 | 158 | _ FinalPlan = (*ProjectionPlan)(nil) 159 | _ FinalPlan = (*AggregatePlan)(nil) 160 | _ FinalPlan = (*FinalOrderPlan)(nil) 161 | _ FinalPlan = (*FinalLimitPlan)(nil) 162 | _ FinalPlan = (*PutPlan)(nil) 163 | ) 164 | 165 | type Column any 166 | 167 | type EmptyResultPlan struct { 168 | Storage Storage 169 | } 170 | 171 | func NewEmptyResultPlan(s Storage, f *FilterExec) Plan { 172 | return &EmptyResultPlan{ 173 | Storage: s, 174 | } 175 | } 176 | 177 | func (p *EmptyResultPlan) Init() error { 178 | return nil 179 | } 180 | 181 | func (p *EmptyResultPlan) Next(ctx *ExecuteCtx) ([]byte, []byte, error) { 182 | return nil, nil, nil 183 | } 184 | 185 | func (p *EmptyResultPlan) String() string { 186 | return "EmptyResultPlan" 187 | } 188 | 189 | func (p *EmptyResultPlan) Explain() []string { 190 | return []string{p.String()} 191 | } 192 | 193 | func (p *EmptyResultPlan) Batch(ctx *ExecuteCtx) ([]KVPair, error) { 194 | return nil, nil 195 | } 196 | -------------------------------------------------------------------------------- /projection_plan.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | type ProjectionPlan struct { 9 | Storage Storage 10 | ChildPlan Plan 11 | AllFields bool 12 | FieldNames []string 13 | FieldTypes []Type 14 | Fields []Expression 15 | } 16 | 17 | func (p *ProjectionPlan) Init() error { 18 | return p.ChildPlan.Init() 19 | } 20 | 21 | func (p *ProjectionPlan) FieldNameList() []string { 22 | if p.AllFields { 23 | 
return []string{"KEY", "VALUE"} 24 | } 25 | return p.FieldNames 26 | } 27 | 28 | func (p *ProjectionPlan) FieldTypeList() []Type { 29 | if p.AllFields { 30 | return []Type{TSTR, TSTR} 31 | } 32 | return p.FieldTypes 33 | } 34 | 35 | func (p *ProjectionPlan) Next(ctx *ExecuteCtx) ([]Column, error) { 36 | ctx.Clear() 37 | k, v, err := p.ChildPlan.Next(ctx) 38 | if err != nil { 39 | return nil, err 40 | } 41 | if k == nil && v == nil && err == nil { 42 | return nil, nil 43 | } 44 | if p.AllFields { 45 | return []Column{k, v}, nil 46 | } 47 | return p.processProjection(NewKVP(k, v), ctx) 48 | } 49 | 50 | func (p *ProjectionPlan) Batch(ctx *ExecuteCtx) ([][]Column, error) { 51 | ctx.Clear() 52 | kvps, err := p.ChildPlan.Batch(ctx) 53 | if err != nil { 54 | return nil, err 55 | } 56 | if len(kvps) == 0 { 57 | return nil, nil 58 | } 59 | if p.AllFields { 60 | ret := make([][]Column, 0, len(kvps)) 61 | for _, kvp := range kvps { 62 | ret = append(ret, []Column{kvp.Key, kvp.Value}) 63 | } 64 | return ret, nil 65 | } 66 | return p.processProjectionBatch(kvps, ctx) 67 | } 68 | 69 | func (p *ProjectionPlan) processProjectionBatch(chunk []KVPair, ctx *ExecuteCtx) ([][]Column, error) { 70 | var ( 71 | nFields = len(p.Fields) 72 | ret = make([][]Column, len(chunk)) 73 | cols = make([][]any, nFields) 74 | err error 75 | have bool 76 | ) 77 | for i := 0; i < nFields; i++ { 78 | have = false 79 | if ctx != nil { 80 | fname := p.FieldNames[i] 81 | cols[i], have = ctx.GetChunkFieldFinalResult(fname) 82 | } 83 | if !have { 84 | cols[i], err = p.Fields[i].ExecuteBatch(chunk, ctx) 85 | } else { 86 | ctx.UpdateHit() 87 | } 88 | if err != nil { 89 | return nil, err 90 | } 91 | } 92 | for i := 0; i < len(chunk); i++ { 93 | row := make([]Column, nFields) 94 | for j := 0; j < nFields; j++ { 95 | row[j] = cols[j][i] 96 | } 97 | ret[i] = row 98 | } 99 | return ret, nil 100 | } 101 | 102 | func (p *ProjectionPlan) processProjection(kvp KVPair, ctx *ExecuteCtx) ([]Column, error) { 103 | nFields 
:= len(p.Fields) 104 | ret := make([]Column, nFields) 105 | var ( 106 | result any 107 | err error 108 | ) 109 | for i := 0; i < nFields; i++ { 110 | have := false 111 | if ctx != nil { 112 | fname := p.FieldNames[i] 113 | result, have = ctx.GetFieldResult(fname) 114 | } 115 | if !have { 116 | result, err = p.Fields[i].Execute(kvp, ctx) 117 | } else { 118 | ctx.UpdateHit() 119 | } 120 | if err != nil { 121 | return nil, err 122 | } 123 | switch value := result.(type) { 124 | case bool, []byte, string, 125 | int, int8, int16, int32, int64, 126 | uint, uint8, uint16, uint32, uint64, 127 | float32, float64, 128 | JSON, map[string]any, []any: 129 | ret[i] = value 130 | default: 131 | if value == nil { 132 | ret[i] = nil 133 | break 134 | } 135 | return nil, NewExecuteError(p.Fields[i].GetPos(), "Expression result type not support") 136 | } 137 | } 138 | return ret, nil 139 | } 140 | 141 | func (p *ProjectionPlan) String() string { 142 | fields := []string{} 143 | if p.AllFields { 144 | fields = append(fields, "*") 145 | } else { 146 | for _, f := range p.Fields { 147 | fields = append(fields, f.String()) 148 | } 149 | } 150 | return fmt.Sprintf("ProjectionPlan{Fields = <%s>}", strings.Join(fields, ", ")) 151 | } 152 | 153 | func (p *ProjectionPlan) Explain() []string { 154 | ret := []string{p.String()} 155 | for _, plan := range p.ChildPlan.Explain() { 156 | ret = append(ret, plan) 157 | } 158 | return ret 159 | } 160 | -------------------------------------------------------------------------------- /put_plan.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | type PutPlan struct { 9 | Storage Storage 10 | KVPairs []*PutKVPair 11 | executed bool 12 | } 13 | 14 | func (p *PutPlan) Init() error { 15 | p.executed = false 16 | return nil 17 | } 18 | 19 | func (p *PutPlan) Explain() []string { 20 | return []string{p.String()} 21 | } 22 | 23 | func (p *PutPlan) String() 
string { 24 | kvps := make([]string, len(p.KVPairs)) 25 | for i, kvp := range p.KVPairs { 26 | kvps[i] = kvp.String() 27 | } 28 | return fmt.Sprintf("PutPlan{KVPairs = [%s]}", strings.Join(kvps, ", ")) 29 | } 30 | 31 | func (p *PutPlan) Next(ctx *ExecuteCtx) ([]Column, error) { 32 | if !p.executed { 33 | n, err := p.execute(ctx) 34 | p.executed = true 35 | return []Column{n}, err 36 | } 37 | return nil, nil 38 | } 39 | 40 | func (p *PutPlan) Batch(ctx *ExecuteCtx) ([][]Column, error) { 41 | if !p.executed { 42 | n, err := p.execute(ctx) 43 | p.executed = true 44 | row := []Column{n} 45 | return [][]Column{row}, err 46 | } 47 | return nil, nil 48 | } 49 | 50 | func (p *PutPlan) FieldNameList() []string { 51 | return []string{"Rows"} 52 | } 53 | 54 | func (p *PutPlan) FieldTypeList() []Type { 55 | return []Type{TNUMBER} 56 | } 57 | 58 | func (p *PutPlan) processKVPair(ctx *ExecuteCtx, kvp *PutKVPair) ([]byte, []byte, error) { 59 | ekvp := NewKVPStr("", "") 60 | rkey, err := kvp.Key.Execute(ekvp, ctx) 61 | if err != nil { 62 | return nil, nil, err 63 | } 64 | key := []byte(toString(rkey)) 65 | ekvp.Key = key 66 | rvalue, err := kvp.Value.Execute(ekvp, ctx) 67 | if err != nil { 68 | return nil, nil, err 69 | } 70 | value := []byte(toString(rvalue)) 71 | return key, value, nil 72 | } 73 | 74 | func (p *PutPlan) execute(ctx *ExecuteCtx) (int, error) { 75 | nkvps := len(p.KVPairs) 76 | kvps := make([]KVPair, nkvps) 77 | for i, kvp := range p.KVPairs { 78 | key, value, err := p.processKVPair(ctx, kvp) 79 | if err != nil { 80 | return 0, err 81 | } 82 | kvps[i] = NewKVP(key, value) 83 | } 84 | 85 | if nkvps == 0 { 86 | return 0, nil 87 | } else if nkvps == 1 { 88 | err := p.Storage.Put(kvps[0].Key, kvps[0].Value) 89 | if err != nil { 90 | return 0, err 91 | } 92 | return 1, nil 93 | } else { 94 | err := p.Storage.BatchPut(kvps) 95 | if err != nil { 96 | return 0, err 97 | } 98 | return nkvps, nil 99 | } 100 | } 101 | 
-------------------------------------------------------------------------------- /remove_plan.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | type RemovePlan struct { 9 | Storage Storage 10 | Keys []Expression 11 | executed bool 12 | } 13 | 14 | func (p *RemovePlan) Init() error { 15 | p.executed = false 16 | return nil 17 | } 18 | 19 | func (p *RemovePlan) Explain() []string { 20 | return []string{p.String()} 21 | } 22 | 23 | func (p *RemovePlan) String() string { 24 | keys := make([]string, len(p.Keys)) 25 | for i, k := range p.Keys { 26 | keys[i] = k.String() 27 | } 28 | return fmt.Sprintf("RemovePlan{Keys = [%s]}", strings.Join(keys, ", ")) 29 | } 30 | 31 | func (p *RemovePlan) Next(ctx *ExecuteCtx) ([]Column, error) { 32 | if !p.executed { 33 | n, err := p.execute(ctx) 34 | p.executed = true 35 | return []Column{n}, err 36 | } 37 | return nil, nil 38 | } 39 | 40 | func (p *RemovePlan) Batch(ctx *ExecuteCtx) ([][]Column, error) { 41 | if !p.executed { 42 | n, err := p.execute(ctx) 43 | p.executed = true 44 | row := []Column{n} 45 | return [][]Column{row}, err 46 | } 47 | return nil, nil 48 | } 49 | 50 | func (p *RemovePlan) FieldNameList() []string { 51 | return []string{"Rows"} 52 | } 53 | 54 | func (p *RemovePlan) FieldTypeList() []Type { 55 | return []Type{TNUMBER} 56 | } 57 | 58 | func (p *RemovePlan) processKey(ekvp KVPair, ctx *ExecuteCtx, kexpr Expression) ([]byte, error) { 59 | rkey, err := kexpr.Execute(ekvp, ctx) 60 | if err != nil { 61 | return nil, err 62 | } 63 | key := []byte(toString(rkey)) 64 | return key, nil 65 | } 66 | 67 | func (p *RemovePlan) execute(ctx *ExecuteCtx) (int, error) { 68 | nks := len(p.Keys) 69 | keys := make([][]byte, nks) 70 | ekvp := NewKVPStr("", "") 71 | for i, kexpr := range p.Keys { 72 | key, err := p.processKey(ekvp, ctx, kexpr) 73 | if err != nil { 74 | return 0, err 75 | } 76 | keys[i] = key 77 | } 78 | 79 | 
if nks == 0 { 80 | return 0, nil 81 | } else if nks == 1 { 82 | err := p.Storage.Delete(keys[0]) 83 | if err != nil { 84 | return 0, err 85 | } 86 | return 1, nil 87 | } else { 88 | err := p.Storage.BatchDelete(keys) 89 | if err != nil { 90 | return 0, err 91 | } 92 | return nks, nil 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /scalar_func.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "math" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | func funcToLower(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 12 | rarg, err := args[0].Execute(kv, ctx) 13 | if err != nil { 14 | return nil, err 15 | } 16 | arg := toString(rarg) 17 | return strings.ToLower(arg), nil 18 | } 19 | 20 | func funcToUpper(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 21 | rarg, err := args[0].Execute(kv, ctx) 22 | if err != nil { 23 | return nil, err 24 | } 25 | arg := toString(rarg) 26 | return strings.ToUpper(arg), nil 27 | } 28 | 29 | func funcToInt(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 30 | rarg, err := args[0].Execute(kv, ctx) 31 | if err != nil { 32 | return nil, err 33 | } 34 | ret := toInt(rarg, 0) 35 | return ret, nil 36 | } 37 | 38 | func funcToFloat(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 39 | rarg, err := args[0].Execute(kv, ctx) 40 | if err != nil { 41 | return nil, err 42 | } 43 | ret := toFloat(rarg, 0.0) 44 | return ret, nil 45 | } 46 | 47 | func funcToString(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 48 | rarg, err := args[0].Execute(kv, ctx) 49 | if err != nil { 50 | return nil, err 51 | } 52 | ret := toString(rarg) 53 | return ret, nil 54 | } 55 | 56 | func funcIsInt(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 57 | rarg, err := args[0].Execute(kv, ctx) 58 | if err != nil { 59 | return nil, err 60 | } 61 | switch 
val := rarg.(type) { 62 | case string: 63 | if _, err := strconv.ParseInt(val, 10, 64); err == nil { 64 | return true, nil 65 | } 66 | case []byte: 67 | if _, err := strconv.ParseInt(string(val), 10, 64); err == nil { 68 | return true, nil 69 | } 70 | case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64: 71 | return true, nil 72 | } 73 | return false, nil 74 | } 75 | 76 | func funcIsFloat(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 77 | rarg, err := args[0].Execute(kv, ctx) 78 | if err != nil { 79 | return nil, err 80 | } 81 | switch val := rarg.(type) { 82 | case string: 83 | if _, err := strconv.ParseFloat(val, 64); err == nil { 84 | return true, nil 85 | } 86 | case []byte: 87 | if _, err := strconv.ParseFloat(string(val), 64); err == nil { 88 | return true, nil 89 | } 90 | case float32, float64: 91 | return true, nil 92 | } 93 | return false, nil 94 | } 95 | 96 | func funcSubStr(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 97 | rarg, err := args[0].Execute(kv, ctx) 98 | if err != nil { 99 | return nil, err 100 | } 101 | val := toString(rarg) 102 | if args[1].ReturnType() != TNUMBER { 103 | return nil, NewExecuteError(args[1].GetPos(), "substr function second parameter require number type") 104 | } 105 | if args[2].ReturnType() != TNUMBER { 106 | return nil, NewExecuteError(args[2].GetPos(), "substr function third parameter require number type") 107 | } 108 | rarg, err = args[1].Execute(kv, ctx) 109 | if err != nil { 110 | return nil, err 111 | } 112 | start := int(toInt(rarg, 0)) 113 | rarg, err = args[2].Execute(kv, ctx) 114 | if err != nil { 115 | return nil, err 116 | } 117 | length := int(toInt(rarg, 0)) 118 | vlen := len(val) 119 | if start > vlen-1 { 120 | return "", nil 121 | } 122 | length = min(length, vlen-start) 123 | return val[start:length], nil 124 | } 125 | 126 | func min(a, b int) int { 127 | if a < b { 128 | return a 129 | } 130 | return b 131 | } 132 | 133 | type JSON map[string]any 134 | 
135 | func funcJson(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 136 | rarg, err := args[0].Execute(kv, ctx) 137 | if err != nil { 138 | return nil, err 139 | } 140 | jsonData, ok := convertToByteArray(rarg) 141 | if !ok { 142 | return nil, NewExecuteError(args[0].GetPos(), "Cannot convert to byte array") 143 | } 144 | ret := make(JSON) 145 | json.Unmarshal(jsonData, &ret) 146 | return ret, nil 147 | } 148 | 149 | func funcSplit(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 150 | rarg, err := args[0].Execute(kv, ctx) 151 | if err != nil { 152 | return nil, err 153 | } 154 | if args[1].ReturnType() != TSTR { 155 | return nil, NewExecuteError(args[1].GetPos(), "split function second parameter require string type") 156 | } 157 | rspliter, err := args[1].Execute(kv, ctx) 158 | if err != nil { 159 | return nil, err 160 | } 161 | val := toString(rarg) 162 | spliter := toString(rspliter) 163 | ret := strings.Split(val, spliter) 164 | return ret, nil 165 | } 166 | 167 | func funcJoin(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 168 | if args[0].ReturnType() != TSTR { 169 | return nil, NewExecuteError(args[0].GetPos(), "join function first parameter require string type") 170 | } 171 | rseparator, err := args[0].Execute(kv, ctx) 172 | if err != nil { 173 | return nil, err 174 | } 175 | separator := toString(rseparator) 176 | vals := make([]string, len(args)-1) 177 | for i, arg := range args[1:] { 178 | rval, err := arg.Execute(kv, ctx) 179 | if err != nil { 180 | return nil, err 181 | } 182 | vals[i] = toString(rval) 183 | } 184 | ret := strings.Join(vals, separator) 185 | return ret, nil 186 | } 187 | 188 | func funcCosineDistance(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 189 | larg, err := args[0].Execute(kv, ctx) 190 | if err != nil { 191 | return nil, err 192 | } 193 | rarg, err := args[1].Execute(kv, ctx) 194 | if err != nil { 195 | return nil, err 196 | } 197 | lvec, err := toFloatList(larg) 198 | if 
err != nil { 199 | return nil, err 200 | } 201 | rvec, err := toFloatList(rarg) 202 | if err != nil { 203 | return nil, err 204 | } 205 | ret, err := cosineDistance(lvec, rvec) 206 | if err != nil { 207 | return nil, err 208 | } 209 | return ret, nil 210 | } 211 | 212 | func cosineDistance(left, right []float64) (float64, error) { 213 | if len(left) != len(right) { 214 | return 0, fmt.Errorf("length must equals") 215 | } 216 | var t1 float64 217 | var t2 float64 218 | var t3 float64 219 | for i := 0; i < len(left); i++ { 220 | t1 += left[i] * right[i] 221 | t2 += left[i] * left[i] 222 | t3 += right[i] * right[i] 223 | } 224 | return 1 - t1/(math.Sqrt(t2)*math.Sqrt(t3)), nil 225 | } 226 | 227 | func funcL2Distance(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 228 | larg, err := args[0].Execute(kv, ctx) 229 | if err != nil { 230 | return nil, err 231 | } 232 | rarg, err := args[1].Execute(kv, ctx) 233 | if err != nil { 234 | return nil, err 235 | } 236 | lvec, err := toFloatList(larg) 237 | if err != nil { 238 | return nil, err 239 | } 240 | rvec, err := toFloatList(rarg) 241 | if err != nil { 242 | return nil, err 243 | } 244 | ret, err := l2Distance(lvec, rvec) 245 | if err != nil { 246 | return nil, err 247 | } 248 | return ret, nil 249 | } 250 | 251 | func l2Distance(left, right []float64) (float64, error) { 252 | if len(left) != len(right) { 253 | return 0, fmt.Errorf("length must equals") 254 | } 255 | var total float64 = 0 256 | for i := 0; i < len(left); i++ { 257 | diff := math.Abs(left[i] - right[i]) 258 | total += diff * diff 259 | } 260 | return math.Sqrt(total), nil 261 | } 262 | 263 | func funcFloatList(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 264 | ret := make([]float64, len(args)) 265 | for i := 0; i < len(args); i++ { 266 | val, err := args[i].Execute(kv, ctx) 267 | if err != nil { 268 | return nil, err 269 | } 270 | ret[i] = toFloat(val, 0.0) 271 | } 272 | return ret, nil 273 | } 274 | 275 | func funcIntList(kv 
KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 276 | ret := make([]int64, len(args)) 277 | for i := 0; i < len(args); i++ { 278 | val, err := args[i].Execute(kv, ctx) 279 | if err != nil { 280 | return nil, err 281 | } 282 | ret[i] = toInt(val, 0) 283 | } 284 | return ret, nil 285 | } 286 | 287 | func funcToList(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 288 | if len(args) == 0 { 289 | return []int64{}, nil 290 | } 291 | 292 | first, err := args[0].Execute(kv, ctx) 293 | if err != nil { 294 | return nil, err 295 | } 296 | useInt := false 297 | switch fval := first.(type) { 298 | case string: 299 | if _, err := strconv.ParseInt(fval, 10, 64); err == nil { 300 | useInt = true 301 | } else if _, err := strconv.ParseFloat(fval, 64); err == nil { 302 | useInt = false 303 | } 304 | case []byte: 305 | if _, err := strconv.ParseInt(string(fval), 10, 64); err == nil { 306 | useInt = true 307 | } else if _, err := strconv.ParseFloat(string(fval), 64); err == nil { 308 | useInt = false 309 | } 310 | case int, uint, int32, uint32, int64, uint64: 311 | useInt = true 312 | case float32, float64: 313 | useInt = false 314 | } 315 | if useInt { 316 | return funcIntList(kv, args, ctx) 317 | } 318 | return funcFloatList(kv, args, ctx) 319 | } 320 | 321 | func funcLen(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 322 | rarg, err := args[0].Execute(kv, ctx) 323 | if err != nil { 324 | return nil, err 325 | } 326 | ret, err := getListLength(rarg) 327 | if err != nil { 328 | return nil, NewExecuteError(args[0].GetPos(), err.Error()) 329 | } 330 | return ret, nil 331 | } 332 | 333 | func getListLength(data any) (int, error) { 334 | switch val := data.(type) { 335 | case string: 336 | return len(val), nil 337 | case int, int32, int64, uint, uint32, uint64, float32, float64: 338 | return 0, nil 339 | case []byte: 340 | return len(val), nil 341 | case []int: 342 | return len(val), nil 343 | case []int32: 344 | return len(val), nil 345 | case 
[]int64: 346 | return len(val), nil 347 | case []uint: 348 | return len(val), nil 349 | case []uint32: 350 | return len(val), nil 351 | case []uint64: 352 | return len(val), nil 353 | case []float32: 354 | return len(val), nil 355 | case []float64: 356 | return len(val), nil 357 | } 358 | return 0, fmt.Errorf("invalid type") 359 | } 360 | 361 | func funcStrlen(kv KVPair, args []Expression, ctx *ExecuteCtx) (any, error) { 362 | rarg, err := args[0].Execute(kv, ctx) 363 | if err != nil { 364 | return nil, err 365 | } 366 | ret := toString(rarg) 367 | return int64(len(ret)), nil 368 | } 369 | -------------------------------------------------------------------------------- /scalar_func_vec.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "encoding/json" 5 | "strconv" 6 | "strings" 7 | ) 8 | 9 | func funcToLowerVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 10 | rarg, err := args[0].ExecuteBatch(chunk, ctx) 11 | if err != nil { 12 | return nil, err 13 | } 14 | var ( 15 | ret = make([]any, len(chunk)) 16 | ) 17 | for i := 0; i < len(chunk); i++ { 18 | arg := toString(rarg[i]) 19 | ret[i] = strings.ToLower(arg) 20 | } 21 | return ret, nil 22 | } 23 | 24 | func funcToUpperVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 25 | rarg, err := args[0].ExecuteBatch(chunk, ctx) 26 | if err != nil { 27 | return nil, err 28 | } 29 | var ( 30 | ret = make([]any, len(chunk)) 31 | ) 32 | for i := 0; i < len(chunk); i++ { 33 | arg := toString(rarg[i]) 34 | ret[i] = strings.ToUpper(arg) 35 | } 36 | return ret, nil 37 | } 38 | 39 | func funcToIntVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 40 | rarg, err := args[0].ExecuteBatch(chunk, ctx) 41 | if err != nil { 42 | return nil, err 43 | } 44 | var ( 45 | ret = make([]any, len(chunk)) 46 | ) 47 | for i := 0; i < len(chunk); i++ { 48 | ret[i] = toInt(rarg[i], 0) 49 | } 50 | return ret, nil 51 
| } 52 | 53 | func funcToFloatVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 54 | rarg, err := args[0].ExecuteBatch(chunk, ctx) 55 | if err != nil { 56 | return nil, err 57 | } 58 | var ( 59 | ret = make([]any, len(chunk)) 60 | ) 61 | for i := 0; i < len(chunk); i++ { 62 | ret[i] = toFloat(rarg[i], 0.0) 63 | } 64 | return ret, nil 65 | } 66 | 67 | func funcToStringVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 68 | rarg, err := args[0].ExecuteBatch(chunk, ctx) 69 | if err != nil { 70 | return nil, err 71 | } 72 | var ( 73 | ret = make([]any, len(chunk)) 74 | ) 75 | for i := 0; i < len(chunk); i++ { 76 | ret[i] = toString(rarg[i]) 77 | } 78 | return ret, nil 79 | } 80 | 81 | func funcIsIntVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 82 | rarg, err := args[0].ExecuteBatch(chunk, ctx) 83 | if err != nil { 84 | return nil, err 85 | } 86 | var ( 87 | ret = make([]any, len(chunk)) 88 | ) 89 | for i := 0; i < len(chunk); i++ { 90 | ret[i] = false 91 | switch val := rarg[i].(type) { 92 | case string: 93 | if _, err := strconv.ParseInt(val, 10, 64); err == nil { 94 | ret[i] = true 95 | } 96 | case []byte: 97 | if _, err := strconv.ParseInt(string(val), 10, 64); err == nil { 98 | ret[i] = true 99 | } 100 | case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64: 101 | ret[i] = true 102 | } 103 | } 104 | return ret, nil 105 | } 106 | 107 | func funcIsFloatVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 108 | rarg, err := args[0].ExecuteBatch(chunk, ctx) 109 | if err != nil { 110 | return nil, err 111 | } 112 | var ( 113 | ret = make([]any, len(chunk)) 114 | ) 115 | for i := 0; i < len(chunk); i++ { 116 | ret[i] = false 117 | switch val := rarg[i].(type) { 118 | case string: 119 | if _, err := strconv.ParseFloat(val, 64); err == nil { 120 | ret[i] = true 121 | } 122 | case []byte: 123 | if _, err := strconv.ParseFloat(string(val), 64); err == nil { 124 | 
ret[i] = true 125 | } 126 | case float32, float64: 127 | ret[i] = true 128 | } 129 | } 130 | return ret, nil 131 | } 132 | 133 | func funcSubStrVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 134 | if args[1].ReturnType() != TNUMBER { 135 | return nil, NewExecuteError(args[1].GetPos(), "substr function second parameter require number type") 136 | } 137 | if args[2].ReturnType() != TNUMBER { 138 | return nil, NewExecuteError(args[2].GetPos(), "substr function third parameter require number type") 139 | } 140 | values, err := args[0].ExecuteBatch(chunk, ctx) 141 | if err != nil { 142 | return nil, err 143 | } 144 | starts, err := args[1].ExecuteBatch(chunk, ctx) 145 | if err != nil { 146 | return nil, err 147 | } 148 | lengths, err := args[2].ExecuteBatch(chunk, ctx) 149 | if err != nil { 150 | return nil, err 151 | } 152 | for i := 0; i < len(chunk); i++ { 153 | val := toString(values[i]) 154 | start := int(toInt(starts[i], 0)) 155 | length := int(toInt(lengths[i], 0)) 156 | vlen := len(val) 157 | if start > vlen-1 { 158 | values[i] = "" 159 | } else { 160 | length = min(length, vlen-start) 161 | values[i] = val[start:length] 162 | } 163 | } 164 | return values, nil 165 | } 166 | 167 | func funcJsonVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 168 | values, err := args[0].ExecuteBatch(chunk, ctx) 169 | if err != nil { 170 | return nil, err 171 | } 172 | for i := 0; i < len(chunk); i++ { 173 | val, ok := convertToByteArray(values[i]) 174 | if !ok { 175 | return nil, NewExecuteError(args[0].GetPos(), "Cannot convert to byte array") 176 | } 177 | item := make(JSON) 178 | json.Unmarshal(val, &item) 179 | values[i] = item 180 | } 181 | return values, nil 182 | } 183 | 184 | func funcSplitVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 185 | if args[1].ReturnType() != TSTR { 186 | return nil, NewExecuteError(args[1].GetPos(), "split function second parameter require string type") 187 | } 188 | 
values, err := args[0].ExecuteBatch(chunk, ctx) 189 | if err != nil { 190 | return nil, err 191 | } 192 | spliters, err := args[1].ExecuteBatch(chunk, ctx) 193 | if err != nil { 194 | return nil, err 195 | } 196 | for i := 0; i < len(chunk); i++ { 197 | val := toString(values[i]) 198 | spliter := toString(spliters[i]) 199 | values[i] = strings.Split(val, spliter) 200 | } 201 | return values, nil 202 | } 203 | 204 | func funcJoinVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 205 | ret := make([]any, len(chunk)) 206 | for i := 0; i < len(chunk); i++ { 207 | row, err := funcJoin(chunk[i], args, ctx) 208 | if err != nil { 209 | return nil, err 210 | } 211 | ret[i] = row 212 | } 213 | return ret, nil 214 | } 215 | 216 | func funcCosineDistanceVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 217 | largs, err := args[0].ExecuteBatch(chunk, ctx) 218 | if err != nil { 219 | return nil, err 220 | } 221 | rargs, err := args[1].ExecuteBatch(chunk, ctx) 222 | if err != nil { 223 | return nil, err 224 | } 225 | for i := 0; i < len(chunk); i++ { 226 | lvec, err := toFloatList(largs[i]) 227 | if err != nil { 228 | return nil, err 229 | } 230 | rvec, err := toFloatList(rargs[i]) 231 | if err != nil { 232 | return nil, err 233 | } 234 | ret, err := cosineDistance(lvec, rvec) 235 | if err != nil { 236 | return nil, err 237 | } 238 | largs[i] = ret 239 | } 240 | return largs, nil 241 | } 242 | 243 | func funcL2DistanceVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 244 | largs, err := args[0].ExecuteBatch(chunk, ctx) 245 | if err != nil { 246 | return nil, err 247 | } 248 | rargs, err := args[1].ExecuteBatch(chunk, ctx) 249 | if err != nil { 250 | return nil, err 251 | } 252 | for i := 0; i < len(chunk); i++ { 253 | lvec, err := toFloatList(largs[i]) 254 | if err != nil { 255 | return nil, err 256 | } 257 | rvec, err := toFloatList(rargs[i]) 258 | if err != nil { 259 | return nil, err 260 | } 261 | ret, err := 
l2Distance(lvec, rvec) 262 | if err != nil { 263 | return nil, err 264 | } 265 | largs[i] = ret 266 | } 267 | return largs, nil 268 | } 269 | 270 | func funcFloatListVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 271 | ret := make([]any, len(chunk)) 272 | for i := 0; i < len(chunk); i++ { 273 | row, err := funcFloatList(chunk[i], args, ctx) 274 | if err != nil { 275 | return nil, err 276 | } 277 | ret[i] = row 278 | } 279 | return ret, nil 280 | } 281 | 282 | func funcIntListVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 283 | ret := make([]any, len(chunk)) 284 | for i := 0; i < len(chunk); i++ { 285 | row, err := funcIntList(chunk[i], args, ctx) 286 | if err != nil { 287 | return nil, err 288 | } 289 | ret[i] = row 290 | } 291 | return ret, nil 292 | } 293 | 294 | func funcToListVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 295 | if len(args) == 0 || len(chunk) == 0 { 296 | return nil, nil 297 | } 298 | first, err := args[0].Execute(chunk[0], ctx) 299 | if err != nil { 300 | return nil, err 301 | } 302 | useInt := false 303 | switch fval := first.(type) { 304 | case string: 305 | if _, err := strconv.ParseInt(fval, 10, 64); err == nil { 306 | useInt = true 307 | } else if _, err := strconv.ParseFloat(fval, 64); err == nil { 308 | useInt = false 309 | } 310 | case []byte: 311 | if _, err := strconv.ParseInt(string(fval), 10, 64); err == nil { 312 | useInt = true 313 | } else if _, err := strconv.ParseFloat(string(fval), 64); err == nil { 314 | useInt = false 315 | } 316 | case int, uint, int32, uint32, int64, uint64: 317 | useInt = true 318 | case float32, float64: 319 | useInt = false 320 | } 321 | if useInt { 322 | return funcIntListVec(chunk, args, ctx) 323 | } 324 | return funcFloatListVec(chunk, args, ctx) 325 | } 326 | 327 | func funcLenVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 328 | rarg, err := args[0].ExecuteBatch(chunk, ctx) 329 | if err != nil { 
330 | return nil, err 331 | } 332 | for i := 0; i < len(chunk); i++ { 333 | val, err := getListLength(rarg[i]) 334 | if err != nil { 335 | return nil, NewExecuteError(args[0].GetPos(), err.Error()) 336 | } 337 | rarg[i] = val 338 | } 339 | return rarg, nil 340 | } 341 | 342 | func funcStrlenVec(chunk []KVPair, args []Expression, ctx *ExecuteCtx) ([]any, error) { 343 | rarg, err := args[0].ExecuteBatch(chunk, ctx) 344 | if err != nil { 345 | return nil, err 346 | } 347 | var ( 348 | ret = make([]any, len(chunk)) 349 | ) 350 | for i := 0; i < len(chunk); i++ { 351 | ret[i] = int64(len(toString(rarg[i]))) 352 | } 353 | return ret, nil 354 | } 355 | -------------------------------------------------------------------------------- /scan_plan.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "sort" 7 | "strings" 8 | ) 9 | 10 | type FullScanPlan struct { 11 | Storage Storage 12 | Filter *FilterExec 13 | iter Cursor 14 | } 15 | 16 | func NewFullScanPlan(s Storage, f *FilterExec) Plan { 17 | return &FullScanPlan{ 18 | Storage: s, 19 | Filter: f, 20 | } 21 | } 22 | 23 | func (p *FullScanPlan) String() string { 24 | return fmt.Sprintf("FullScanPlan{Filter = '%s'}", p.Filter.Explain()) 25 | } 26 | 27 | func (p *FullScanPlan) Explain() []string { 28 | return []string{p.String()} 29 | } 30 | 31 | func (p *FullScanPlan) Init() (err error) { 32 | p.iter, err = p.Storage.Cursor() 33 | if err != nil { 34 | return err 35 | } 36 | return p.iter.Seek([]byte{}) 37 | } 38 | 39 | func (p *FullScanPlan) Next(ctx *ExecuteCtx) ([]byte, []byte, error) { 40 | for { 41 | key, val, err := p.iter.Next() 42 | if err != nil { 43 | return nil, nil, err 44 | } 45 | if key == nil { 46 | break 47 | } 48 | ok, err := p.Filter.Filter(NewKVP(key, val), ctx) 49 | if err != nil { 50 | return nil, nil, err 51 | } 52 | if ok { 53 | return key, val, nil 54 | } 55 | } 56 | return nil, nil, nil 57 | } 58 | 59 | func (p 
*FullScanPlan) Batch(ctx *ExecuteCtx) ([]KVPair, error) { 60 | var ( 61 | ret = make([]KVPair, 0, PlanBatchSize) 62 | filterBatch = make([]KVPair, 0, PlanBatchSize) 63 | count = 0 64 | finish = false 65 | chooseIdxes = make([]int, 0, 2*PlanBatchSize) 66 | bidx = 0 67 | ) 68 | for !finish { 69 | filterBatch = filterBatch[:0] 70 | for i := 0; i < PlanBatchSize; i++ { 71 | key, val, err := p.iter.Next() 72 | if err != nil { 73 | return nil, err 74 | } 75 | if key == nil { 76 | finish = true 77 | break 78 | } 79 | filterBatch = append(filterBatch, NewKVP(key, val)) 80 | } 81 | if len(filterBatch) > 0 { 82 | matchs, err := p.Filter.FilterBatch(filterBatch, ctx) 83 | if err != nil { 84 | return nil, err 85 | } 86 | for i, m := range matchs { 87 | if m { 88 | ret = append(ret, filterBatch[i]) 89 | chooseIdxes = append(chooseIdxes, bidx) 90 | count += 1 91 | } 92 | bidx += 1 93 | } 94 | if count >= PlanBatchSize { 95 | finish = true 96 | } 97 | } 98 | } 99 | ctx.AdjustChunkCache(chooseIdxes) 100 | return ret, nil 101 | } 102 | 103 | type PrefixScanPlan struct { 104 | Storage Storage 105 | Filter *FilterExec 106 | Prefix string 107 | iter Cursor 108 | } 109 | 110 | func NewPrefixScanPlan(s Storage, f *FilterExec, p string) Plan { 111 | return &PrefixScanPlan{ 112 | Storage: s, 113 | Filter: f, 114 | Prefix: p, 115 | } 116 | } 117 | 118 | func (p *PrefixScanPlan) Init() (err error) { 119 | p.iter, err = p.Storage.Cursor() 120 | if err != nil { 121 | return err 122 | } 123 | return p.iter.Seek([]byte(p.Prefix)) 124 | } 125 | 126 | func (p *PrefixScanPlan) Next(ctx *ExecuteCtx) ([]byte, []byte, error) { 127 | pb := []byte(p.Prefix) 128 | for { 129 | key, val, err := p.iter.Next() 130 | if err != nil { 131 | return nil, nil, err 132 | } 133 | if key == nil { 134 | break 135 | } 136 | 137 | // Key not have the prefix 138 | if !bytes.HasPrefix(key, pb) { 139 | break 140 | } 141 | 142 | // Filter with the expression 143 | ok, err := p.Filter.Filter(NewKVP(key, val), ctx) 144 | if 
err != nil { 145 | return nil, nil, err 146 | } 147 | if ok { 148 | return key, val, nil 149 | } 150 | } 151 | return nil, nil, nil 152 | } 153 | 154 | func (p *PrefixScanPlan) Batch(ctx *ExecuteCtx) ([]KVPair, error) { 155 | var ( 156 | ret = make([]KVPair, 0, PlanBatchSize) 157 | filterBatch = make([]KVPair, 0, PlanBatchSize) 158 | count = 0 159 | finish = false 160 | pb = []byte(p.Prefix) 161 | chooseIdxes = make([]int, 0, 2*PlanBatchSize) 162 | bidx = 0 163 | ) 164 | for !finish { 165 | filterBatch = filterBatch[:0] 166 | for i := 0; i < PlanBatchSize; i++ { 167 | key, val, err := p.iter.Next() 168 | if err != nil { 169 | return nil, err 170 | } 171 | if key == nil { 172 | finish = true 173 | break 174 | } 175 | // Key not have the prefix 176 | if !bytes.HasPrefix(key, pb) { 177 | finish = true 178 | break 179 | } 180 | filterBatch = append(filterBatch, NewKVP(key, val)) 181 | } 182 | if len(filterBatch) > 0 { 183 | matchs, err := p.Filter.FilterBatch(filterBatch, ctx) 184 | if err != nil { 185 | return nil, err 186 | } 187 | for i, m := range matchs { 188 | if m { 189 | ret = append(ret, filterBatch[i]) 190 | chooseIdxes = append(chooseIdxes, bidx) 191 | count += 1 192 | } 193 | bidx += 1 194 | } 195 | if count >= PlanBatchSize { 196 | finish = true 197 | } 198 | } 199 | } 200 | ctx.AdjustChunkCache(chooseIdxes) 201 | return ret, nil 202 | } 203 | 204 | func (p *PrefixScanPlan) String() string { 205 | return fmt.Sprintf("PrefixScanPlan{Prefix = '%s', Filter = '%s'}", p.Prefix, p.Filter.Explain()) 206 | } 207 | 208 | func (p *PrefixScanPlan) Explain() []string { 209 | return []string{p.String()} 210 | } 211 | 212 | type RangeScanPlan struct { 213 | Storage Storage 214 | Filter *FilterExec 215 | Start []byte 216 | End []byte 217 | iter Cursor 218 | } 219 | 220 | func NewRangeScanPlan(s Storage, f *FilterExec, start []byte, end []byte) Plan { 221 | return &RangeScanPlan{ 222 | Storage: s, 223 | Filter: f, 224 | Start: start, 225 | End: end, 226 | } 227 | } 228 | 
229 | func (p *RangeScanPlan) Init() (err error) { 230 | p.iter, err = p.Storage.Cursor() 231 | if err != nil { 232 | return err 233 | } 234 | if p.Start != nil { 235 | err = p.iter.Seek(p.Start) 236 | if err != nil { 237 | return err 238 | } 239 | } 240 | return nil 241 | } 242 | 243 | func (p *RangeScanPlan) Next(ctx *ExecuteCtx) ([]byte, []byte, error) { 244 | for { 245 | key, val, err := p.iter.Next() 246 | if err != nil { 247 | return nil, nil, err 248 | } 249 | if key == nil { 250 | break 251 | } 252 | 253 | // Key is greater than End 254 | if p.End != nil && bytes.Compare(key, p.End) > 0 { 255 | break 256 | } 257 | 258 | // Filter with the expression 259 | ok, err := p.Filter.Filter(NewKVP(key, val), ctx) 260 | if err != nil { 261 | return nil, nil, err 262 | } 263 | if ok { 264 | return key, val, nil 265 | } 266 | } 267 | return nil, nil, nil 268 | } 269 | 270 | func (p *RangeScanPlan) Batch(ctx *ExecuteCtx) ([]KVPair, error) { 271 | var ( 272 | ret = make([]KVPair, 0, PlanBatchSize) 273 | filterBatch = make([]KVPair, 0, PlanBatchSize) 274 | count = 0 275 | finish = false 276 | chooseIdxes = make([]int, 0, 2*PlanBatchSize) 277 | bidx = 0 278 | ) 279 | for !finish { 280 | filterBatch = filterBatch[:0] 281 | for i := 0; i < PlanBatchSize; i++ { 282 | key, val, err := p.iter.Next() 283 | if err != nil { 284 | return nil, err 285 | } 286 | if key == nil { 287 | finish = true 288 | break 289 | } 290 | // Key is greater than End 291 | if p.End != nil && bytes.Compare(key, p.End) > 0 { 292 | finish = true 293 | break 294 | } 295 | filterBatch = append(filterBatch, NewKVP(key, val)) 296 | } 297 | 298 | if len(filterBatch) > 0 { 299 | matchs, err := p.Filter.FilterBatch(filterBatch, ctx) 300 | if err != nil { 301 | return nil, err 302 | } 303 | for i, m := range matchs { 304 | if m { 305 | ret = append(ret, filterBatch[i]) 306 | chooseIdxes = append(chooseIdxes, bidx) 307 | count += 1 308 | } 309 | bidx += 1 310 | } 311 | if count >= PlanBatchSize { 312 | finish = 
true 313 | } 314 | } 315 | } 316 | ctx.AdjustChunkCache(chooseIdxes) 317 | return ret, nil 318 | } 319 | 320 | func convertByteToString(val []byte) string { 321 | if val == nil { 322 | return "" 323 | } 324 | return string(val) 325 | } 326 | 327 | func (p *RangeScanPlan) String() string { 328 | return fmt.Sprintf("RangeScanPlan{Start = '%s', End = '%s', Filter = '%s'}", convertByteToString(p.Start), convertByteToString(p.End), p.Filter.Explain()) 329 | } 330 | 331 | func (p *RangeScanPlan) Explain() []string { 332 | return []string{p.String()} 333 | } 334 | 335 | type MultiGetPlan struct { 336 | Storage Storage 337 | Filter *FilterExec 338 | Keys []string 339 | numKeys int 340 | idx int 341 | } 342 | 343 | func NewMultiGetPlan(s Storage, f *FilterExec, keys []string) Plan { 344 | // We should sort keys to ensure order by erase works correctly 345 | sort.Strings(keys) 346 | return &MultiGetPlan{ 347 | Storage: s, 348 | Filter: f, 349 | Keys: keys, 350 | idx: 0, 351 | numKeys: len(keys), 352 | } 353 | } 354 | 355 | func (p *MultiGetPlan) Init() error { 356 | return nil 357 | } 358 | 359 | func (p *MultiGetPlan) Next(ctx *ExecuteCtx) ([]byte, []byte, error) { 360 | for { 361 | if p.idx >= p.numKeys { 362 | break 363 | } 364 | key := []byte(p.Keys[p.idx]) 365 | p.idx++ 366 | val, err := p.Storage.Get(key) 367 | if err != nil { 368 | return nil, nil, err 369 | } 370 | if val == nil { 371 | // No Value 372 | continue 373 | } 374 | ok, err := p.Filter.Filter(NewKVP(key, val), ctx) 375 | if err != nil { 376 | return nil, nil, err 377 | } 378 | if ok { 379 | return key, val, nil 380 | } 381 | } 382 | return nil, nil, nil 383 | } 384 | 385 | func (p *MultiGetPlan) Batch(ctx *ExecuteCtx) ([]KVPair, error) { 386 | var ( 387 | ret = make([]KVPair, 0, PlanBatchSize) 388 | filterBatch = make([]KVPair, 0, PlanBatchSize) 389 | count = 0 390 | finish = false 391 | chooseIdxes = make([]int, 0, 2*PlanBatchSize) 392 | bidx = 0 393 | ) 394 | for !finish { 395 | filterBatch = 
filterBatch[:0] 396 | for i := 0; i < PlanBatchSize; i++ { 397 | if p.idx >= p.numKeys { 398 | finish = true 399 | break 400 | } 401 | key := []byte(p.Keys[p.idx]) 402 | p.idx++ 403 | val, err := p.Storage.Get(key) 404 | if err != nil { 405 | return nil, err 406 | } 407 | if val == nil { 408 | // No Value 409 | continue 410 | } 411 | filterBatch = append(filterBatch, NewKVP(key, val)) 412 | } 413 | if len(filterBatch) > 0 { 414 | matchs, err := p.Filter.FilterBatch(filterBatch, ctx) 415 | if err != nil { 416 | return nil, err 417 | } 418 | for i, m := range matchs { 419 | if m { 420 | ret = append(ret, filterBatch[i]) 421 | chooseIdxes = append(chooseIdxes, bidx) 422 | count += 1 423 | } 424 | } 425 | } 426 | if count >= PlanBatchSize { 427 | finish = true 428 | } 429 | } 430 | ctx.AdjustChunkCache(chooseIdxes) 431 | return ret, nil 432 | } 433 | 434 | func (p *MultiGetPlan) String() string { 435 | keys := strings.Join(p.Keys, ", ") 436 | return fmt.Sprintf("MultiGetPlan{Keys = <%s>, Filter = '%s'}", keys, p.Filter.Explain()) 437 | } 438 | 439 | func (p *MultiGetPlan) Explain() []string { 440 | return []string{p.String()} 441 | } 442 | -------------------------------------------------------------------------------- /spec.md: -------------------------------------------------------------------------------- 1 | # A SQL like query language for TiKV 2 | 3 | The query package provide a SQL like query language for user to do some search operation on TiKV's key-value pair. 4 | 5 | ## Query Syntax 6 | 7 | ### Basic syntax 8 | 9 | ``` 10 | select (field expression), (field expression)... where (filter expression) group by (group expression) order by (order expression) limit (start, counts) 11 | ``` 12 | 13 | **Field Expression** 14 | 15 | ``` 16 | Field Expression := (FunctionCall | key | value | "*") ("as" FieldName)? 
17 | 18 | FunctionCall := FunctionName "(" FuncArgs ")" | 19 | FunctionName "(" FuncArgs ")" FieldAccessExpression* 20 | 21 | FuncArgs := Expression (, Expression)* 22 | 23 | FieldAccessExpression := "[" string "]" | "[" number "]" 24 | ``` 25 | 26 | A field expression is basically `*`, `key` or `value`, and you can use the `as` keyword to rename it. For example: 27 | 28 | ``` 29 | # same as select key, value where key ^= "prefix" 30 | select * where key ^= "prefix" 31 | 32 | # rename key to f1 and value to f2 in result set 33 | select key as f1, value as f2 where key ^= "prefix" 34 | ``` 35 | 36 | The `key` is the key-value pair's key, and likewise `value` is its value. 37 | 38 | Field expressions support the functions shown below: 39 | 40 | | Function | Description | 41 | | -------- | ----------- | 42 | | lower(value: str): str | convert value string into lower case | 43 | | upper(value: str): str | convert value string into upper case | 44 | | int(value: any): int | convert value into integer, return an error if it cannot be converted to integer | 45 | | float(value: any): float | convert value into float, return an error if it cannot be converted to float | 46 | | str(value: any): str | convert value into string | 47 | | is_int(value: any): bool | return whether value can be converted into integer | 48 | | is_float(value: any): bool | return whether value can be converted into float | 49 | | substr(value: str, start: int, end: int): str | return substring of value from `start` position to `end` position | 50 | | split(value: str, splitter: str): list | split value into a string list by splitter string | 51 | | list(elem1: any, elem2: any...): list | convert many elements into a list; all elements must have the same type, and the supported element types are `int`, `str` and `float` | 52 | | float_list(elem1: float, elem2: float...): list | convert many float elements into a list | 53 | | flist(elem1: float, elem2: float...): list | same as float_list | 54 | | int_list(elem1: int, elem2: int...): list | convert many integer elements into 
a list | 55 | | ilist(elem1: int, elem2: int...): list | same as int_list | 56 | | len(value: list): int | return the length of the list | 57 | | l2_distance(left: list, right: list): float | calculate l2 distance of two lists | 58 | | cosine_distance(left: list, right: list): float | calculate cosine distance of two lists | 59 | | json(value: str): json | parse string value into json type | 60 | | join(separator: str, val1: any, val2: any...): str | join values by separator | 61 | 62 | You can use any of the functions above in a field expression, such as: 63 | 64 | ``` 65 | # Convert value into int type 66 | select key, int(value) where key ^= "prefix" 67 | 68 | # Convert value into int type and do some math on it 69 | select key, ((int(value) + 1) * 8) where key ^= "prefix" 70 | 71 | # Convert value into upper case 72 | select key, upper(value) where key ^= "prefix" 73 | 74 | # Calculate l2 distance on two vectors 75 | select key, l2_distance(list(1,2,3,4), split(value, ",")) where key ^= "prefix" 76 | ``` 77 | 78 | You may notice there is a `json` type: you can use the `[]` operator to access `json` maps and lists. The `[]` operator can also be used on the `list` type. 79 | 80 | ``` 81 | select key, json(value)["key1"]["key2"] where key ^= "prefix" 82 | 83 | select key, list(1,2,3,4)[2] where key ^= "prefix" 84 | ``` 85 | 86 | **Filter Expression** 87 | 88 | The filter expression follows the `where` keyword and contains the filter conditions. 89 | 90 | ``` 91 | Filter Expression := "!"? 
Expression 92 | 93 | Expression := "(" BinaryExpression | UnaryExpression ")" 94 | 95 | UnaryExpression := "key" | "value" | string | number | "true" | "false" | FunctionCall | FieldName 96 | 97 | BinaryExpression := Expression Op Expression | 98 | Expression "between" Expression "and" Expression | 99 | Expression "in" "(" Expression (, Expression)* ")" | 100 | Expression "in" FunctionCall | 101 | FunctionCall 102 | 103 | Op := MathOp | CompareOp | AndOrOp 104 | MathOp := "+" | "-" | "*" | "/" 105 | AndOrOp := "&" | "|" 106 | CompareOp := "=" | "!=" | "^=" | "~=" | ">" | ">=" | "<" | "<=" 107 | 108 | FunctionCall := FunctionName "(" FuncArgs ")" | 109 | FunctionName "(" FuncArgs ")" FieldAccessExpression* 110 | 111 | FuncArgs := Expression (, Expression)* 112 | 113 | FieldAccessExpression := "[" string "]" | "[" number "]" 114 | ``` 115 | 116 | The basic usage of a filter expression is to match keys by equality or by prefix, so there are some special comparison operators for this: 117 | 118 | * `=`: Equals 119 | * `!=`: Not equals 120 | * `^=`: Prefix match 121 | * `~=`: Regexp match 122 | 123 | For example: 124 | 125 | ``` 126 | # Key equals "key01" 127 | select * where key = "key01" 128 | 129 | # Keys that have "key01" prefix 130 | select * where key ^= "key01" 131 | 132 | # Keys that match "^key[0-9]+$" 133 | select * where key ~= "^key[0-9]+$" 134 | ``` 135 | 136 | We also provide the `between` ... `and` expression and the `in` expression, same as SQL: 137 | 138 | ``` 139 | select * where key between "k" and "l" 140 | 141 | select * where key in ("k1", "k2", "k3") 142 | ``` 143 | 144 | To combine multiple expressions you can use the `&` and `|` operators: 145 | 146 | ``` 147 | select * where key in ("k1", "k2", "k3") & value ~= "^prefix[0-9]+" 148 | 149 | select * where key ^= "key" | value ^= "val" 150 | ``` 151 | 152 | You can also use a field name defined in the select list inside the filter expression, which saves some typing for the SQL writer. 
153 | 154 | ``` 155 | # filter value's substring from 2 to 3 (one char) is between "b" to "e" 156 | select key, substr(value, 2, 3) as mid, value where mid between "b" and "e" 157 | ``` 158 | 159 | If you want, you can also do some math in the filter expression: 160 | 161 | ``` 162 | select * where key ^= "num" & int(value) + 1 > 10 163 | ``` 164 | 165 | If value is a JSON string and you want to filter data by some fields, you can use the field access operator: 166 | 167 | ``` 168 | select * where key ^= "json" & json(value)["user"] = "Bob" 169 | ``` 170 | 171 | **Order By** 172 | 173 | Same as SQL, you can use `order by` to sort the result set. 174 | 175 | ``` 176 | Order Expression := OrderByField (, OrderByField)* 177 | 178 | OrderByField := FieldName (ASC | DESC)? 179 | ``` 180 | 181 | The `FieldName` can be `key`, `value` or the name defined by select: 182 | 183 | ``` 184 | select key, value where key ^= "prefix" order by value 185 | 186 | select key, int(value) as snum where key ^= "prefix" order by snum asc, key asc 187 | ``` 188 | 189 | **Limit** 190 | 191 | Same as SQL. If one number follows the `limit` keyword, it defines how many rows are returned. If two numbers follow, the first is how many rows should be skipped and the second is how many rows are returned. 192 | 193 | ``` 194 | select * where key ^= "prefix" limit 10 195 | 196 | select * where key ^= "prefix" limit 10, 10 197 | ``` 198 | 199 | ### Aggregation 200 | 201 | The query language also supports aggregation. 
You can use the `GROUP BY` expression like in SQL: 202 | 203 | ``` 204 | Group Expression := FieldName (, FieldName)* 205 | ``` 206 | 207 | Below is the aggregation function list: 208 | 209 | | Function | Description | 210 | | -------- | ----------- | 211 | | count(value: int): int | Count value by group | 212 | | sum(value: int): int | Sum value by group | 213 | | avg(value: int): int | Calculate average value by group | 214 | | min(value: int): int | Find the minimum value by group | 215 | | max(value: int): int | Find the maximum value by group | 216 | | quantile(value: float, percent: float): float | Calculate the quantile by group | 217 | 218 | For example: 219 | 220 | ``` 221 | select count(1), substr(key, 3, 4) as pk where key ^= "k_" group by pk 222 | 223 | select count(1), sum(int(value)) as sum, substr(key, 0, 2) as kprefix where key between 'k' and 'l' group by kprefix order by sum desc 224 | ``` 225 | 226 | ### Put statement 227 | 228 | If you want to insert some data into TiKV, you can use the `put` statement. 229 | 230 | ``` 231 | PutStmt := "PUT" KeyValuePair (, KeyValuePair)* 232 | 233 | KeyValuePair := "(" Expression "," Expression ")" 234 | ``` 235 | 236 | For example: 237 | 238 | ``` 239 | put ("k1", "v1"), ("k2", "v2") 240 | 241 | # Use function call to generate value 242 | put ("k3", upper("value3")), ("k4", join(",", 1, 2, 3, 4)) 243 | 244 | # use key keyword to generate value 245 | put ("k4", upper("val_" + key)) 246 | ``` 247 | 248 | Notice: In a put statement you can only use the `key` keyword to generate the value. If the `value` keyword appears in the statement, a syntax error is reported. 249 | 250 | ### Remove statement 251 | 252 | If you want to delete some data from TiKV, you can use the `remove` statement. 253 | 254 | ``` 255 | RemoveStmt := "REMOVE" Expression (, Expression)* 256 | ``` 257 | 258 | For example: 259 | 260 | ``` 261 | remove "k1", "k2" 262 | ``` 263 | 264 | Notice: In a remove statement you cannot use the `key` and `value` keywords. 
-------------------------------------------------------------------------------- /statement.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import "fmt" 4 | 5 | var ( 6 | _ Statement = (*WhereStmt)(nil) 7 | _ Statement = (*OrderStmt)(nil) 8 | _ Statement = (*GroupByStmt)(nil) 9 | _ Statement = (*LimitStmt)(nil) 10 | _ Statement = (*PutStmt)(nil) 11 | _ Statement = (*RemoveStmt)(nil) 12 | ) 13 | 14 | type Statement interface { 15 | Name() string 16 | } 17 | 18 | type SelectStmt struct { 19 | Pos int 20 | AllFields bool 21 | FieldNames []string 22 | FieldTypes []Type 23 | Fields []Expression 24 | Where *WhereStmt 25 | Order *OrderStmt 26 | Limit *LimitStmt 27 | GroupBy *GroupByStmt 28 | } 29 | 30 | func (s *SelectStmt) Name() string { 31 | return "SELECT" 32 | } 33 | 34 | type WhereStmt struct { 35 | Pos int 36 | Expr Expression 37 | } 38 | 39 | func (s *WhereStmt) Name() string { 40 | return "WHERE" 41 | } 42 | 43 | type OrderField struct { 44 | Name string 45 | Field Expression 46 | Order TokenType 47 | } 48 | 49 | type OrderStmt struct { 50 | Pos int 51 | Orders []OrderField 52 | } 53 | 54 | func (s *OrderStmt) Name() string { 55 | return "ORDER BY" 56 | } 57 | 58 | type GroupByField struct { 59 | Name string 60 | Expr Expression 61 | } 62 | 63 | type GroupByStmt struct { 64 | Pos int 65 | Fields []GroupByField 66 | } 67 | 68 | func (s *GroupByStmt) Name() string { 69 | return "GROUP BY" 70 | } 71 | 72 | type LimitStmt struct { 73 | Pos int 74 | Start int 75 | Count int 76 | } 77 | 78 | func (s *LimitStmt) Name() string { 79 | return "LIMIT" 80 | } 81 | 82 | type PutKVPair struct { 83 | Key Expression 84 | Value Expression 85 | } 86 | 87 | func (p *PutKVPair) String() string { 88 | return fmt.Sprintf("{%s: %s}", p.Key.String(), p.Value.String()) 89 | } 90 | 91 | type PutStmt struct { 92 | Pos int 93 | KVPairs []*PutKVPair 94 | } 95 | 96 | func (s *PutStmt) Name() string { 97 | return "PUT" 98 | } 99 | 
100 | type RemoveStmt struct { 101 | Pos int 102 | Keys []Expression 103 | } 104 | 105 | func (s *RemoveStmt) Name() string { 106 | return "REMOVE" 107 | } 108 | 109 | type DeleteStmt struct { 110 | Pos int 111 | Where *WhereStmt 112 | Limit *LimitStmt 113 | } 114 | 115 | func (s *DeleteStmt) Name() string { 116 | return "DELETE" 117 | } 118 | 119 | func (s *RemoveStmt) Validate(ctx *CheckCtx) error { 120 | for _, expr := range s.Keys { 121 | rtype := expr.ReturnType() 122 | if rtype != TSTR && rtype != TNUMBER { 123 | return NewSyntaxError(expr.GetPos(), "need str or number type") 124 | } 125 | if err := expr.Check(ctx); err != nil { 126 | return err 127 | } 128 | } 129 | return nil 130 | } 131 | 132 | func (s *PutStmt) Validate(ctx *CheckCtx) error { 133 | for _, kv := range s.KVPairs { 134 | if err := s.validateKVPair(kv, ctx); err != nil { 135 | return err 136 | } 137 | } 138 | return nil 139 | } 140 | 141 | func (s *PutStmt) validateKVPair(kv *PutKVPair, ctx *CheckCtx) error { 142 | if err := kv.Key.Check(ctx); err != nil { 143 | return err 144 | } 145 | switch kv.Key.ReturnType() { 146 | case TSTR, TNUMBER: 147 | break 148 | default: 149 | return NewSyntaxError(kv.Key.GetPos(), "need str or number type") 150 | } 151 | if err := kv.Value.Check(ctx); err != nil { 152 | return err 153 | } 154 | switch kv.Value.ReturnType() { 155 | case TSTR, TNUMBER: 156 | break 157 | default: 158 | return NewSyntaxError(kv.Value.GetPos(), "need str or number type") 159 | } 160 | return nil 161 | } 162 | 163 | func (s *DeleteStmt) Validate(ctx *CheckCtx) error { 164 | return s.Where.Expr.Check(ctx) 165 | } 166 | 167 | func (s *SelectStmt) ValidateFields(ctx *CheckCtx) error { 168 | for _, f := range s.Fields { 169 | if err := s.validateField(f, ctx); err != nil { 170 | return err 171 | } 172 | } 173 | return nil 174 | } 175 | 176 | func (s *SelectStmt) validateField(f Expression, ctx *CheckCtx) error { 177 | if err := f.Check(ctx); err != nil { 178 | return err 179 | } 180 | 181 
| return s.checkAggrFunctionArgs(f) 182 | } 183 | 184 | func (s *SelectStmt) checkAggrFunctionArgs(expr Expression) error { 185 | var err error 186 | switch e := expr.(type) { 187 | case *BinaryOpExpr: 188 | err = s.checkAggrFunctionArgs(e.Left) 189 | if err != nil { 190 | return err 191 | } 192 | err = s.checkAggrFunctionArgs(e.Right) 193 | if err != nil { 194 | return err 195 | } 196 | case *FunctionCallExpr: 197 | fname, err := GetFuncNameFromExpr(e) 198 | if err == nil && IsAggrFunc(fname) { 199 | err = s.checkAggrFuncArgs(e.Args) 200 | if err != nil { 201 | return err 202 | } 203 | } 204 | } 205 | return nil 206 | } 207 | 208 | func (s *SelectStmt) checkAggrFuncArgs(args []Expression) error { 209 | for _, arg := range args { 210 | if err := s.checkAggrFuncArg(arg); err != nil { 211 | return err 212 | } 213 | } 214 | return nil 215 | } 216 | 217 | func (s *SelectStmt) checkAggrFuncArg(arg Expression) error { 218 | var err error 219 | switch e := arg.(type) { 220 | case *BinaryOpExpr: 221 | err = s.checkAggrFuncArg(e.Left) 222 | if err != nil { 223 | return err 224 | } 225 | err = s.checkAggrFuncArg(e.Right) 226 | if err != nil { 227 | return err 228 | } 229 | case *FunctionCallExpr: 230 | fname, err := GetFuncNameFromExpr(e) 231 | if err == nil && IsAggrFunc(fname) { 232 | return NewSyntaxError(arg.GetPos(), "Aggregate function arguments should not contains aggregate function") 233 | } 234 | } 235 | return nil 236 | } 237 | -------------------------------------------------------------------------------- /utils.go: -------------------------------------------------------------------------------- 1 | package kvql 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "reflect" 8 | ) 9 | 10 | func BuildExecutor(query string) (*SelectStmt, *FilterExec, error) { 11 | p := NewParser(query) 12 | expr, err := p.Parse() 13 | if err != nil { 14 | return nil, nil, err 15 | } 16 | vexpr := expr.(*SelectStmt) 17 | return vexpr, &FilterExec{ 18 | Ast: vexpr.Where, 19 | }, 
// convertToInt converts any built-in signed or unsigned integer value to
// int64.
//
// The boolean result is false when value is not an integer type, or when it
// is a uint/uint64 larger than the maximum int64: converting such a value
// would silently wrap around to a negative number, so it is reported as not
// convertible instead.
func convertToInt(value any) (int64, bool) {
	const maxInt64 = 1<<63 - 1
	switch ret := value.(type) {
	case int:
		return int64(ret), true
	case int8:
		return int64(ret), true
	case int16:
		return int64(ret), true
	case int32:
		return int64(ret), true
	case int64:
		return ret, true
	case uint:
		// uint is 64 bits wide on most platforms and can exceed int64.
		if uint64(ret) > maxInt64 {
			return 0, false
		}
		return int64(ret), true
	case uint8:
		return int64(ret), true
	case uint16:
		return int64(ret), true
	case uint32:
		return int64(ret), true
	case uint64:
		if ret > maxInt64 {
			return 0, false
		}
		return int64(ret), true
	default:
		return 0, false
	}
}
NewExecuteError(rightExpr.GetPos(), "Divide by zero") 105 | } 106 | return lfloat / rfloat, nil 107 | default: 108 | return 0.0, errors.New("Unknown operator") 109 | } 110 | } 111 | 112 | var ( 113 | lfval float64 114 | rfval float64 115 | ) 116 | if liok && rfok { 117 | lfval = float64(lint) 118 | rfval = rfloat 119 | } else if lfok && riok { 120 | lfval = lfloat 121 | rfval = float64(rint) 122 | } else { 123 | return 0.0, fmt.Errorf("Invalid operator %v left or right parameter type", op) 124 | } 125 | switch op { 126 | case '+': 127 | return lfval + rfval, nil 128 | case '-': 129 | return lfval - rfval, nil 130 | case '*': 131 | return lfval * rfval, nil 132 | case '/': 133 | if rfval == 0.0 { 134 | return 0, NewExecuteError(rightExpr.GetPos(), "Divide by zero") 135 | } 136 | return lfval / rfval, nil 137 | default: 138 | return 0.0, errors.New("Unknown operator") 139 | } 140 | } 141 | 142 | func execNumberCompare(left any, right any, op string) (bool, error) { 143 | lint, liok := convertToInt(left) 144 | rint, riok := convertToInt(right) 145 | if liok && riok { 146 | switch op { 147 | case ">": 148 | return lint > rint, nil 149 | case ">=": 150 | return lint >= rint, nil 151 | case "<": 152 | return lint < rint, nil 153 | case "<=": 154 | return lint <= rint, nil 155 | case "=": 156 | return lint == rint, nil 157 | } 158 | } 159 | 160 | lfloat, lfok := convertToFloat(left) 161 | rfloat, rfok := convertToFloat(right) 162 | if liok && rfok { 163 | lfloat = float64(lint) 164 | } else if lfok && riok { 165 | rfloat = float64(rint) 166 | } else if lfok && rfok { 167 | // OK 168 | } else { 169 | return false, fmt.Errorf("Invalid operator %v left or right parameter type", op) 170 | } 171 | switch op { 172 | case ">": 173 | return lfloat > rfloat, nil 174 | case ">=": 175 | return lfloat >= rfloat, nil 176 | case "<": 177 | return lfloat < rfloat, nil 178 | case "<=": 179 | return lfloat <= rfloat, nil 180 | case "=": 181 | return lfloat == rfloat, nil 182 | } 183 | 
return false, errors.New("Unknown operator") 184 | } 185 | 186 | func execStringCompare(left any, right any, op string) (bool, error) { 187 | lstr, lsok := convertToByteArray(left) 188 | rstr, rsok := convertToByteArray(right) 189 | if lsok && rsok { 190 | cmpret := bytes.Compare(lstr, rstr) 191 | switch op { 192 | case ">": 193 | return cmpret > 0, nil 194 | case ">=": 195 | return cmpret >= 0, nil 196 | case "<": 197 | return cmpret < 0, nil 198 | case "<=": 199 | return cmpret <= 0, nil 200 | case "=": 201 | return cmpret == 0, nil 202 | default: 203 | return false, errors.New("Unknown operator") 204 | } 205 | } 206 | 207 | return false, fmt.Errorf("Invalid operator %v left or right parameter type", op) 208 | } 209 | 210 | func unpackArray(s any) ([]any, bool) { 211 | var ret []any 212 | switch val := s.(type) { 213 | case []string: 214 | ret = make([]any, len(val)) 215 | for i, item := range val { 216 | ret[i] = item 217 | } 218 | return ret, true 219 | case []int16: 220 | ret = make([]any, len(val)) 221 | for i, item := range val { 222 | ret[i] = item 223 | } 224 | return ret, true 225 | case []uint16: 226 | ret = make([]any, len(val)) 227 | for i, item := range val { 228 | ret[i] = item 229 | } 230 | return ret, true 231 | case []int: 232 | ret = make([]any, len(val)) 233 | for i, item := range val { 234 | ret[i] = item 235 | } 236 | return ret, true 237 | case []uint: 238 | ret = make([]any, len(val)) 239 | for i, item := range val { 240 | ret[i] = item 241 | } 242 | return ret, true 243 | case []int32: 244 | ret = make([]any, len(val)) 245 | for i, item := range val { 246 | ret[i] = item 247 | } 248 | return ret, true 249 | case []uint32: 250 | ret = make([]any, len(val)) 251 | for i, item := range val { 252 | ret[i] = item 253 | } 254 | return ret, true 255 | case []int64: 256 | ret = make([]any, len(val)) 257 | for i, item := range val { 258 | ret[i] = item 259 | } 260 | return ret, true 261 | case []uint64: 262 | ret = make([]any, len(val)) 263 | for i, 
// unpackArrayR converts an arbitrary slice, array or string into a []any by
// reflection, boxing each element (for a string, each byte).
//
// It returns nil when s is nil or is not one of those kinds; calling
// reflect.Value.Len or Index on such a value would panic.
func unpackArrayR(s any) []any {
	v := reflect.ValueOf(s)
	switch v.Kind() {
	case reflect.Slice, reflect.Array, reflect.String:
		// Kinds that support both Len and Index.
	default:
		return nil
	}
	n := v.Len()
	r := make([]any, n)
	for i := 0; i < n; i++ {
		r[i] = v.Index(i).Interface()
	}
	return r
}
58 | item.Walk(cb) 59 | } 60 | } 61 | } 62 | 63 | func (e *FieldAccessExpr) Walk(cb WalkCallback) { 64 | if cb(e) { 65 | e.Left.Walk(cb) 66 | e.FieldName.Walk(cb) 67 | } 68 | } 69 | --------------------------------------------------------------------------------