├── .github └── workflows │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── scratchpad ├── FSharpNestedParens.fsx ├── FsharpExpressionFieldsParser.fsx ├── FsharpExpressionFieldsParser.fsx.output.txt ├── FsharpExpressionParser.fsx ├── FsharpParser.ipynb ├── FsharpQuotedStringParser.fsx ├── FsharpRecursiveSearch.fsx ├── FsharpRecursiveSearchErrors.fsx ├── FsharpSearchStringParser.fsx ├── FsharpStringParser.fsx ├── PythonParser.ipynb ├── javascript-quoted-string-parser.js ├── javascript-search-string-parser.js ├── javascript-string-parser.js ├── python_expression_fields_parser.py ├── python_expression_fields_parser.py.output.txt ├── python_expression_parser.py ├── python_lexer.py ├── python_lexer_quoted_strings.py ├── python_nested_operators_parser.py ├── python_nested_parens.py ├── python_quoted_string_parser.py ├── python_recursive_search.py ├── python_search_string_parser.py ├── python_single_word_parser.py ├── python_string_list_parser.py ├── python_string_parser.py ├── react-input-with-error.js ├── typescript-ast-to-sql.ts ├── typescript-ast-to-sql.ts.output.txt ├── typescript-expression-fields-parser.ts ├── typescript-expression-parser.ts ├── typescript-field-validator.ts ├── typescript-lexer-parser-functional.test.ts ├── typescript-lexer-parser-functional.ts ├── typescript-lexer-parser-functional.ts.output.txt ├── typescript-lexer-parser.ts ├── typescript-quoted-string-parser.ts ├── typescript-recursive-search-errors.ts ├── typescript-recursive-search.ts ├── typescript-search-string-parser.js ├── typescript-search-string-parser.ts ├── typescript-string-parser.ts └── typescript_nested_parens.ts ├── search-input-query-demo ├── .gitignore ├── README.md ├── eslint.config.js ├── index.html ├── package-lock.json ├── package.json ├── src │ ├── App.css │ ├── App.tsx │ ├── ExpressionDescription.tsx │ ├── SearchComponent.tsx │ ├── SearchTypeSelector.tsx │ ├── db-service.ts │ ├── index.css │ ├── main.tsx │ └── vite-env.d.ts ├── tsconfig.app.json ├── 
tsconfig.json ├── tsconfig.node.json └── vite.config.ts ├── search-input-query-parser ├── .gitignore ├── README.md ├── package-lock.json ├── package.json ├── src │ ├── first-pass-parser.test.ts │ ├── first-pass-parser.ts │ ├── index.ts │ ├── lexer.test.ts │ ├── lexer.ts │ ├── parse-in-values.ts │ ├── parse-primary.ts │ ├── parse-range-expression.ts │ ├── parser.test.ts │ ├── parser.ts │ ├── search-query-to-ilike-sql.ts │ ├── search-query-to-paradedb-sql.test.ts │ ├── search-query-to-paradedb-sql.ts │ ├── search-query-to-sql.test.ts │ ├── search-query-to-sql.ts │ ├── search-query-to-tsvector-sql.ts │ ├── transform-to-expression.ts │ ├── validate-expression-fields.ts │ ├── validate-in-expression.ts │ ├── validate-string.ts │ ├── validate-wildcard.ts │ ├── validator.test.ts │ └── validator.ts ├── tsconfig.base.json ├── tsconfig.cjs.json ├── tsconfig.esm.json ├── tsconfig.json └── tsconfig.types.json └── search-input-query-react ├── .gitignore ├── README.md ├── eslint.config.js ├── index.html ├── jest.config.js ├── jest.setup.ts ├── package-lock.json ├── package.json ├── src ├── PlaceholderContentWidget.tsx ├── SearchInputQuery.test.tsx ├── SearchInputQuery.tsx ├── create-completion-item-provider.ts └── search-syntax.ts ├── tsconfig.base.json ├── tsconfig.cjs.json ├── tsconfig.esm.json ├── tsconfig.json ├── tsconfig.types.json └── vite.config.ts /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test Suite 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | node-version: [23.x] 16 | package: [parser, react] 17 | include: 18 | - package: parser 19 | path: search-input-query-parser 20 | - package: react 21 | path: search-input-query-react 22 | 23 | defaults: 24 | run: 25 | working-directory: ${{ matrix.path }} 26 | 27 | steps: 28 | - uses: actions/checkout@v4 29 | 30 | - name: Use 
Node.js ${{ matrix.node-version }} 31 | uses: actions/setup-node@v4 32 | with: 33 | node-version: ${{ matrix.node-version }} 34 | cache: 'npm' 35 | cache-dependency-path: ${{ matrix.path }}/package-lock.json 36 | 37 | - name: Install dependencies 38 | run: npm ci 39 | 40 | - name: Run tests 41 | if: matrix.package != 'demo' 42 | run: npm test 43 | 44 | - name: TypeScript check 45 | run: npm run type-check -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | parser.out 2 | parsetab.py 3 | __pycache__/ 4 | .vscode/ 5 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2024 William Cotton 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /scratchpad/FSharpNestedParens.fsx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dotnet fsi 2 | #r "nuget: FParsec" 3 | open FParsec 4 | 5 | // Define Expression type for our AST 6 | type Expression = 7 | | Atom of string 8 | | List of Expression list 9 | 10 | // Helper to prettify the output 11 | let rec prettyPrint = function 12 | | Atom s -> s 13 | | List exprs -> 14 | let inner = exprs |> List.map prettyPrint |> String.concat " " 15 | $"({inner})" 16 | 17 | // Forward reference for recursive expressions 18 | let expr, exprImpl = createParserForwardedToRef() 19 | 20 | // Parse an atom (anything that's not whitespace or parens) 21 | let atom = 22 | many1Chars (noneOf " ()") 23 | |>> Atom 24 | 25 | // Parse a list of expressions between parentheses 26 | let list = 27 | between 28 | (pchar '(' .>> spaces) 29 | (spaces >>. pchar ')') // Consume optional spaces before ')' 30 | (sepEndBy expr spaces) // Use sepEndBy to handle spaces between expressions 31 | |>> List 32 | 33 | // Implementation of the expression parser 34 | do exprImpl.Value <- (list <|> atom) 35 | 36 | // Main entry point parser - handles whitespace and EOF 37 | let parseExpr = 38 | spaces >>. 
expr .>> spaces .>> eof 39 | 40 | // Function to run the parser 41 | let parseExpression input = 42 | run parseExpr input 43 | 44 | // Test function with error handling 45 | let testParse input = 46 | printfn "\nParsing: %s" input 47 | match parseExpression input with 48 | | Success (result, _, _) -> 49 | printfn "Parsed: %A" result 50 | printfn "Pretty printed: %s" (prettyPrint result) 51 | | Failure (msg, _, _) -> 52 | printfn "Error: %s" msg 53 | 54 | // Example usage with test cases 55 | let main() = 56 | let testCases = [ 57 | "((one) two three)" 58 | "((a b) (c d) e)" 59 | "(a)" 60 | "((a (b c)) d)" 61 | "(((deeply) nested) parens)" 62 | ] 63 | 64 | for test in testCases do 65 | testParse test 66 | 67 | // Run the main function 68 | main() 69 | -------------------------------------------------------------------------------- /scratchpad/FsharpExpressionFieldsParser.fsx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dotnet fsi 2 | #r "nuget: FParsec" 3 | open FParsec 4 | 5 | // AST types 6 | type Expression = 7 | | Term of string 8 | | Field of string * string 9 | | And of Expression * Expression 10 | | Or of Expression * Expression 11 | 12 | type SearchQuery = { 13 | Expression: Expression option 14 | } 15 | 16 | // Forward declare expression parser for recursive use 17 | let expr, exprImpl = createParserForwardedToRef() 18 | 19 | // Helper to handle escaped characters in quoted strings 20 | let escapedChar = 21 | pstring "\\" >>. 
22 | (choice [ 23 | pchar '"' >>% '"' // \" -> " 24 | pchar '\\' >>% '\\' // \\ -> \ 25 | anyChar // \x -> x (treat escape as nothing for any other char) 26 | ]) <?> "escaped character" 27 | 28 | // Define reserved words 29 | let reservedWords = set ["AND"; "OR"] 30 | 31 | // Helper to check if a string is a reserved word (case-insensitive: the word is upper-cased before lookup) 32 | let isReservedWord (word: string) = 33 | reservedWords.Contains(word.ToUpper()) 34 | 35 | // Helper to create a parser that fails if the input is a reserved word 36 | let notReservedWord p = 37 | p >>= fun result -> 38 | if isReservedWord result 39 | then fail (sprintf "Cannot use reserved word '%s' as identifier" result) 40 | else preturn result 41 | 42 | // Parser for quoted string that correctly handles escaping; each part carries a <?> label used in error messages 43 | let quotedString = 44 | between 45 | (pchar '"' <?> "opening quote '\"'") 46 | (pchar '"' <?> "closing quote '\"'") 47 | (manyChars (escapedChar <|> noneOf "\"\\") <?> "string content") <?> "quoted string" 48 | 49 | // Parse a word (unquoted string - stops at whitespace, quotes, parens, or colon; rejects the reserved words AND/OR) 50 | let unquotedString = 51 | many1Chars (noneOf " \"():") 52 | |> notReservedWord <?> "term" 53 | 54 | // Parse a field:value pair with reserved word checking for field names 55 | let fieldValue = 56 | pipe3 57 | (many1Chars (noneOf " \"():") |> notReservedWord .>> spaces <?> "field name") 58 | (pchar ':' >>. 
spaces <?> "':'") 59 | (quotedString <|> many1Chars (noneOf " \"()") <?> "field value") 60 | (fun field _ value -> Field(field.ToLower(), value)) <?> "field:value pair" 61 | 62 | 63 | // Parse a term (either field:value, quoted string, or unquoted string) 64 | let term = 65 | attempt fieldValue <?> "field:value pair" 66 | <|> (quotedString |>> Term <?> "quoted term") 67 | <|> (unquotedString |>> Term <?> "term") 68 | 69 | // Parse logical operators with proper precedence 70 | let opp = new OperatorPrecedenceParser() 71 | 72 | let addInfixOperator str precedence associativity f = 73 | opp.AddOperator( 74 | InfixOperator(str, spaces, precedence, associativity, f) 75 | ) 76 | 77 | // Setup operator precedence 78 | do 79 | addInfixOperator "AND" 2 Associativity.Left (fun x y -> And(x, y)) 80 | addInfixOperator "OR" 1 Associativity.Left (fun x y -> Or(x, y)) 81 | 82 | // Fold a non-empty list of expressions into a left-nested chain of And nodes (List.reduce throws on an empty list; callers use many1) 83 | let combineWithAnd exprs = 84 | exprs |> List.reduce (fun left right -> And(left, right)) 85 | 86 | // Parse a sequence of expressions within parentheses or a single term 87 | let primaryExpr = 88 | let parenExpr = 89 | between 90 | (pchar '(' .>> spaces <?> "opening parenthesis '('") 91 | (pchar ')' <?> "closing parenthesis ')'") 92 | (many1 (expr .>> spaces) 93 | |>> combineWithAnd) <?> "expression in parentheses" 94 | 95 | (attempt parenExpr <|> term) <?> "primary expression" 96 | .>> spaces 97 | 98 | // Implement the expression parser 99 | do exprImpl.Value <- opp.ExpressionParser .>> spaces 100 | opp.TermParser <- primaryExpr 101 | 102 | // Main parser for the full search query 103 | let searchParser = 104 | spaces >>. 
many1 (expr .>> spaces) .>> eof <?> "end of search query" 105 | |>> fun exprs -> 106 | { Expression = Some (combineWithAnd exprs) } 107 | 108 | // Helper function to stringify expressions 109 | let rec stringify = function 110 | | Term value -> 111 | if value.Contains(" ") then sprintf "\"%s\"" value 112 | else value 113 | | Field(field, value) -> 114 | sprintf "%s:%s" field value 115 | | And(left, right) -> 116 | sprintf "(%s AND %s)" (stringify left) (stringify right) 117 | | Or(left, right) -> 118 | sprintf "(%s OR %s)" (stringify left) (stringify right) 119 | 120 | // Function to run the parser 121 | let parseSearchQuery (input: string) = 122 | run searchParser input 123 | 124 | // Test function 125 | let testParser input = 126 | printfn "\nParsing query: %s" input 127 | match parseSearchQuery input with 128 | | Success (result, _, _) -> 129 | printfn "%A" result 130 | match result.Expression with 131 | | Some expr -> printfn "Parsed expression: %s" (stringify expr) 132 | | None -> printfn "No main expression" 133 | | Failure (msg, _, _) -> 134 | printfn "Error: %s" msg 135 | printfn "" 136 | 137 | // Run test cases 138 | let testQueries = [ 139 | "\"red shoes\" OR ((blue OR purple) AND sneakers)" 140 | "comfortable AND (leather OR suede)" 141 | "(winter OR summer) AND boots" 142 | "boots summer" 143 | "color:red AND size:large" 144 | "category:\"winter boots\" AND (color:black OR color:brown)" 145 | "winter boots color:blue" 146 | "red boots black" 147 | "red (boots black)" 148 | "AND:value" 149 | "OR:test" 150 | "brand:\"Nike\\Air\"" 151 | "brand:\"Nike\\\"Air\"" 152 | "brand:\"Nike\\\\\"Air\"" 153 | "field: value" 154 | "field :value" 155 | "field : value" 156 | "a AND b OR c" 157 | "a OR b AND c" 158 | "a OR b OR c AND d" 159 | "" 160 | "()" 161 | "field:" 162 | ":value" 163 | "(a OR b) c d" 164 | "a AND (b OR c) AND d" 165 | "((a AND b) OR c) AND d" 166 | "status:\"pending review\"" 167 | "category:pending review" 168 | "size:large color:red status:available" 
169 | "category:\"winter boots\" AND (color:black OR color:brown) AND size:12" 170 | "category:'winter boots' AND (color:black OR color:brown) AND AND:2" 171 | "category:'winter boots' AND (OR:black OR color:brown) AND AND:2" 172 | ] 173 | 174 | testQueries |> List.iter testParser 175 | -------------------------------------------------------------------------------- /scratchpad/FsharpExpressionFieldsParser.fsx.output.txt: -------------------------------------------------------------------------------- 1 | 2 | Parsing query: "red shoes" OR ((blue OR purple) AND sneakers) 3 | { Expression = 4 | Some 5 | (Or 6 | (Term "red shoes", 7 | And (Or (Term "blue", Term "purple"), Term "sneakers"))) } 8 | Parsed expression: ("red shoes" OR ((blue OR purple) AND sneakers)) 9 | 10 | 11 | Parsing query: comfortable AND (leather OR suede) 12 | { Expression = 13 | Some (And (Term "comfortable", Or (Term "leather", Term "suede"))) } 14 | Parsed expression: (comfortable AND (leather OR suede)) 15 | 16 | 17 | Parsing query: (winter OR summer) AND boots 18 | { Expression = Some (And (Or (Term "winter", Term "summer"), Term "boots")) } 19 | Parsed expression: ((winter OR summer) AND boots) 20 | 21 | 22 | Parsing query: boots summer 23 | { Expression = Some (And (Term "boots", Term "summer")) } 24 | Parsed expression: (boots AND summer) 25 | 26 | 27 | Parsing query: color:red AND size:large 28 | { Expression = Some (And (Field ("color", "red"), Field ("size", "large"))) } 29 | Parsed expression: (color:red AND size:large) 30 | 31 | 32 | Parsing query: category:"winter boots" AND (color:black OR color:brown) 33 | { Expression = 34 | Some 35 | (And 36 | (Field ("category", "winter boots"), 37 | Or (Field ("color", "black"), Field ("color", "brown")))) } 38 | Parsed expression: (category:winter boots AND (color:black OR color:brown)) 39 | 40 | 41 | Parsing query: winter boots color:blue 42 | { Expression = 43 | Some (And (And (Term "winter", Term "boots"), Field ("color", "blue"))) } 44 | 
Parsed expression: ((winter AND boots) AND color:blue) 45 | 46 | 47 | Parsing query: red boots black 48 | { Expression = Some (And (And (Term "red", Term "boots"), Term "black")) } 49 | Parsed expression: ((red AND boots) AND black) 50 | 51 | 52 | Parsing query: red (boots black) 53 | { Expression = Some (And (Term "red", And (Term "boots", Term "black"))) } 54 | Parsed expression: (red AND (boots AND black)) 55 | 56 | 57 | Parsing query: AND:value 58 | Error: Error in Ln: 1 Col: 4 59 | AND:value 60 | ^ 61 | Expecting: any char not in ‘ "():’ 62 | Other error messages: 63 | Cannot use reserved word 'AND' as identifier 64 | 65 | 66 | 67 | Parsing query: OR:test 68 | Error: Error in Ln: 1 Col: 3 69 | OR:test 70 | ^ 71 | Expecting: any char not in ‘ "():’ 72 | Other error messages: 73 | Cannot use reserved word 'OR' as identifier 74 | 75 | 76 | 77 | Parsing query: brand:"Nike\Air" 78 | { Expression = Some (Field ("brand", "NikeAir")) } 79 | Parsed expression: brand:NikeAir 80 | 81 | 82 | Parsing query: brand:"Nike\"Air" 83 | { Expression = Some (Field ("brand", "Nike"Air")) } 84 | Parsed expression: brand:Nike"Air 85 | 86 | 87 | Parsing query: brand:"Nike\\"Air" 88 | Error: Error in Ln: 1 Col: 19 89 | brand:"Nike\\"Air" 90 | ^ 91 | Note: The error occurred at the end of the input stream. 
92 | Expecting: closing quote '"' or string content 93 | 94 | 95 | 96 | Parsing query: field: value 97 | { Expression = Some (Field ("field", "value")) } 98 | Parsed expression: field:value 99 | 100 | 101 | Parsing query: field :value 102 | { Expression = Some (Field ("field", "value")) } 103 | Parsed expression: field:value 104 | 105 | 106 | Parsing query: field : value 107 | { Expression = Some (Field ("field", "value")) } 108 | Parsed expression: field:value 109 | 110 | 111 | Parsing query: a AND b OR c 112 | { Expression = Some (Or (And (Term "a", Term "b"), Term "c")) } 113 | Parsed expression: ((a AND b) OR c) 114 | 115 | 116 | Parsing query: a OR b AND c 117 | { Expression = Some (Or (Term "a", And (Term "b", Term "c"))) } 118 | Parsed expression: (a OR (b AND c)) 119 | 120 | 121 | Parsing query: a OR b OR c AND d 122 | { Expression = Some (Or (Or (Term "a", Term "b"), And (Term "c", Term "d"))) } 123 | Parsed expression: ((a OR b) OR (c AND d)) 124 | 125 | 126 | Parsing query: 127 | Error: Error in Ln: 1 Col: 1 128 | Note: The error occurred at the end of the input stream. 
129 | Expecting: end of search query 130 | 131 | 132 | 133 | Parsing query: () 134 | Error: Error in Ln: 1 Col: 1 135 | () 136 | ^ 137 | Expecting: end of search query 138 | 139 | 140 | 141 | Parsing query: field: 142 | Error: Error in Ln: 1 Col: 6 143 | field: 144 | ^ 145 | Expecting: any char not in ‘ "():’, end of input, infix operator or primary 146 | expression 147 | 148 | 149 | 150 | Parsing query: :value 151 | Error: Error in Ln: 1 Col: 1 152 | :value 153 | ^ 154 | Expecting: end of search query 155 | 156 | 157 | 158 | Parsing query: (a OR b) c d 159 | { Expression = Some (And (And (Or (Term "a", Term "b"), Term "c"), Term "d")) } 160 | Parsed expression: (((a OR b) AND c) AND d) 161 | 162 | 163 | Parsing query: a AND (b OR c) AND d 164 | { Expression = Some (And (And (Term "a", Or (Term "b", Term "c")), Term "d")) } 165 | Parsed expression: ((a AND (b OR c)) AND d) 166 | 167 | 168 | Parsing query: ((a AND b) OR c) AND d 169 | { Expression = Some (And (Or (And (Term "a", Term "b"), Term "c"), Term "d")) } 170 | Parsed expression: (((a AND b) OR c) AND d) 171 | 172 | 173 | Parsing query: status:"pending review" 174 | { Expression = Some (Field ("status", "pending review")) } 175 | Parsed expression: status:pending review 176 | 177 | 178 | Parsing query: category:pending review 179 | { Expression = Some (And (Field ("category", "pending"), Term "review")) } 180 | Parsed expression: (category:pending AND review) 181 | 182 | 183 | Parsing query: size:large color:red status:available 184 | { Expression = 185 | Some 186 | (And 187 | (And (Field ("size", "large"), Field ("color", "red")), 188 | Field ("status", "available"))) } 189 | Parsed expression: ((size:large AND color:red) AND status:available) 190 | 191 | 192 | Parsing query: category:"winter boots" AND (color:black OR color:brown) AND size:12 193 | { Expression = 194 | Some 195 | (And 196 | (And 197 | (Field ("category", "winter boots"), 198 | Or (Field ("color", "black"), Field ("color", "brown"))), 199 | 
Field ("size", "12"))) } 200 | Parsed expression: ((category:winter boots AND (color:black OR color:brown)) AND size:12) 201 | 202 | 203 | Parsing query: category:'winter boots' AND (color:black OR color:brown) AND AND:2 204 | Error: Error in Ln: 1 Col: 65 205 | category:'winter boots' AND (color:black OR color:brown) AND AND:2 206 | ^ 207 | Expecting: any char not in ‘ "():’ 208 | Other error messages: 209 | Cannot use reserved word 'AND' as identifier 210 | 211 | 212 | 213 | Parsing query: category:'winter boots' AND (OR:black OR color:brown) AND AND:2 214 | Error: Error in Ln: 1 Col: 29 215 | category:'winter boots' AND (OR:black OR color:brown) AND AND:2 216 | ^ 217 | Expecting: primary expression 218 | 219 | 220 | -------------------------------------------------------------------------------- /scratchpad/FsharpExpressionParser.fsx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dotnet fsi 2 | #r "nuget: FParsec" 3 | open FParsec 4 | 5 | // AST types 6 | type Expression = 7 | | Term of string 8 | | And of Expression * Expression 9 | | Or of Expression * Expression 10 | 11 | type SearchQuery = { 12 | Expression: Expression option 13 | } 14 | 15 | // Forward declare expression parser for recursive use 16 | let expr, exprImpl = createParserForwardedToRef() 17 | 18 | // Parse a quoted string - the \" escape is tried before ordinary characters so an escaped quote does not end the string 19 | let quotedString = 20 | between 21 | (pchar '"') 22 | (pchar '"') 23 | (manyChars ((pstring "\\\"" >>% '"') <|> noneOf "\"")) 24 | 25 | // Parse a word (unquoted string - stops at whitespace, quotes, or parens) 26 | let unquotedString = 27 | many1Chars (noneOf " \"()") 28 | 29 | // Parse a term (either quoted or unquoted) 30 | let term = 31 | (quotedString <|> unquotedString) |>> Term 32 | 33 | 34 | // Parse logical operators with proper precedence 35 | let opp = new OperatorPrecedenceParser() 36 | 37 | let addInfixOperator str precedence associativity f = 38 | 
opp.AddOperator( 39 | InfixOperator(str, spaces, precedence, associativity, f) 40 | ) 41 | 42 | // Setup operator precedence 43 | do 44 | addInfixOperator "AND" 2 Associativity.Left (fun x y -> And(x, y)) 45 | addInfixOperator "OR" 1 Associativity.Left (fun x y -> Or(x, y)) 46 | 47 | // Parse a primary expression (term, field:value, or parenthesized expression) 48 | let primaryExpr = 49 | (between (pchar '(' .>> spaces) (pchar ')') expr) 50 | <|> term 51 | .>> spaces 52 | 53 | // Implement the expression parser 54 | do exprImpl.Value <- opp.ExpressionParser .>> spaces 55 | opp.TermParser <- primaryExpr 56 | 57 | // Main parser for the full search query 58 | let searchParser = 59 | spaces >>. many expr .>> spaces .>> eof 60 | |>> fun exprs -> 61 | let mainExpr = 62 | match exprs with 63 | | [] -> None 64 | | [single] -> Some single 65 | | multiple -> 66 | Some(multiple |> List.reduce (fun left right -> Or(left, right))) 67 | 68 | { Expression = mainExpr; } 69 | 70 | // Helper function to stringify expressions 71 | let rec stringify = function 72 | | Term value -> 73 | if value.Contains(" ") then sprintf "\"%s\"" value 74 | else value 75 | | And(left, right) -> sprintf "(%s AND %s)" (stringify left) (stringify right) 76 | | Or(left, right) -> sprintf "(%s OR %s)" (stringify left) (stringify right) 77 | 78 | // Function to run the parser 79 | let parseSearchQuery (input: string) = 80 | run searchParser input 81 | 82 | // Test function 83 | let testParser input = 84 | printfn "\nParsing query: %s" input 85 | match parseSearchQuery input with 86 | | Success (result, _, _) -> 87 | match result.Expression with 88 | | Some expr -> printfn "Parsed expression: %s" (stringify expr) 89 | | None -> printfn "No main expression" 90 | | Failure (msg, _, _) -> 91 | printfn "Error: %s" msg 92 | printfn "" 93 | 94 | // Run test cases 95 | let testCases = [ 96 | "\"red shoes\" OR ((blue OR purple) AND sneakers)" 97 | "comfortable AND (leather OR suede)" 98 | "(winter OR summer) AND 
boots" 99 | "boots summer" 100 | ] 101 | 102 | testCases |> List.iter testParser -------------------------------------------------------------------------------- /scratchpad/FsharpParser.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "dotnet_interactive": { 8 | "language": "fsharp" 9 | }, 10 | "polyglot_notebook": { 11 | "kernelName": "fsharp" 12 | } 13 | }, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/html": [ 18 | "
Installed Packages
  • FParsec, 1.1.1
" 19 | ] 20 | }, 21 | "metadata": {}, 22 | "output_type": "display_data" 23 | } 24 | ], 25 | "source": [ 26 | "#r \"nuget: FParsec\"\n", 27 | "open FParsec" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": { 34 | "dotnet_interactive": { 35 | "language": "fsharp" 36 | }, 37 | "polyglot_notebook": { 38 | "kernelName": "fsharp" 39 | } 40 | }, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "Success: \"one\"\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "let searchTerm =\n", 52 | " many1Chars (noneOf \" \")\n", 53 | "\n", 54 | "printfn \"%A\" (run searchTerm \"one two three\")" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 26, 60 | "metadata": { 61 | "dotnet_interactive": { 62 | "language": "fsharp" 63 | }, 64 | "polyglot_notebook": { 65 | "kernelName": "fsharp" 66 | } 67 | }, 68 | "outputs": [ 69 | { 70 | "name": "stdout", 71 | "output_type": "stream", 72 | "text": [ 73 | "Success: [\"one\"; \"two\"; \"three\"]\n", 74 | "Success: [\"one\"; \"two\"; \"three\"]\n", 75 | "Success: []\n" 76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "let searchParser =\n", 81 | " many (searchTerm .>> spaces)\n", 82 | "\n", 83 | "printfn \"%A\" (run searchParser \"one two three\")\n", 84 | "printfn \"%A\" (run searchParser \"one two three\")\n", 85 | "printfn \"%A\" (run searchParser \" one two three \")" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 27, 91 | "metadata": { 92 | "dotnet_interactive": { 93 | "language": "fsharp" 94 | }, 95 | "polyglot_notebook": { 96 | "kernelName": "fsharp" 97 | } 98 | }, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "Success: [\"one\"; \"two\"; \"three\"]\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "let searchQuery =\n", 110 | " spaces >>. 
searchParser .>> eof\n", 111 | "\n", 112 | "printfn \"%A\" (run searchQuery \" one two three \")\n" 113 | ] 114 | } 115 | ], 116 | "metadata": { 117 | "kernelspec": { 118 | "display_name": ".NET (C#)", 119 | "language": "C#", 120 | "name": ".net-csharp" 121 | }, 122 | "polyglot_notebook": { 123 | "kernelInfo": { 124 | "defaultKernelName": "csharp", 125 | "items": [ 126 | { 127 | "aliases": [], 128 | "name": "csharp" 129 | } 130 | ] 131 | } 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 2 136 | } 137 | -------------------------------------------------------------------------------- /scratchpad/FsharpQuotedStringParser.fsx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dotnet fsi 2 | #r "nuget: FParsec" 3 | open FParsec 4 | 5 | // Define the types 6 | type SearchQuery = 7 | { SearchTerms: string list } 8 | 9 | // Parser for quoted string - the \" escape is tried before ordinary characters so an escaped quote does not end the string 10 | let quotedString = 11 | between 12 | (pchar '"') 13 | (pchar '"') 14 | (manyChars ((pstring "\\\"" >>% '"') <|> noneOf "\"")) 15 | 16 | // Parser for unquoted string (no spaces) 17 | let unquotedString = 18 | many1Chars (noneOf " ") 19 | 20 | // Parser for search terms 21 | let searchTerm = 22 | (quotedString <|> unquotedString) 23 | 24 | // Main parser: zero or more whitespace-separated terms collected into a SearchQuery (no user state) 25 | let searchParser : Parser<SearchQuery, unit> = 26 | many (searchTerm .>> spaces) |>> fun searchTerms -> 27 | { 28 | SearchTerms = searchTerms 29 | } 30 | 31 | // Function to run the parser 32 | let parseSearchQuery (input: string) = 33 | run (spaces >>. 
searchParser .>> eof) input 34 | 35 | // Test function 36 | let testParser input = 37 | printfn "Parsing query: %s" input 38 | match parseSearchQuery input with 39 | | Success (result, _, _) -> 40 | printfn "Search terms: %A" result.SearchTerms 41 | | Failure (msg, _, _) -> 42 | printfn "Error: %s" msg 43 | printfn "" 44 | 45 | testParser "\"red shoes\"" 46 | testParser "red shoes" 47 | testParser "comfortable red shoes" 48 | testParser "\"red winter shoes\" warm cozy" 49 | -------------------------------------------------------------------------------- /scratchpad/FsharpRecursiveSearch.fsx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dotnet fsi 2 | #r "nuget: FParsec" 3 | open FParsec 4 | 5 | // AST types 6 | type Expression = 7 | | Term of string 8 | | Field of string * string 9 | | And of Expression * Expression 10 | | Or of Expression * Expression 11 | 12 | type SearchQuery = { 13 | Expression: Expression option 14 | Fields: Map<string, string> 15 | } 16 | 17 | // Forward declare expression parser for recursive use 18 | let expr, exprImpl = createParserForwardedToRef() 19 | 20 | // Parse a quoted string - the \" escape is tried before ordinary characters so an escaped quote does not end the string 21 | let quotedString = 22 | between 23 | (pchar '"') 24 | (pchar '"') 25 | (manyChars ((pstring "\\\"" >>% '"') <|> noneOf "\"")) 26 | 27 | // Parse a word (unquoted string - stops at whitespace, quotes, parens, or colon) 28 | let word = 29 | many1Chars (noneOf " \"():") 30 | 31 | // Parse a term (either quoted or unquoted) 32 | let term = 33 | (quotedString <|> word) |>> Term 34 | 35 | // Parse a field:value pair (field name is letters only and is lower-cased) 36 | let fieldValue = 37 | pipe2 38 | (many1Chars letter .>> pchar ':') 39 | (quotedString <|> word) 40 | (fun field value -> Field(field.ToLower(), value)) 41 | 42 | // Parse logical operators with proper precedence 43 | let opp = new OperatorPrecedenceParser() 44 | 45 | let addInfixOperator str precedence associativity f = 46 | opp.AddOperator( 47 | InfixOperator(str, 
spaces, precedence, associativity, f) 48 | ) 49 | 50 | // Setup operator precedence 51 | do 52 | addInfixOperator "AND" 2 Associativity.Left (fun x y -> And(x, y)) 53 | addInfixOperator "OR" 1 Associativity.Left (fun x y -> Or(x, y)) 54 | 55 | // Parse a primary expression (term, field:value, or parenthesized expression) 56 | let primaryExpr = 57 | (between (pchar '(' .>> spaces) (pchar ')') expr) 58 | <|> attempt fieldValue 59 | <|> term 60 | .>> spaces 61 | 62 | // Implement the expression parser 63 | do exprImpl.Value <- opp.ExpressionParser .>> spaces 64 | opp.TermParser <- primaryExpr 65 | 66 | // Main parser for the full search query 67 | let searchParser = 68 | spaces >>. many expr .>> spaces .>> eof 69 | |>> fun exprs -> 70 | let fields = 71 | exprs 72 | |> List.choose (function 73 | | Field(key, value) -> Some(key, value) 74 | | _ -> None) 75 | |> Map.ofList 76 | 77 | let nonFieldExprs = 78 | exprs 79 | |> List.filter (function 80 | | Field _ -> false 81 | | _ -> true) 82 | 83 | let mainExpr = 84 | match nonFieldExprs with 85 | | [] -> None 86 | | [single] -> Some single 87 | | multiple -> 88 | Some(multiple |> List.reduce (fun left right -> And(left, right))) 89 | 90 | { Expression = mainExpr; Fields = fields } 91 | 92 | // Helper function to stringify expressions 93 | let rec stringify = function 94 | | Term value -> 95 | if value.Contains(" ") then sprintf "\"%s\"" value 96 | else value 97 | | Field(key, value) -> sprintf "%s:%s" key value 98 | | And(left, right) -> sprintf "(%s AND %s)" (stringify left) (stringify right) 99 | | Or(left, right) -> sprintf "(%s OR %s)" (stringify left) (stringify right) 100 | 101 | // Function to run the parser 102 | let parseSearchQuery (input: string) = 103 | run searchParser input 104 | 105 | // Test function 106 | let testParser input = 107 | printfn "\nParsing query: %s" input 108 | match parseSearchQuery input with 109 | | Success (result, _, _) -> 110 | match result.Expression with 111 | | Some expr -> printfn 
"Parsed expression: %s" (stringify expr) 112 | | None -> printfn "No main expression" 113 | printfn "Fields:" 114 | result.Fields |> Map.iter (fun k v -> printfn " %s: %s" k v) 115 | | Failure (msg, _, _) -> 116 | printfn "Error: %s" msg 117 | printfn "" 118 | 119 | // Run test cases 120 | let testCases = [ 121 | "\"red shoes\" OR ((blue OR purple) AND sneakers) size:10 category:footwear" 122 | "comfortable AND (leather OR suede) brand:nike" 123 | "(winter OR summer) AND boots size:8" 124 | "(size:8 AND brand:nike)" 125 | ] 126 | 127 | testCases |> List.iter testParser -------------------------------------------------------------------------------- /scratchpad/FsharpRecursiveSearchErrors.fsx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dotnet fsi 2 | #r "nuget: FParsec" 3 | open FParsec 4 | 5 | // AST types 6 | type Expression = 7 | | Term of string 8 | | And of Expression * Expression 9 | | Or of Expression * Expression 10 | 11 | type SearchQuery = { 12 | Expression: Expression option 13 | Fields: Map 14 | } 15 | 16 | // Forward declare expression parser for recursive use 17 | let expr, exprImpl = createParserForwardedToRef() 18 | 19 | // Parse a quoted string - handles escaped quotes 20 | let quotedString = 21 | between 22 | (pchar '"') 23 | (pchar '"') 24 | (manyChars (noneOf "\"" <|> (pstring "\\\"" >>% '"'))) 25 | 26 | // Parse a word (unquoted string - stops at whitespace, quotes, parens, or standalone AND/OR) 27 | let word = 28 | many1Chars (noneOf " \"()") >>= fun s -> 29 | if s.Contains(":") then 30 | fail "Fields cannot be used inside expressions. Please remove field:value pairs from expressions." 
31 | else 32 | preturn s 33 | 34 | let term = 35 | (quotedString |>> Term) 36 | <|> (word |>> Term) 37 | 38 | // Parse a field:value pair 39 | let fieldValue = 40 | pipe2 41 | (many1Chars letter .>> pchar ':') 42 | (quotedString <|> word) 43 | (fun field value -> (field.ToLower(), value)) 44 | 45 | // Parse logical operators with proper precedence 46 | let opp = new OperatorPrecedenceParser() 47 | 48 | let addInfixOperator str precedence associativity f = 49 | opp.AddOperator( 50 | InfixOperator(str, spaces, precedence, associativity, f) 51 | ) 52 | 53 | // Setup operator precedence 54 | do 55 | addInfixOperator "AND" 2 Associativity.Left (fun x y -> And(x, y)) 56 | addInfixOperator "OR" 1 Associativity.Left (fun x y -> Or(x, y)) 57 | 58 | // Parse a primary expression (term or parenthesized expression) 59 | let primaryExpr = 60 | (between (pchar '(' .>> spaces) (pchar ')') expr) 61 | <|> term 62 | .>> spaces 63 | 64 | // Implement the expression parser 65 | do exprImpl.Value <- opp.ExpressionParser .>> spaces 66 | opp.TermParser <- primaryExpr 67 | 68 | // Parse either an expression or a field-value at the top level 69 | let exprOrField = 70 | (attempt (fieldValue |>> fun fv -> Choice2Of2 fv)) 71 | <|> (expr |>> fun e -> Choice1Of2 e) 72 | .>> spaces 73 | 74 | // Main parser for the full search query 75 | let searchParser = 76 | spaces >>. 
many exprOrField .>> eof 77 | |>> fun items -> 78 | let fields = 79 | items 80 | |> List.choose (function 81 | | Choice2Of2 (key, value) -> Some (key, value) 82 | | _ -> None) 83 | |> Map.ofList 84 | 85 | let expressions = 86 | items 87 | |> List.choose (function 88 | | Choice1Of2 e -> Some e 89 | | _ -> None) 90 | 91 | let mainExpr = 92 | match expressions with 93 | | [] -> None 94 | | [single] -> Some single 95 | | multiple -> Some (multiple |> List.reduce (fun left right -> And(left, right))) 96 | 97 | { Expression = mainExpr; Fields = fields } 98 | 99 | // Helper function to stringify expressions 100 | let rec stringify = function 101 | | Term value -> 102 | if value.Contains(" ") then sprintf "\"%s\"" value 103 | else value 104 | | And(left, right) -> sprintf "(%s AND %s)" (stringify left) (stringify right) 105 | | Or(left, right) -> sprintf "(%s OR %s)" (stringify left) (stringify right) 106 | 107 | // Function to run the parser 108 | let parseSearchQuery (input: string) = 109 | run searchParser input 110 | 111 | // Test function 112 | let testParser input = 113 | printfn "\nParsing query: %s" input 114 | match parseSearchQuery input with 115 | | Success (result, _, _) -> 116 | match result.Expression with 117 | | Some expr -> printfn "Parsed expression: %s" (stringify expr) 118 | | None -> printfn "No main expression" 119 | printfn "Fields:" 120 | result.Fields |> Map.iter (fun k v -> printfn " %s: %s" k v) 121 | | Failure (msg, _, _) -> 122 | printfn "Error: %s" msg 123 | printfn "" 124 | 125 | // Run test cases 126 | let testCases = [ 127 | "\"red shoes\" OR ((blue OR purple) AND sneakers) size:10 category:footwear" 128 | "comfortable AND (leather OR suede) brand:nike" 129 | "(winter OR summer) AND boots size:8" 130 | "(size:8 AND brand:nike)" 131 | ] 132 | 133 | testCases |> List.iter testParser -------------------------------------------------------------------------------- /scratchpad/FsharpSearchStringParser.fsx: 
--------------------------------------------------------------------------------
#!/usr/bin/env dotnet fsi
#r "nuget: FParsec"
open FParsec

// Result of parsing a search string: free-text terms in input order plus
// the field:value pairs keyed by lower-cased field name.
type SearchQuery =
    { SearchTerms: string list
      Fields: Map<string, string> }

// Parser for a double-quoted string.
// The escape alternatives must be tried BEFORE the generic character parser:
// `noneOf "\""` happily accepts a backslash, so if it came first the escape
// branch would never be reached and `\"` would terminate the string early.
// `pstring` fails without consuming input, so falling through to the next
// alternative is safe. Supported escapes: \" -> " and \\ -> \ ; any other
// backslash is kept verbatim.
let quotedString =
    between
        (pchar '"')
        (pchar '"')
        (manyChars (
            (pstring "\\\"" >>% '"')
            <|> (pstring "\\\\" >>% '\\')
            <|> noneOf "\""))

// Parser for an unquoted string: one or more characters up to the next
// space or colon (the colon is excluded so it can separate field from value).
let unquotedString =
    many1Chars (noneOf " :")

// Parser for field:value pairs. The field name is one or more letters
// immediately followed by ':'; the value is quoted or unquoted.
// The field name is lower-cased; the pair is tagged Choice1Of2.
let fieldValue =
    pipe2
        (many1Chars letter .>> pchar ':')
        (quotedString <|> unquotedString)
        (fun field value -> Choice1Of2 (field.ToLower(), value))

// Parser for search terms (quoted or unquoted), tagged Choice2Of2.
let searchTerm =
    (quotedString <|> unquotedString)
    |>> Choice2Of2

// Parser for tokens (either field:value or search term).
// `attempt` is required: fieldValue consumes the leading letters before it
// discovers there is no ':', and must backtrack so searchTerm can re-read them.
let token =
    attempt fieldValue <|> searchTerm

// Main parser: reads whitespace-separated tokens and splits them into
// fields and search terms. A repeated field name keeps the last value
// (Map.add overwrites). Terms are accumulated in reverse and flipped at the end.
let searchParser : Parser<SearchQuery, unit> =
    many (token .>> spaces) |>> fun tokens ->
        let fields, searchTerms =
            tokens |> List.fold (fun (fields, terms) token ->
                match token with
                | Choice1Of2 (field, value) -> (Map.add field value fields, terms)
                | Choice2Of2 term -> (fields, term :: terms)
            ) (Map.empty, [])
        {
            SearchTerms = List.rev searchTerms
            Fields = fields
        }

// Function to run the parser
let parseSearchQuery (input: string) =
    run (spaces >>. 
searchParser .>> eof) input 54 | 55 | // Test function 56 | let testParser input = 57 | printfn "Parsing query: %s" input 58 | match parseSearchQuery input with 59 | | Success (result, _, _) -> 60 | printfn "Search terms: %A" result.SearchTerms 61 | printfn "Fields:" 62 | result.Fields |> Map.iter (fun k v -> printfn " %s: %s" k v) 63 | | Failure (msg, _, _) -> 64 | printfn "Error: %s" msg 65 | printfn "" 66 | 67 | testParser "\"red shoes\" category:clothing size:10 color:red brand:nike" 68 | testParser "red shoes category:clothing size:10 color:red brand:nike" 69 | testParser "comfortable red shoes category:clothing size:10" 70 | testParser "category:clothing \"red winter shoes\" warm cozy" 71 | testParser "\"quoted term\" another term yet:another" 72 | -------------------------------------------------------------------------------- /scratchpad/FsharpStringParser.fsx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dotnet fsi 2 | #r "nuget: FParsec" 3 | open FParsec 4 | 5 | // Define the types 6 | type SearchQuery = 7 | { SearchTerms: string list } 8 | 9 | // Parser for search terms 10 | let searchTerm = 11 | many1Chars (noneOf " ") 12 | 13 | // Main parser 14 | let searchParser : Parser = 15 | many (searchTerm .>> spaces) |>> fun searchTerms -> 16 | { 17 | SearchTerms = searchTerms 18 | } 19 | 20 | // Function to run the parser 21 | let parseSearchQuery (input: string) = 22 | run (spaces >>. 
searchParser .>> eof) input 23 | 24 | // Test function 25 | let testParser input = 26 | printfn "Parsing query: %s" input 27 | match parseSearchQuery input with 28 | | Success (result, _, _) -> 29 | printfn "Search terms: %A" result.SearchTerms 30 | | Failure (msg, _, _) -> 31 | printfn "Error: %s" msg 32 | printfn "" 33 | 34 | testParser "red shoes" 35 | testParser "comfortable red shoes" 36 | -------------------------------------------------------------------------------- /scratchpad/PythonParser.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from ply import lex, yacc" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "213\n" 22 | ] 23 | } 24 | ], 25 | "source": [] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [] 33 | } 34 | ], 35 | "metadata": { 36 | "kernelspec": { 37 | "display_name": "Python 3", 38 | "language": "python", 39 | "name": "python3" 40 | }, 41 | "language_info": { 42 | "codemirror_mode": { 43 | "name": "ipython", 44 | "version": 3 45 | }, 46 | "file_extension": ".py", 47 | "mimetype": "text/x-python", 48 | "name": "python", 49 | "nbconvert_exporter": "python", 50 | "pygments_lexer": "ipython3", 51 | "version": "3.9.15" 52 | } 53 | }, 54 | "nbformat": 4, 55 | "nbformat_minor": 2 56 | } 57 | -------------------------------------------------------------------------------- /scratchpad/javascript-quoted-string-parser.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | class Parser { 4 | constructor(input) { 5 | this.input = input; 6 | this.position = 0; 7 | } 8 | 9 | // Main function to start 
parsing 10 | parse() { 11 | const searchTerms = []; 12 | this.skipWhitespace(); 13 | 14 | while (!this.atEnd()) { 15 | const savedPosition = this.position; 16 | 17 | // Try to parse a search term 18 | try { 19 | const term = this.parseTerm(); 20 | searchTerms.push(term); 21 | this.skipWhitespace(); 22 | } catch (e) { 23 | // If we can't parse a term, we can't proceed further 24 | break; 25 | } 26 | } 27 | 28 | return { searchTerms }; 29 | } 30 | 31 | // Helper to check if we've reached the end of the input 32 | atEnd() { 33 | return this.position >= this.input.length; 34 | } 35 | 36 | // Helper to consume whitespace 37 | skipWhitespace() { 38 | while (!this.atEnd() && /\s/.test(this.input[this.position])) { 39 | this.position++; 40 | } 41 | } 42 | 43 | // Parse a search term (quoted or unquoted) 44 | parseTerm() { 45 | this.skipWhitespace(); 46 | 47 | if (this.atEnd()) { 48 | throw new Error("Unexpected end of input"); 49 | } 50 | 51 | if (this.input[this.position] === '"') { 52 | return this.parseQuotedTerm(); 53 | } else { 54 | return this.parseWord(); 55 | } 56 | } 57 | 58 | // Parse a quoted term 59 | parseQuotedTerm() { 60 | if (this.atEnd() || this.input[this.position] !== '"') { 61 | throw new Error("Expected opening quote for search term"); 62 | } 63 | this.position++; // Skip opening quote 64 | 65 | let term = ""; 66 | while (!this.atEnd()) { 67 | if (this.input[this.position] === "\\") { 68 | this.position++; 69 | if (this.atEnd()) { 70 | throw new Error("Unexpected end of input after escape character"); 71 | } 72 | term += this.input[this.position]; 73 | } else if (this.input[this.position] === '"') { 74 | break; 75 | } else { 76 | term += this.input[this.position]; 77 | } 78 | this.position++; 79 | } 80 | 81 | if (this.atEnd() || this.input[this.position] !== '"') { 82 | throw new Error("Expected closing quote for search term"); 83 | } 84 | 85 | this.position++; // Skip closing quote 86 | return term; 87 | } 88 | 89 | // Parse a word (unquoted term) 90 
| parseWord() { 91 | this.skipWhitespace(); 92 | let start = this.position; 93 | while ( 94 | !this.atEnd() && 95 | !/\s/.test(this.input[this.position]) 96 | ) { 97 | this.position++; 98 | } 99 | if (start === this.position) { 100 | throw new Error("Expected a word"); 101 | } 102 | return this.input.slice(start, this.position); 103 | } 104 | 105 | // Expect a specific character and move position forward 106 | expect(char) { 107 | this.skipWhitespace(); 108 | if (this.atEnd() || this.input[this.position] !== char) { 109 | throw new Error( 110 | `Expected '${char}' but found '${ 111 | this.atEnd() ? "end of input" : this.input[this.position] 112 | }'` 113 | ); 114 | } 115 | this.position++; 116 | } 117 | } 118 | 119 | // Test queries 120 | const test_queries = [ 121 | '"red shoes"', 122 | "red shoes", 123 | "comfortable red shoes", 124 | '"red winter shoes" warm cozy', 125 | '"quoted term" another term "yet another"', 126 | ]; 127 | 128 | // Run tests 129 | for (const query of test_queries) { 130 | console.log("\nParsing query:", query); 131 | try { 132 | const parser = new Parser(query); 133 | const result = parser.parse(); 134 | console.log("Search terms:", result.searchTerms); 135 | } catch (error) { 136 | console.error("Error parsing query:", error.message); 137 | } 138 | } 139 | 140 | // Export the Parser class if using as a module 141 | if (typeof module !== "undefined" && module.exports) { 142 | module.exports = Parser; 143 | } 144 | -------------------------------------------------------------------------------- /scratchpad/javascript-search-string-parser.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | class Parser { 4 | constructor(input) { 5 | this.input = input; 6 | this.position = 0; 7 | } 8 | 9 | // Main function to start parsing 10 | parse() { 11 | const searchTerms = []; 12 | const fields = {}; 13 | this.skipWhitespace(); 14 | 15 | while (!this.atEnd()) { 16 | const savedPosition = 
this.position; 17 | 18 | // Try to parse a field:value pair 19 | try { 20 | const key = this.parseWord().toLowerCase(); 21 | this.expect(":"); 22 | const value = this.parseValue(); 23 | fields[key] = value; 24 | this.skipWhitespace(); 25 | continue; 26 | } catch (e) { 27 | this.position = savedPosition; // Reset position 28 | } 29 | 30 | // Try to parse a search term 31 | try { 32 | const term = this.parseTerm(); 33 | searchTerms.push(term); 34 | this.skipWhitespace(); 35 | } catch (e) { 36 | // If we can't parse a term, we can't proceed further 37 | break; 38 | } 39 | } 40 | 41 | return { searchTerms, fields }; 42 | } 43 | 44 | // Helper to check if we've reached the end of the input 45 | atEnd() { 46 | return this.position >= this.input.length; 47 | } 48 | 49 | // Helper to consume whitespace 50 | skipWhitespace() { 51 | while (!this.atEnd() && /\s/.test(this.input[this.position])) { 52 | this.position++; 53 | } 54 | } 55 | 56 | // Parse a search term (quoted or unquoted) 57 | parseTerm() { 58 | this.skipWhitespace(); 59 | 60 | if (this.atEnd()) { 61 | throw new Error("Unexpected end of input"); 62 | } 63 | 64 | if (this.input[this.position] === '"') { 65 | return this.parseQuotedTerm(); 66 | } else { 67 | return this.parseWord(); 68 | } 69 | } 70 | 71 | // Parse a value (quoted or unquoted) 72 | parseValue() { 73 | this.skipWhitespace(); 74 | 75 | if (this.atEnd()) { 76 | throw new Error("Unexpected end of input"); 77 | } 78 | 79 | if (this.input[this.position] === '"') { 80 | return this.parseQuotedTerm(); 81 | } else { 82 | return this.parseWord(); 83 | } 84 | } 85 | 86 | // Parse a quoted term 87 | parseQuotedTerm() { 88 | if (this.atEnd() || this.input[this.position] !== '"') { 89 | throw new Error("Expected opening quote for search term"); 90 | } 91 | this.position++; // Skip opening quote 92 | 93 | let term = ""; 94 | while (!this.atEnd()) { 95 | if (this.input[this.position] === "\\") { 96 | this.position++; 97 | if (this.atEnd()) { 98 | throw new 
Error("Unexpected end of input after escape character"); 99 | } 100 | term += this.input[this.position]; 101 | } else if (this.input[this.position] === '"') { 102 | break; 103 | } else { 104 | term += this.input[this.position]; 105 | } 106 | this.position++; 107 | } 108 | 109 | if (this.atEnd() || this.input[this.position] !== '"') { 110 | throw new Error("Expected closing quote for search term"); 111 | } 112 | 113 | this.position++; // Skip closing quote 114 | return term; 115 | } 116 | 117 | // Parse a word (unquoted term) 118 | parseWord() { 119 | this.skipWhitespace(); 120 | let start = this.position; 121 | while ( 122 | !this.atEnd() && 123 | !/\s/.test(this.input[this.position]) && 124 | this.input[this.position] !== ":" 125 | ) { 126 | this.position++; 127 | } 128 | if (start === this.position) { 129 | throw new Error("Expected a word"); 130 | } 131 | return this.input.slice(start, this.position); 132 | } 133 | 134 | // Expect a specific character and move position forward 135 | expect(char) { 136 | this.skipWhitespace(); 137 | if (this.atEnd() || this.input[this.position] !== char) { 138 | throw new Error( 139 | `Expected '${char}' but found '${ 140 | this.atEnd() ? 
"end of input" : this.input[this.position] 141 | }'` 142 | ); 143 | } 144 | this.position++; 145 | } 146 | } 147 | 148 | // Test queries 149 | const test_queries = [ 150 | '"red shoes" category:clothing size:10 color:red brand:nike', 151 | "red shoes category:clothing size:10 color:red brand:nike", 152 | "comfortable red shoes category:clothing size:10", 153 | 'category:clothing "red winter shoes" warm cozy', 154 | '"quoted term" another term yet:another', 155 | ]; 156 | 157 | // Run tests 158 | for (const query of test_queries) { 159 | console.log("\nParsing query:", query); 160 | try { 161 | const parser = new Parser(query); 162 | const result = parser.parse(); 163 | console.log("Search terms:", result.searchTerms); 164 | console.log("Fields:"); 165 | for (const [key, value] of Object.entries(result.fields)) { 166 | console.log(` ${key}: ${value}`); 167 | } 168 | } catch (error) { 169 | console.error("Error parsing query:", error.message); 170 | } 171 | } 172 | 173 | // Export the Parser class if using as a module 174 | if (typeof module !== "undefined" && module.exports) { 175 | module.exports = Parser; 176 | } 177 | -------------------------------------------------------------------------------- /scratchpad/javascript-string-parser.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | class Parser { 4 | constructor(input) { 5 | this.input = input; 6 | this.position = 0; 7 | } 8 | 9 | // Main function to start parsing 10 | parse() { 11 | const searchTerms = []; 12 | this.skipWhitespace(); 13 | 14 | while (!this.atEnd()) { 15 | const savedPosition = this.position; 16 | 17 | // Try to parse a search term 18 | try { 19 | const term = this.parseTerm(); 20 | searchTerms.push(term); 21 | this.skipWhitespace(); 22 | } catch (e) { 23 | // If we can't parse a term, we can't proceed further 24 | break; 25 | } 26 | } 27 | 28 | return { searchTerms }; 29 | } 30 | 31 | // Helper to check if we've reached the end 
of the input 32 | atEnd() { 33 | return this.position >= this.input.length; 34 | } 35 | 36 | // Helper to consume whitespace 37 | skipWhitespace() { 38 | while (!this.atEnd() && /\s/.test(this.input[this.position])) { 39 | this.position++; 40 | } 41 | } 42 | 43 | // Parse a search term (quoted or unquoted) 44 | parseTerm() { 45 | this.skipWhitespace(); 46 | let start = this.position; 47 | while (!this.atEnd() && !/\s/.test(this.input[this.position])) { 48 | this.position++; 49 | } 50 | if (start === this.position) { 51 | throw new Error("Expected a word"); 52 | } 53 | return this.input.slice(start, this.position); 54 | } 55 | } 56 | 57 | // Test queries 58 | const test_queries = [ 59 | "red shoes", 60 | "comfortable red shoes" 61 | ]; 62 | 63 | // Run tests 64 | for (const query of test_queries) { 65 | console.log("\nParsing query:", query); 66 | try { 67 | const parser = new Parser(query); 68 | const result = parser.parse(); 69 | console.log("Search terms:", result.searchTerms); 70 | } catch (error) { 71 | console.error("Error parsing query:", error.message); 72 | } 73 | } 74 | 75 | // Export the Parser class if using as a module 76 | if (typeof module !== "undefined" && module.exports) { 77 | module.exports = Parser; 78 | } 79 | -------------------------------------------------------------------------------- /scratchpad/python_expression_fields_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from ply import lex, yacc 3 | from typing import Dict, List, NamedTuple, Optional, Union 4 | from dataclasses import dataclass 5 | 6 | # AST type definitions 7 | @dataclass 8 | class Term: 9 | value: str 10 | 11 | @dataclass 12 | class And: 13 | left: 'Expression' 14 | right: 'Expression' 15 | 16 | @dataclass 17 | class Or: 18 | left: 'Expression' 19 | right: 'Expression' 20 | 21 | @dataclass 22 | class Field: 23 | key: str 24 | value: str 25 | 26 | # Expression can be any of our AST node types 27 | 
# Union of every AST node type the grammar rules below can produce.
Expression = Union[Term, And, Or, Field]

# Value produced by the start rule (p_search_query).
class SearchQuery(NamedTuple):
    # None when the query reduces through the `empty` rule (see p_empty).
    expression: Optional[Expression]

# Lexer rules
# NOTE: ply.lex reads each token function's regular expression from its
# docstring, so the strings inside the t_* functions are patterns, not
# documentation. Per PLY's documented ordering, function-defined tokens are
# tried in the order they are defined, so the order below is significant.
tokens = (
    'QUOTED_STRING',
    'UNQUOTED_STRING',
    'LPAREN',
    'RPAREN',
    'AND',
    'OR',
    'COLON',
)

# Simple tokens
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_COLON = r':'

t_ignore = ' \t\n\r'  # whitespace

# Double-quoted string; a backslash may escape any single character inside it.
# The token value has the surrounding quotes stripped, then \" is turned into "
# and \\ into \. Any other backslash sequence is left unchanged.
def t_QUOTED_STRING(t):
    r'"([^"\\]|\\.)*"'
    t.value = t.value[1:-1].replace(r'\"', '"').replace(r'\\', '\\')
    return t

# Keyword AND, matched only when the next character is whitespace or a
# parenthesis (lookahead; that character is not consumed).
def t_AND(t):
    r'AND(?=[\s()])'
    return t

# Keyword OR, with the same lookahead restriction as t_AND.
def t_OR(t):
    r'OR(?=[\s()])'
    return t

# Any run of characters that are not whitespace, a quote, a parenthesis or a
# colon. A bare AND/OR that the lookahead rules above did not claim (for
# example at end of input, or directly before ':') is re-typed as the
# operator token here.
def t_UNQUOTED_STRING(t):
    r'[^\s"():]+'
    if t.value in ('AND', 'OR'):
        t.type = t.value
    return t

# Reports the first character the lexer could not match.
# The skip call is commented out, so the lexer position is not advanced here.
# NOTE(review): with skip disabled PLY is expected to abort with a scanning
# error instead of continuing; re-enable the line if best-effort lexing is
# wanted.
def t_error(t):
    print(f"Illegal character '{t.value[0]}'")
    # t.lexer.skip(1)

# Parser rules
# NOTE: ply.yacc reads each p_* function's grammar productions from its
# docstring. The precedence table is listed from lowest to highest, so OR
# binds more loosely than AND; both are left-associative.
precedence = (
    ('left', 'OR'),
    ('left', 'AND'),
)

# Start rule: wraps the parsed expression (or None for empty input).
def p_search_query(p):
    '''
    search_query : expression
                 | empty
    '''
    p[0] = SearchQuery(expression=p[1])

# Empty production; yields None.
def p_empty(p):
    '''
    empty :
    '''
    p[0] = None

# Binary AND / OR over primaries. len(p) == 2 is the single-primary case;
# otherwise p[2] is the operator token's value.
def p_expression(p):
    '''
    expression : primary
               | expression AND expression
               | expression OR expression
    '''
    if len(p) == 2:
        p[0] = p[1]
    elif p[2] == 'AND':
        p[0] = And(left=p[1], right=p[3])
    else: # OR
        p[0] = Or(left=p[1], right=p[3])

# A term, a field:value pair, a parenthesized expression, or two adjacent
# primaries (juxtaposition).
# NOTE(review): `primary primary` is ambiguous for three or more adjacent
# primaries (they can be grouped either way) - confirm the nesting the
# generated parser picks is the one intended.
def p_primary(p):
    '''
    primary : term
            | field_value
            | LPAREN expression RPAREN
            | primary primary
    '''
    if len(p) == 2:
        p[0] = p[1]
    elif len(p) == 3:
        # Implicit AND between adjacent terms
        p[0] = And(left=p[1], right=p[2])
    else:
        p[0] = p[2] # For parenthesized expressions

# Wraps a quoted or unquoted string token in a Term node.
def p_term(p):
    '''
    term : QUOTED_STRING
         | UNQUOTED_STRING
    '''
    p[0] = Term(value=p[1])

def p_field_value(p): 128 | ''' 129 | field_value : field_name COLON field_content 130 | ''' 131 | p[0] = Field(key=p[1].lower(), value=p[3]) 132 | 133 | def p_field_name(p): 134 | ''' 135 | field_name : UNQUOTED_STRING 136 | ''' 137 | p[0] = p[1] 138 | 139 | def p_field_content(p): 140 | ''' 141 | field_content : QUOTED_STRING 142 | | UNQUOTED_STRING 143 | ''' 144 | p[0] = p[1] 145 | 146 | def p_error(p): 147 | if p: 148 | print(f"Syntax error at '{p.value}'") 149 | else: 150 | print("Syntax error at EOF") 151 | 152 | def stringify(expr: Expression) -> str: 153 | """Convert expression back to string form.""" 154 | if isinstance(expr, Term): 155 | return f'"{expr.value}"' if ' ' in expr.value else expr.value 156 | elif isinstance(expr, And): 157 | return f"({stringify(expr.left)} AND {stringify(expr.right)})" 158 | elif isinstance(expr, Or): 159 | return f"({stringify(expr.left)} OR {stringify(expr.right)})" 160 | elif isinstance(expr, Field): 161 | value = f'"{expr.value}"' if ' ' in expr.value else expr.value 162 | return f"{expr.key}:{value}" 163 | else: 164 | raise ValueError(f"Unknown expression type: {type(expr)}") 165 | 166 | def parse_search_query(query: str) -> SearchQuery: 167 | """ 168 | Parse a search query string into a SearchQuery object. 
169 | 170 | Args: 171 | query: The search query string to parse 172 | Returns: 173 | SearchQuery object containing the parsed expression 174 | Raises: 175 | Exception: If parsing fails 176 | """ 177 | lexer = lex.lex() 178 | parser = yacc.yacc() 179 | return parser.parse(query, lexer=lexer) 180 | 181 | def main(): 182 | # Test queries 183 | test_queries = [ 184 | '"red shoes" OR ((blue OR purple) AND sneakers)', 185 | 'comfortable AND (leather OR suede)', 186 | '(winter OR summer) AND boots', 187 | 'boots summer', 188 | 'color:red AND size:large', 189 | 'category:"winter boots" AND (color:black OR color:brown)', 190 | 'winter boots color:blue', 191 | 'red boots black', 192 | 'red (boots black)', 193 | 'AND:value', 194 | 'OR:test', 195 | 'brand:"Nike\\Air"', 196 | 'brand:"Nike\"Air"', 197 | 'brand:"Nike\\"Air"', 198 | 'field: value', 199 | 'field :value', 200 | 'field : value', 201 | 'a AND b OR c', 202 | 'a OR b AND c', 203 | 'a OR b OR c AND d', 204 | '', 205 | '()', 206 | 'field:', 207 | ':value', 208 | '(a OR b) c d', 209 | 'a AND (b OR c) AND d', 210 | '((a AND b) OR c) AND d', 211 | 'status:"pending review"', 212 | 'category:pending review', 213 | 'size:large color:red status:available', 214 | 'category:"winter boots" AND (color:black OR color:brown) AND size:12' 215 | ] 216 | 217 | for query in test_queries: 218 | print(f"\nParsing query: {query}") 219 | try: 220 | result = parse_search_query(query) 221 | print(result) 222 | if result.expression: 223 | print(f"Parsed expression: {stringify(result.expression)}") 224 | except Exception as e: 225 | print(f"Error parsing query: {str(e)}") 226 | 227 | if __name__ == '__main__': 228 | main() -------------------------------------------------------------------------------- /scratchpad/python_expression_fields_parser.py.output.txt: -------------------------------------------------------------------------------- 1 | 2 | Parsing query: "red shoes" OR ((blue OR purple) AND sneakers) 3 | 
SearchQuery(expression=Or(left=Term(value='red shoes'), right=And(left=Or(left=Term(value='blue'), right=Term(value='purple')), right=Term(value='sneakers')))) 4 | Parsed expression: ("red shoes" OR ((blue OR purple) AND sneakers)) 5 | 6 | Parsing query: comfortable AND (leather OR suede) 7 | SearchQuery(expression=And(left=Term(value='comfortable'), right=Or(left=Term(value='leather'), right=Term(value='suede')))) 8 | Parsed expression: (comfortable AND (leather OR suede)) 9 | 10 | Parsing query: (winter OR summer) AND boots 11 | SearchQuery(expression=And(left=Or(left=Term(value='winter'), right=Term(value='summer')), right=Term(value='boots'))) 12 | Parsed expression: ((winter OR summer) AND boots) 13 | 14 | Parsing query: boots summer 15 | SearchQuery(expression=And(left=Term(value='boots'), right=Term(value='summer'))) 16 | Parsed expression: (boots AND summer) 17 | 18 | Parsing query: color:red AND size:large 19 | Syntax error at ':' 20 | Syntax error at ':' 21 | SearchQuery(expression=Term(value='large')) 22 | Parsed expression: large 23 | 24 | Parsing query: category:"winter boots" AND (color:black OR color:brown) 25 | Syntax error at ':' 26 | Syntax error at ':' 27 | Syntax error at ':' 28 | SearchQuery(expression=None) 29 | 30 | Parsing query: winter boots color:blue 31 | Syntax error at ':' 32 | SearchQuery(expression=Term(value='blue')) 33 | Parsed expression: blue 34 | 35 | Parsing query: red boots black 36 | SearchQuery(expression=And(left=Term(value='red'), right=And(left=Term(value='boots'), right=Term(value='black')))) 37 | Parsed expression: (red AND (boots AND black)) 38 | 39 | Parsing query: red (boots black) 40 | SearchQuery(expression=And(left=Term(value='red'), right=And(left=Term(value='boots'), right=Term(value='black')))) 41 | Parsed expression: (red AND (boots AND black)) 42 | 43 | Parsing query: AND:value 44 | Syntax error at 'AND' 45 | SearchQuery(expression=Term(value='value')) 46 | Parsed expression: value 47 | 48 | Parsing query: 
OR:test 49 | Syntax error at 'OR' 50 | SearchQuery(expression=Term(value='test')) 51 | Parsed expression: test 52 | 53 | Parsing query: brand:"Nike\Air" 54 | Syntax error at ':' 55 | SearchQuery(expression=Term(value='Nike\\Air')) 56 | Parsed expression: Nike\Air 57 | 58 | Parsing query: brand:"Nike"Air" 59 | Syntax error at ':' 60 | Illegal character '"' 61 | Error parsing query: Scanning error. Illegal character '"' 62 | 63 | Parsing query: brand:"Nike\"Air" 64 | Syntax error at ':' 65 | SearchQuery(expression=Term(value='Nike"Air')) 66 | Parsed expression: Nike"Air 67 | 68 | Parsing query: field: value 69 | Syntax error at ':' 70 | SearchQuery(expression=Term(value='value')) 71 | Parsed expression: value 72 | 73 | Parsing query: field :value 74 | Syntax error at ':' 75 | SearchQuery(expression=Term(value='value')) 76 | Parsed expression: value 77 | 78 | Parsing query: field : value 79 | Syntax error at ':' 80 | SearchQuery(expression=Term(value='value')) 81 | Parsed expression: value 82 | 83 | Parsing query: a AND b OR c 84 | SearchQuery(expression=Or(left=And(left=Term(value='a'), right=Term(value='b')), right=Term(value='c'))) 85 | Parsed expression: ((a AND b) OR c) 86 | 87 | Parsing query: a OR b AND c 88 | SearchQuery(expression=Or(left=Term(value='a'), right=And(left=Term(value='b'), right=Term(value='c')))) 89 | Parsed expression: (a OR (b AND c)) 90 | 91 | Parsing query: a OR b OR c AND d 92 | SearchQuery(expression=Or(left=Or(left=Term(value='a'), right=Term(value='b')), right=And(left=Term(value='c'), right=Term(value='d')))) 93 | Parsed expression: ((a OR b) OR (c AND d)) 94 | 95 | Parsing query: 96 | SearchQuery(expression=None) 97 | 98 | Parsing query: () 99 | Syntax error at ')' 100 | SearchQuery(expression=None) 101 | 102 | Parsing query: field: 103 | Syntax error at ':' 104 | SearchQuery(expression=None) 105 | 106 | Parsing query: :value 107 | Syntax error at ':' 108 | SearchQuery(expression=Term(value='value')) 109 | Parsed expression: value 110 
| 111 | Parsing query: (a OR b) c d 112 | SearchQuery(expression=And(left=Or(left=Term(value='a'), right=Term(value='b')), right=And(left=Term(value='c'), right=Term(value='d')))) 113 | Parsed expression: ((a OR b) AND (c AND d)) 114 | 115 | Parsing query: a AND (b OR c) AND d 116 | SearchQuery(expression=And(left=And(left=Term(value='a'), right=Or(left=Term(value='b'), right=Term(value='c'))), right=Term(value='d'))) 117 | Parsed expression: ((a AND (b OR c)) AND d) 118 | 119 | Parsing query: ((a AND b) OR c) AND d 120 | SearchQuery(expression=And(left=Or(left=And(left=Term(value='a'), right=Term(value='b')), right=Term(value='c')), right=Term(value='d'))) 121 | Parsed expression: (((a AND b) OR c) AND d) 122 | 123 | Parsing query: status:"pending review" 124 | Syntax error at ':' 125 | SearchQuery(expression=Term(value='pending review')) 126 | Parsed expression: "pending review" 127 | 128 | Parsing query: category:pending review 129 | Syntax error at ':' 130 | SearchQuery(expression=And(left=Term(value='pending'), right=Term(value='review'))) 131 | Parsed expression: (pending AND review) 132 | 133 | Parsing query: size:large color:red status:available 134 | Syntax error at ':' 135 | SearchQuery(expression=Term(value='available')) 136 | Parsed expression: available 137 | 138 | Parsing query: category:"winter boots" AND (color:black OR color:brown) AND size:12 139 | Syntax error at ':' 140 | Syntax error at ':' 141 | Syntax error at ':' 142 | SearchQuery(expression=Term(value='12')) 143 | Parsed expression: 12 144 | -------------------------------------------------------------------------------- /scratchpad/python_expression_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from ply import lex, yacc 3 | from typing import Dict, List, NamedTuple, Optional, Union 4 | from dataclasses import dataclass 5 | 6 | # AST type definitions 7 | @dataclass 8 | class Term: 9 | value: str 10 | 11 | @dataclass 
12 | class And: 13 | left: 'Expression' 14 | right: 'Expression' 15 | 16 | @dataclass 17 | class Or: 18 | left: 'Expression' 19 | right: 'Expression' 20 | 21 | # Expression can be any of our AST node types 22 | Expression = Union[Term, And, Or] 23 | 24 | class SearchQuery(NamedTuple): 25 | expression: Optional[Expression] 26 | 27 | # Lexer rules 28 | tokens = ( 29 | 'QUOTED_STRING', 30 | 'UNQUOTED_STRING', 31 | 'LPAREN', 32 | 'RPAREN', 33 | 'AND', 34 | 'OR' 35 | ) 36 | 37 | # Simple tokens 38 | t_LPAREN = r'\(' 39 | t_RPAREN = r'\)' 40 | 41 | # Ignore whitespace 42 | t_ignore = ' \t\n' 43 | 44 | # Complex tokens 45 | def t_QUOTED_STRING(t): 46 | r'"([^"\\]|\\.)*"' 47 | # Remove quotes and handle escaped characters 48 | t_value = t.value[1:-1].replace(r'\"', '"').replace(r'\\', '\\') 49 | t.value = t_value 50 | return t 51 | 52 | def t_AND(t): 53 | r'AND(?=[\s()])' # Only match AND when followed by whitespace or parentheses 54 | return t 55 | 56 | def t_OR(t): 57 | r'OR(?=[\s()])' # Only match OR when followed by whitespace or parentheses 58 | return t 59 | 60 | def t_UNQUOTED_STRING(t): 61 | r'[^\s"():]+' # Anything not whitespace, quotes, parens, or colon 62 | # Check if this might be an AND/OR operator 63 | if t.value == 'AND' or t.value == 'OR': 64 | t.type = t.value 65 | return t 66 | 67 | def t_error(t): 68 | print(f"Illegal character '{t.value[0]}'") 69 | t.lexer.skip(1) 70 | 71 | # Parser rules with precedence 72 | precedence = ( 73 | ('left', 'OR'), 74 | ('left', 'AND'), 75 | ) 76 | 77 | def p_start(p): 78 | ''' 79 | start : query 80 | ''' 81 | p[0] = p[1] 82 | 83 | def p_query(p): 84 | ''' 85 | query : expression 86 | ''' 87 | p[0] = SearchQuery(expression=p[1]) 88 | 89 | def p_expression(p): 90 | ''' 91 | expression : primary_list 92 | | expression AND expression 93 | | expression OR expression 94 | ''' 95 | if len(p) == 2: 96 | p[0] = p[1] 97 | elif p[2] == 'AND': 98 | p[0] = And(p[1], p[3]) 99 | else: # OR 100 | p[0] = Or(p[1], p[3]) 101 | 102 | def 
p_primary_list(p): 103 | ''' 104 | primary_list : primary 105 | | primary_list primary 106 | ''' 107 | if len(p) == 2: 108 | p[0] = p[1] 109 | else: 110 | # Create implicit AND between adjacent terms 111 | p[0] = And(p[1], p[2]) 112 | 113 | def p_primary(p): 114 | ''' 115 | primary : QUOTED_STRING 116 | | UNQUOTED_STRING 117 | | LPAREN expression RPAREN 118 | ''' 119 | if len(p) == 2: 120 | p[0] = Term(p[1]) 121 | else: 122 | p[0] = p[2] 123 | 124 | def p_error(p): 125 | if p: 126 | print(f"Syntax error at '{p.value}'") 127 | else: 128 | print("Syntax error at EOF") 129 | 130 | # Helper function to stringify expressions 131 | def stringify(expr: Expression) -> str: 132 | if isinstance(expr, Term): 133 | # Quote strings containing spaces 134 | return f'"{expr.value}"' if ' ' in expr.value else expr.value 135 | elif isinstance(expr, And): 136 | return f"({stringify(expr.left)} AND {stringify(expr.right)})" 137 | elif isinstance(expr, Or): 138 | return f"({stringify(expr.left)} OR {stringify(expr.right)})" 139 | else: 140 | raise ValueError(f"Unknown expression type: {type(expr)}") 141 | 142 | def parse_search_query(query: str) -> SearchQuery: 143 | """ 144 | Parse a search query string into a SearchQuery object. 
145 | Args: 146 | query: The search query string to parse 147 | Returns: 148 | SearchQuery object containing the main expression 149 | Raises: 150 | Exception: If parsing fails 151 | """ 152 | lexer = lex.lex() 153 | parser = yacc.yacc() 154 | return parser.parse(query, lexer=lexer) 155 | 156 | def main(): 157 | # Test queries 158 | test_queries = [ 159 | '"red shoes" OR ((blue OR purple) AND sneakers)', 160 | 'comfortable AND (leather OR suede)', 161 | '(winter OR summer) AND boots', 162 | 'boots summer' 163 | ] 164 | 165 | for query in test_queries: 166 | print(f"\nParsing query: {query}") 167 | try: 168 | result = parse_search_query(query) 169 | print(result) 170 | if result.expression: 171 | print(f"Parsed expression: {stringify(result.expression)}") 172 | except Exception as e: 173 | print(f"Error parsing query: {str(e)}") 174 | 175 | if __name__ == '__main__': 176 | main() -------------------------------------------------------------------------------- /scratchpad/python_lexer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from ply import lex 3 | 4 | # Define the list of token names 5 | tokens = ( 6 | 'TERM', 7 | ) 8 | 9 | # Regular expression rule for TERM 10 | def t_TERM(t): 11 | r'[^\s]+' # Match any non-whitespace characters 12 | return t 13 | 14 | # Ignore whitespace 15 | t_ignore = ' \t\n' 16 | 17 | # Error handling 18 | def t_error(t): 19 | print(f"Illegal character '{t.value[0]}'") 20 | t.lexer.skip(1) 21 | 22 | # Build the lexer 23 | lexer = lex.lex() 24 | 25 | def tokenize(text): 26 | """ 27 | Tokenize input text and print each token. 
28 | 29 | Args: 30 | text: String to tokenize 31 | """ 32 | print(f"\nTokenizing: {text!r}") 33 | 34 | # Give the lexer our input text 35 | lexer.input(text) 36 | 37 | # Print token information 38 | while True: 39 | tok = lexer.token() 40 | if not tok: 41 | break 42 | # Print detailed token information 43 | print(f"Token(type={tok.type!r}, value={tok.value!r}, lineno={tok.lineno}, lexpos={tok.lexpos})") 44 | 45 | def main(): 46 | # Test various inputs 47 | test_inputs = [ 48 | "red shoes", 49 | " spaced out terms ", 50 | "single", 51 | "line1\nline2\n line3", 52 | "mixed.punctuation!and@symbols#here", 53 | "", # Empty string 54 | " ", # Just whitespace 55 | ] 56 | 57 | for text in test_inputs: 58 | tokenize(text) 59 | 60 | if __name__ == '__main__': 61 | main() -------------------------------------------------------------------------------- /scratchpad/python_lexer_quoted_strings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from ply import lex 3 | 4 | # Define the list of token names 5 | tokens = ( 6 | 'QUOTED_STRING', 7 | 'TERM', 8 | ) 9 | 10 | # Regular expression rule for quoted strings 11 | def t_QUOTED_STRING(t): 12 | r'"([^"\\]|\\.)*"' # Match quoted strings with escaped characters 13 | # Remove quotes and handle escaped characters 14 | t.value = t.value[1:-1].replace(r'\"', '"').replace(r'\\', '\\') 15 | return t 16 | 17 | # Regular expression rule for TERM - note we exclude quotes now 18 | def t_TERM(t): 19 | r'[^\s"]+' # Match any non-whitespace, non-quote characters 20 | return t 21 | 22 | # Ignore whitespace 23 | t_ignore = ' \t\n' 24 | 25 | # Error handling 26 | def t_error(t): 27 | print(f"Illegal character '{t.value[0]}'") 28 | t.lexer.skip(1) 29 | 30 | # Build the lexer 31 | lexer = lex.lex() 32 | 33 | def tokenize(text): 34 | """ 35 | Tokenize input text and print each token. 
36 | 37 | Args: 38 | text: String to tokenize 39 | """ 40 | print(f"\nTokenizing: {text!r}") 41 | 42 | # Give the lexer our input text 43 | lexer.input(text) 44 | 45 | # Print token information 46 | while True: 47 | tok = lexer.token() 48 | if not tok: 49 | break 50 | # Print detailed token information 51 | print(f"Token(type={tok.type!r}, value={tok.value!r}, lineno={tok.lineno}, lexpos={tok.lexpos})") 52 | 53 | def main(): 54 | # Test various inputs demonstrating quoted and unquoted terms 55 | test_inputs = [ 56 | 'red shoes', # Basic unquoted terms 57 | '"red shoes"', # Simple quoted string 58 | 'find "red shoes" here', # Mix of quoted and unquoted 59 | '"escaped \\"quotes\\" here"', # Escaped quotes 60 | '"multi-word string" followed by terms', # Mixed 61 | 'term "quoted" term', # Term-quote-term 62 | 'missing"quote', # No space before quote 63 | '"unterminated quote', # Error case 64 | 'multiple "quoted" "strings" here', # Multiple quotes 65 | '"" empty', # Empty quotes 66 | '"spaces inside quotes"', # Space preservation in quotes 67 | 'term1"adjacent"term2', # No spaces around quotes 68 | '"special chars !@#$%^&*()"', # Special characters in quotes 69 | '"\\"starts with quote"', # Escaped quote at start 70 | '"ends with quote\\""', # Escaped quote at end 71 | ] 72 | 73 | for text in test_inputs: 74 | try: 75 | tokenize(text) 76 | except Exception as e: 77 | print(f"Error processing input: {str(e)}") 78 | 79 | if __name__ == '__main__': 80 | main() -------------------------------------------------------------------------------- /scratchpad/python_nested_operators_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from ply import lex, yacc 3 | from typing import Dict, List, NamedTuple 4 | 5 | class SearchQuery(NamedTuple): 6 | search_terms: List[str] 7 | fields: Dict[str, str] 8 | 9 | # Lexer 10 | tokens = ( 11 | 'QUOTED_STRING', 12 | 'WORD', 13 | 'COLON', 14 | 'OPEN_PARENS', 15 | 
'CLOSE_PARENS', 16 | 'AND', 17 | 'OR', 18 | 'NOT' 19 | ) 20 | 21 | def t_QUOTED_STRING(t): 22 | r'"([^"\\]|\\.)*"' 23 | # Remove quotes and handle escaped characters 24 | t.value = t.value[1:-1].replace(r'\"', '"').replace(r'\\', '\\') 25 | return t 26 | 27 | def t_WORD(t): 28 | r'[^\s":]+' 29 | return t 30 | 31 | t_COLON = r':' 32 | 33 | t_OPEN_PARENS = r'\(' 34 | 35 | t_CLOSE_PARENS = r'\)' 36 | 37 | t_AND = r'AND' 38 | 39 | t_OR = r'OR' 40 | 41 | t_NOT = r'NOT' 42 | 43 | t_ignore = ' \t' 44 | 45 | def t_newline(t): 46 | r'\n+' 47 | pass # Ignore newlines (if any) 48 | 49 | def t_error(t): 50 | print(f"Illegal character '{t.value[0]}'") 51 | t.lexer.skip(1) 52 | 53 | # Parser 54 | def p_query(p): 55 | ''' 56 | query : tokens 57 | ''' 58 | search_terms = [] 59 | fields = {} 60 | for token in p[1]: 61 | if isinstance(token, dict): 62 | fields.update(token) 63 | else: 64 | search_terms.append(token) 65 | p[0] = SearchQuery(search_terms=search_terms, fields=fields) 66 | 67 | def p_tokens(p): 68 | ''' 69 | tokens : token 70 | | tokens token 71 | ''' 72 | if len(p) == 2: 73 | p[0] = [p[1]] 74 | else: 75 | p[0] = p[1] + [p[2]] 76 | 77 | def p_token(p): 78 | ''' 79 | token : term 80 | | field 81 | ''' 82 | p[0] = p[1] 83 | 84 | def p_term(p): 85 | ''' 86 | term : QUOTED_STRING 87 | | WORD 88 | ''' 89 | p[0] = p[1] 90 | 91 | def p_field(p): 92 | ''' 93 | field : WORD COLON value 94 | ''' 95 | key = p[1].lower() 96 | value = p[3] 97 | p[0] = {key: value} 98 | 99 | def p_value(p): 100 | ''' 101 | value : QUOTED_STRING 102 | | WORD 103 | ''' 104 | p[0] = p[1] 105 | 106 | def p_error(p): 107 | if p: 108 | print(f"Syntax error at '{p.value}'") 109 | else: 110 | print("Syntax error at EOF") 111 | 112 | # Build lexer and parser 113 | lexer = lex.lex() 114 | parser = yacc.yacc(debug=False) 115 | 116 | def parse_search_query(query: str) -> SearchQuery: 117 | """ 118 | Parse a search query string into a SearchQuery object. 
119 | 120 | Args: 121 | query: The search query string to parse 122 | 123 | Returns: 124 | SearchQuery object containing search terms and fields 125 | 126 | Raises: 127 | Exception: If parsing fails 128 | """ 129 | result = parser.parse(query) 130 | return result 131 | 132 | def main(): 133 | # Example usage with both quoted and unquoted terms 134 | test_queries = [ 135 | '"red shoes" category:clothing size:10 color:red brand:nike', 136 | 'red shoes category:clothing size:10 color:red brand:nike', 137 | 'comfortable red shoes category:clothing size:10', 138 | 'category:clothing "red winter shoes" warm cozy', 139 | '"quoted term" another term yet:another' 140 | ] 141 | 142 | for query in test_queries: 143 | print(f"\nParsing query: {query}") 144 | try: 145 | result = parse_search_query(query) 146 | print(f"Search terms: {result.search_terms}") 147 | print("Fields:") 148 | for key, value in result.fields.items(): 149 | print(f" {key}: {value}") 150 | except Exception as e: 151 | print(f"Error parsing query: {e}") 152 | 153 | if __name__ == '__main__': 154 | main() 155 | -------------------------------------------------------------------------------- /scratchpad/python_nested_parens.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from ply import lex, yacc 3 | from typing import Union, List, Any 4 | 5 | # Define the types for our AST 6 | Expression = Union[str, List[Any]] # Can be either an atom (str) or a list of expressions 7 | 8 | # Lexer rules 9 | tokens = ( 10 | 'ATOM', 11 | 'LPAREN', 12 | 'RPAREN', 13 | ) 14 | 15 | t_LPAREN = r'\(' 16 | t_RPAREN = r'\)' 17 | 18 | # Atom is anything that's not whitespace or parentheses 19 | def t_ATOM(t): 20 | r'[^\s())]+' 21 | return t 22 | 23 | # Ignore whitespace 24 | t_ignore = ' \t\n' 25 | 26 | # Error handling 27 | def t_error(t): 28 | print(f"Illegal character '{t.value[0]}'") 29 | t.lexer.skip(1) 30 | 31 | # Parser rules 32 | def p_expression(p): 33 | ''' 
34 | expression : atom 35 | | list 36 | ''' 37 | p[0] = p[1] 38 | 39 | def p_atom(p): 40 | ''' 41 | atom : ATOM 42 | ''' 43 | p[0] = p[1] 44 | 45 | def p_list(p): 46 | ''' 47 | list : LPAREN expressions RPAREN 48 | ''' 49 | p[0] = p[2] 50 | 51 | def p_expressions(p): 52 | ''' 53 | expressions : expression expressions 54 | | empty 55 | ''' 56 | if len(p) == 3: 57 | if p[2] is None: # If expressions is empty 58 | p[0] = [p[1]] 59 | else: 60 | p[0] = [p[1]] + p[2] 61 | else: 62 | p[0] = [] 63 | 64 | def p_empty(p): 65 | ''' 66 | empty : 67 | ''' 68 | p[0] = None 69 | 70 | def p_error(p): 71 | if p: 72 | print(f"Syntax error at '{p.value}'") 73 | else: 74 | print("Syntax error at EOF") 75 | 76 | # Build the lexer and parser 77 | lexer = lex.lex() 78 | parser = yacc.yacc() 79 | 80 | def parse(input_str: str) -> Expression: 81 | """ 82 | Parse a string containing nested parentheses expressions. 83 | 84 | Args: 85 | input_str: The input string to parse 86 | 87 | Returns: 88 | The parsed expression (either an atom or a list of expressions) 89 | 90 | Raises: 91 | Exception: If parsing fails 92 | """ 93 | return parser.parse(input_str) 94 | 95 | def stringify(expr: Expression) -> str: 96 | """ 97 | Convert a parsed expression back to a string representation. 
98 | 99 | Args: 100 | expr: The expression to stringify 101 | 102 | Returns: 103 | String representation of the expression 104 | """ 105 | if isinstance(expr, list): 106 | return f"({' '.join(stringify(e) for e in expr)})" 107 | return str(expr) 108 | 109 | def main(): 110 | # Example usage with test cases 111 | test_cases = [ 112 | "((one) two three)", 113 | "((a b) (c d) e)", 114 | "(a)", 115 | "((a (b c)) d)", 116 | "(((deeply) nested) parens)", 117 | ] 118 | 119 | for test_input in test_cases: 120 | print(f"\nInput: {test_input}") 121 | try: 122 | result = parse(test_input) 123 | print(f"Parsed: {result}") 124 | print(f"Stringified: {stringify(result)}") 125 | except Exception as e: 126 | print(f"Error parsing: {str(e)}") 127 | 128 | if __name__ == '__main__': 129 | main() -------------------------------------------------------------------------------- /scratchpad/python_quoted_string_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from ply import lex, yacc 3 | from typing import Dict, List, NamedTuple 4 | 5 | class SearchQuery(NamedTuple): 6 | search_terms: List[str] 7 | 8 | # Lexer 9 | tokens = ( 10 | 'QUOTED_STRING', 11 | 'WORD', 12 | ) 13 | 14 | def t_QUOTED_STRING(t): 15 | r'"([^"\\]|\\.)*"' 16 | # Remove quotes and handle escaped characters 17 | t.value = t.value[1:-1].replace(r'\"', '"').replace(r'\\', '\\') 18 | return t 19 | 20 | def t_WORD(t): 21 | r'[^\s"]+' 22 | return t 23 | 24 | t_ignore = ' \t' 25 | 26 | def t_newline(t): 27 | r'\n+' 28 | pass # Ignore newlines (if any) 29 | 30 | def t_error(t): 31 | print(f"Illegal character '{t.value[0]}'") 32 | t.lexer.skip(1) 33 | 34 | # Parser 35 | def p_query(p): 36 | ''' 37 | string : terms 38 | ''' 39 | p[0] = SearchQuery(search_terms=p[1]) 40 | 41 | def p_terms(p): 42 | ''' 43 | terms : term 44 | | terms term 45 | ''' 46 | if len(p) == 2: 47 | p[0] = [p[1]] 48 | else: 49 | p[0] = p[1] + [p[2]] 50 | 51 | def p_term(p): 52 | ''' 53 | 
term : QUOTED_STRING 54 | | WORD 55 | ''' 56 | p[0] = p[1] 57 | 58 | def p_error(p): 59 | if p: 60 | print(f"Syntax error at '{p.value}'") 61 | else: 62 | print("Syntax error at EOF") 63 | 64 | # Build lexer and parser 65 | lexer = lex.lex() 66 | parser = yacc.yacc(debug=False) 67 | 68 | def parse_string(query: str) -> SearchQuery: 69 | """ 70 | Parse a search query string into a SearchQuery object. 71 | 72 | Args: 73 | query: The search query string to parse 74 | 75 | Returns: 76 | SearchQuery object containing search terms 77 | 78 | Raises: 79 | Exception: If parsing fails 80 | """ 81 | result = parser.parse(query) 82 | return result 83 | 84 | def main(): 85 | # Example usage with both quoted and unquoted terms 86 | test_queries = [ 87 | '"red shoes"', 88 | 'red shoes', 89 | 'comfortable red shoes', 90 | '"red winter shoes" warm cozy' 91 | ] 92 | 93 | for query in test_queries: 94 | print(f"\nParsing query: {query}") 95 | try: 96 | result = parse_string(query) 97 | print(f"Search terms: {result.search_terms}") 98 | except Exception as e: 99 | print(f"Error parsing query: {e}") 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /scratchpad/python_recursive_search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from ply import lex, yacc 3 | from typing import Dict, List, NamedTuple, Optional, Union 4 | from dataclasses import dataclass 5 | 6 | # AST type definitions 7 | @dataclass 8 | class Term: 9 | value: str 10 | 11 | @dataclass 12 | class Field: 13 | key: str 14 | value: str 15 | 16 | @dataclass 17 | class And: 18 | left: 'Expression' 19 | right: 'Expression' 20 | 21 | @dataclass 22 | class Or: 23 | left: 'Expression' 24 | right: 'Expression' 25 | 26 | # Expression can be any of our AST node types 27 | Expression = Union[Term, Field, And, Or] 28 | 29 | class SearchQuery(NamedTuple): 30 | expression: 
Optional[Expression] 31 | fields: Dict[str, str] 32 | 33 | # Lexer rules 34 | tokens = ( 35 | 'QUOTED_STRING', 36 | 'WORD', 37 | 'COLON', 38 | 'LPAREN', 39 | 'RPAREN', 40 | 'AND', 41 | 'OR' 42 | ) 43 | 44 | # Simple tokens 45 | t_COLON = r':' 46 | t_LPAREN = r'\(' 47 | t_RPAREN = r'\)' 48 | 49 | # Ignore whitespace 50 | t_ignore = ' \t\n' 51 | 52 | # Complex tokens 53 | def t_QUOTED_STRING(t): 54 | r'"([^"\\]|\\.)*"' 55 | # Remove quotes and handle escaped characters 56 | t_value = t.value[1:-1].replace(r'\"', '"').replace(r'\\', '\\') 57 | t.value = t_value 58 | return t 59 | 60 | def t_AND(t): 61 | r'AND(?=[\s()])' # Only match AND when followed by whitespace or parentheses 62 | return t 63 | 64 | def t_OR(t): 65 | r'OR(?=[\s()])' # Only match OR when followed by whitespace or parentheses 66 | return t 67 | 68 | def t_WORD(t): 69 | r'[^\s"():]+' # Anything not whitespace, quotes, parens, or colon 70 | # Check if this might be an AND/OR operator 71 | if t.value == 'AND' or t.value == 'OR': 72 | t.type = t.value 73 | return t 74 | 75 | def t_error(t): 76 | print(f"Illegal character '{t.value[0]}'") 77 | t.lexer.skip(1) 78 | 79 | # Parser rules with precedence 80 | precedence = ( 81 | ('left', 'OR'), 82 | ('left', 'AND'), 83 | ) 84 | 85 | def p_start(p): 86 | ''' 87 | start : query 88 | ''' 89 | p[0] = p[1] 90 | 91 | def p_query(p): 92 | ''' 93 | query : token_list 94 | ''' 95 | tokens = p[1] 96 | fields = {} 97 | non_field_exprs = [] 98 | 99 | # Separate fields from other expressions 100 | for token in tokens: 101 | if isinstance(token, Field): 102 | fields[token.key] = token.value 103 | else: 104 | non_field_exprs.append(token) 105 | 106 | # Combine non-field expressions with AND if there are multiple 107 | main_expr = None 108 | if non_field_exprs: 109 | main_expr = non_field_exprs[0] 110 | for expr in non_field_exprs[1:]: 111 | main_expr = And(main_expr, expr) 112 | 113 | p[0] = SearchQuery(expression=main_expr, fields=fields) 114 | 115 | def p_token_list(p): 
116 | ''' 117 | token_list : token 118 | | token_list token 119 | ''' 120 | if len(p) == 2: 121 | p[0] = [p[1]] 122 | else: 123 | p[0] = p[1] + [p[2]] 124 | 125 | def p_token(p): 126 | ''' 127 | token : expression 128 | | field_value 129 | ''' 130 | p[0] = p[1] 131 | 132 | def p_expression(p): 133 | ''' 134 | expression : primary 135 | | expression AND expression 136 | | expression OR expression 137 | ''' 138 | if len(p) == 2: 139 | p[0] = p[1] 140 | elif p[2] == 'AND': 141 | p[0] = And(p[1], p[3]) 142 | else: # OR 143 | p[0] = Or(p[1], p[3]) 144 | 145 | def p_primary(p): 146 | ''' 147 | primary : QUOTED_STRING 148 | | WORD 149 | | LPAREN expression RPAREN 150 | ''' 151 | if len(p) == 2: 152 | p[0] = Term(p[1]) 153 | else: 154 | p[0] = p[2] 155 | 156 | def p_field_value(p): 157 | ''' 158 | field_value : WORD COLON value 159 | ''' 160 | p[0] = Field(p[1].lower(), p[3]) 161 | 162 | def p_value(p): 163 | ''' 164 | value : QUOTED_STRING 165 | | WORD 166 | ''' 167 | p[0] = p[1] 168 | 169 | def p_error(p): 170 | if p: 171 | print(f"Syntax error at '{p.value}'") 172 | else: 173 | print("Syntax error at EOF") 174 | 175 | # Helper function to stringify expressions 176 | def stringify(expr: Expression) -> str: 177 | if isinstance(expr, Term): 178 | # Quote strings containing spaces 179 | return f'"{expr.value}"' if ' ' in expr.value else expr.value 180 | elif isinstance(expr, Field): 181 | return f"{expr.key}:{expr.value}" 182 | elif isinstance(expr, And): 183 | return f"({stringify(expr.left)} AND {stringify(expr.right)})" 184 | elif isinstance(expr, Or): 185 | return f"({stringify(expr.left)} OR {stringify(expr.right)})" 186 | else: 187 | raise ValueError(f"Unknown expression type: {type(expr)}") 188 | 189 | def parse_search_query(query: str) -> SearchQuery: 190 | """ 191 | Parse a search query string into a SearchQuery object. 
192 | 193 | Args: 194 | query: The search query string to parse 195 | 196 | Returns: 197 | SearchQuery object containing the main expression and fields 198 | 199 | Raises: 200 | Exception: If parsing fails 201 | """ 202 | lexer = lex.lex() 203 | parser = yacc.yacc() 204 | return parser.parse(query, lexer=lexer) 205 | 206 | def main(): 207 | # Test queries 208 | test_queries = [ 209 | '("red shoes" OR ((blue OR purple) AND sneakers)) size:10 category:footwear', 210 | 'comfortable AND (leather OR suede) brand:nike', 211 | '(winter OR summer) AND boots size:8', 212 | '(size:8 AND brand:nike)' 213 | ] 214 | 215 | for query in test_queries: 216 | print(f"\nParsing query: {query}") 217 | try: 218 | result = parse_search_query(query) 219 | if result.expression: 220 | print(f"Parsed expression: {stringify(result.expression)}") 221 | print("Fields:") 222 | for key, value in result.fields.items(): 223 | print(f" {key}: {value}") 224 | except Exception as e: 225 | print(f"Error parsing query: {str(e)}") 226 | 227 | if __name__ == '__main__': 228 | main() -------------------------------------------------------------------------------- /scratchpad/python_search_string_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from ply import lex, yacc 3 | from typing import Dict, List, NamedTuple 4 | 5 | class SearchQuery(NamedTuple): 6 | search_terms: List[str] 7 | fields: Dict[str, str] 8 | 9 | # Lexer 10 | tokens = ( 11 | 'QUOTED_STRING', 12 | 'WORD', 13 | 'COLON', 14 | ) 15 | 16 | def t_QUOTED_STRING(t): 17 | r'"([^"\\]|\\.)*"' 18 | # Remove quotes and handle escaped characters 19 | t.value = t.value[1:-1].replace(r'\"', '"').replace(r'\\', '\\') 20 | return t 21 | 22 | def t_WORD(t): 23 | r'[^\s":]+' 24 | return t 25 | 26 | t_COLON = r':' 27 | 28 | t_ignore = ' \t' 29 | 30 | def t_newline(t): 31 | r'\n+' 32 | pass # Ignore newlines (if any) 33 | 34 | def t_error(t): 35 | print(f"Illegal character 
'{t.value[0]}'") 36 | t.lexer.skip(1) 37 | 38 | # Parser 39 | def p_query(p): 40 | ''' 41 | query : tokens 42 | ''' 43 | search_terms = [] 44 | fields = {} 45 | for token in p[1]: 46 | if isinstance(token, dict): 47 | fields.update(token) 48 | else: 49 | search_terms.append(token) 50 | p[0] = SearchQuery(search_terms=search_terms, fields=fields) 51 | 52 | def p_tokens(p): 53 | ''' 54 | tokens : token 55 | | tokens token 56 | ''' 57 | if len(p) == 2: 58 | p[0] = [p[1]] 59 | else: 60 | p[0] = p[1] + [p[2]] 61 | 62 | def p_token(p): 63 | ''' 64 | token : term 65 | | field 66 | ''' 67 | p[0] = p[1] 68 | 69 | def p_term(p): 70 | ''' 71 | term : QUOTED_STRING 72 | | WORD 73 | ''' 74 | p[0] = p[1] 75 | 76 | def p_field(p): 77 | ''' 78 | field : WORD COLON value 79 | ''' 80 | key = p[1].lower() 81 | value = p[3] 82 | p[0] = {key: value} 83 | 84 | def p_value(p): 85 | ''' 86 | value : QUOTED_STRING 87 | | WORD 88 | ''' 89 | p[0] = p[1] 90 | 91 | def p_error(p): 92 | if p: 93 | print(f"Syntax error at '{p.value}'") 94 | else: 95 | print("Syntax error at EOF") 96 | 97 | # Build lexer and parser 98 | lexer = lex.lex() 99 | parser = yacc.yacc(debug=False) 100 | 101 | def parse_search_query(query: str) -> SearchQuery: 102 | """ 103 | Parse a search query string into a SearchQuery object. 
104 | 105 | Args: 106 | query: The search query string to parse 107 | 108 | Returns: 109 | SearchQuery object containing search terms and fields 110 | 111 | Raises: 112 | Exception: If parsing fails 113 | """ 114 | result = parser.parse(query) 115 | return result 116 | 117 | def main(): 118 | # Example usage with both quoted and unquoted terms 119 | test_queries = [ 120 | '"red shoes" category:clothing size:10 color:red brand:nike', 121 | 'red shoes category:clothing size:10 color:red brand:nike', 122 | 'comfortable red shoes category:clothing size:10', 123 | 'category:clothing "red winter shoes" warm cozy', 124 | '"quoted term" another term yet:another' 125 | ] 126 | 127 | for query in test_queries: 128 | print(f"\nParsing query: {query}") 129 | try: 130 | result = parse_search_query(query) 131 | print(f"Search terms: {result.search_terms}") 132 | print("Fields:") 133 | for key, value in result.fields.items(): 134 | print(f" {key}: {value}") 135 | except Exception as e: 136 | print(f"Error parsing query: {e}") 137 | 138 | if __name__ == '__main__': 139 | main() 140 | -------------------------------------------------------------------------------- /scratchpad/python_single_word_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ply import lex, yacc 4 | 5 | # Lexer 6 | tokens = ( 7 | 'TERM', 8 | ) 9 | 10 | t_TERM = r'[^\s]+' 11 | 12 | # Ignore whitespace 13 | t_ignore = ' \t\n' 14 | 15 | def t_error(t): 16 | print(f"Illegal character '{t.value[0]}'") 17 | t.lexer.skip(1) 18 | 19 | # Parser 20 | def p_search_query(p): 21 | ''' 22 | search_query : TERM 23 | ''' 24 | p[0] = p[1] 25 | 26 | def p_error(p): 27 | if p: 28 | print(f"Syntax error at '{p.value}'") 29 | else: 30 | print("Syntax error at EOF") 31 | 32 | # Build the lexer and parser 33 | lexer = lex.lex() 34 | parser = yacc.yacc() 35 | 36 | if __name__ == "__main__": 37 | # Example usage 38 | test_input = "one" 39 | result = 
parser.parse(test_input) 40 | print(f"Result: {result}") -------------------------------------------------------------------------------- /scratchpad/python_string_list_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ply import lex, yacc 4 | 5 | # Lexer 6 | tokens = ( 7 | 'TERM', 8 | ) 9 | 10 | t_TERM = r'[^\s]+' 11 | 12 | # Ignore whitespace 13 | t_ignore = ' \t\n' 14 | 15 | def t_error(t): 16 | print(f"Illegal character '{t.value[0]}'") 17 | t.lexer.skip(1) 18 | 19 | # Parser 20 | def p_search_query(p): 21 | ''' 22 | search_query : search_terms 23 | ''' 24 | p[0] = p[1] 25 | 26 | def p_search_terms(p): 27 | ''' 28 | search_terms : search_terms TERM 29 | | TERM 30 | ''' 31 | if len(p) == 2: 32 | p[0] = [p[1]] 33 | else: 34 | p[0] = p[1] + [p[2]] 35 | 36 | def p_error(p): 37 | if p: 38 | print(f"Syntax error at '{p.value}'") 39 | else: 40 | print("Syntax error at EOF") 41 | 42 | # Build the lexer and parser 43 | lexer = lex.lex() 44 | parser = yacc.yacc() 45 | 46 | if __name__ == "__main__": 47 | # Example usage 48 | test_input = " one two three " 49 | result = parser.parse(test_input) 50 | print(f"Result: {result}") -------------------------------------------------------------------------------- /scratchpad/python_string_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from ply import lex, yacc 3 | from typing import Dict, List, NamedTuple 4 | 5 | class SearchQuery(NamedTuple): 6 | search_terms: List[str] 7 | 8 | # Lexer 9 | tokens = ( 10 | 'TERM', 11 | ) 12 | 13 | def t_TERM(t): 14 | r'[^\s]+' 15 | return t 16 | 17 | t_ignore = ' \t' 18 | 19 | def t_newline(t): 20 | r'\n+' 21 | pass # Ignore newlines (if any) 22 | 23 | def t_error(t): 24 | print(f"Illegal character '{t.value[0]}'") 25 | t.lexer.skip(1) 26 | 27 | # Parser 28 | def p_query(p): 29 | ''' 30 | string : terms 31 | ''' 32 | p[0] = 
SearchQuery(search_terms=p[1]) 33 | 34 | def p_terms(p): 35 | ''' 36 | terms : TERM 37 | | terms TERM 38 | ''' 39 | if len(p) == 2: 40 | p[0] = [p[1]] 41 | else: 42 | p[0] = p[1] + [p[2]] 43 | 44 | def p_error(p): 45 | if p: 46 | print(f"Syntax error at '{p.value}'") 47 | else: 48 | print("Syntax error at EOF") 49 | 50 | # Build lexer and parser 51 | lexer = lex.lex() 52 | parser = yacc.yacc(debug=False) 53 | 54 | def parse_string(query: str) -> SearchQuery: 55 | """ 56 | Parse a search query string into a SearchQuery object. 57 | 58 | Args: 59 | query: The search query string to parse 60 | 61 | Returns: 62 | SearchQuery object containing search terms 63 | 64 | Raises: 65 | Exception: If parsing fails 66 | """ 67 | result = parser.parse(query) 68 | return result 69 | 70 | def main(): 71 | # Example usage with plain whitespace-separated terms (this lexer has no quoted-string rule) 72 | test_queries = [ 73 | 'red shoes', 74 | 'comfortable red shoes' 75 | ] 76 | 77 | for query in test_queries: 78 | print(f"\nParsing query: {query}") 79 | try: 80 | result = parse_string(query) 81 | print(f"Search terms: {result.search_terms}") 82 | except Exception as e: 83 | print(f"Error parsing query: {e}") 84 | 85 | if __name__ == '__main__': 86 | main() 87 | -------------------------------------------------------------------------------- /scratchpad/react-input-with-error.js: -------------------------------------------------------------------------------- 1 | /* 2 | https://microsoft.github.io/monaco-editor/playground.html?source=v0.52.0#example-interacting-with-the-editor-line-and-inline-decorations 3 | 4 | probably better to just use the monaco editor 5 | */ 6 | 7 | import React, { useState, useRef } from "react"; 8 | 9 | const SearchInput = () => { 10 | const [query, setQuery] = useState( 11 | "invalid_field:test category:books bad_field:value color:red size:large location:paris date:2024" 12 | ); 13 | const inputRef = useRef(null); 14 | const errorRanges = [ 15 | [0, 13], 16 | [34, 43], 17 | ]; 18 | 19 | const 
handleInput = (e) => { 20 | setQuery(e.currentTarget.textContent); 21 | }; 22 | 23 | const renderContent = () => { 24 | let result = []; 25 | let lastIndex = 0; 26 | 27 | errorRanges.forEach(([start, end], i) => { 28 | if (start > lastIndex) { 29 | result.push( 30 | {query.slice(lastIndex, start)} 31 | ); 32 | } 33 | 34 | result.push( 35 | 36 | {query.slice(start, end)} 37 | 38 | ); 39 | 40 | lastIndex = end; 41 | }); 42 | 43 | if (lastIndex < query.length) { 44 | result.push({query.slice(lastIndex)}); 45 | } 46 | 47 | return result; 48 | }; 49 | 50 | return ( 51 |
52 |
53 |
63 | {renderContent()} 64 |
65 |
66 |
67 | ); 68 | }; 69 | 70 | export default SearchInput; 71 | -------------------------------------------------------------------------------- /scratchpad/typescript-ast-to-sql.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ts-node 2 | 3 | import { 4 | Expression, 5 | SearchQuery, 6 | parseSearchQuery, 7 | } from "./typescript-lexer-parser-functional"; 8 | import { validate } from "./typescript-field-validator"; 9 | 10 | interface SqlQueryResult { 11 | text: string; 12 | values: any[]; 13 | } 14 | 15 | interface SqlState { 16 | paramCounter: number; 17 | values: any[]; 18 | searchableColumns: string[]; 19 | } 20 | 21 | // Constants 22 | const SPECIAL_CHARS = ["\\", "%", "_"] as const; 23 | const ESCAPE_CHAR = "\\"; 24 | 25 | // Helper Functions 26 | const escapeRegExp = (str: string): string => 27 | str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); 28 | 29 | const escapeSpecialChars = (value: string): string => 30 | SPECIAL_CHARS.reduce( 31 | (escaped, char) => 32 | escaped.replace(new RegExp(escapeRegExp(char), "g"), ESCAPE_CHAR + char), 33 | value 34 | ); 35 | 36 | // Create a new parameter placeholder and update state 37 | const nextParam = (state: SqlState): [string, SqlState] => { 38 | const paramName = `$${state.paramCounter}`; 39 | const newState = { 40 | ...state, 41 | paramCounter: state.paramCounter + 1, 42 | }; 43 | return [paramName, newState]; 44 | }; 45 | 46 | // Add a value to the state and return updated state 47 | const addValue = (state: SqlState, value: any): SqlState => ({ 48 | ...state, 49 | values: [...state.values, value], 50 | }); 51 | 52 | /** 53 | * Convert a search term to SQL ILIKE conditions 54 | */ 55 | const termToSql = (term: string, state: SqlState): [string, SqlState] => { 56 | const escapedTerm = escapeSpecialChars(term); 57 | const [paramName, newState] = nextParam(state); 58 | 59 | const conditions = state.searchableColumns.map( 60 | (column) => `${column} ILIKE ${paramName}` 61 | ); 62 
/**
 * Convert a field:value pair to SQL
 *
 * The field name is compared case-insensitively to pick a strategy, but the
 * name is forwarded to the chosen helper exactly as it was supplied:
 *   - "date" / "timestamp" -> dateFieldToSql
 *   - "id" / "user_id"     -> idFieldToSql
 *   - anything else        -> defaultFieldToSql
 *
 * @returns the SQL fragment together with the updated parameter state
 */
const fieldToSql = (
  field: string,
  value: string,
  state: SqlState
): [string, SqlState] => {
  const normalized = field.toLowerCase();

  if (normalized === "date" || normalized === "timestamp") {
    return dateFieldToSql(field, value, state);
  }

  if (normalized === "id" || normalized === "user_id") {
    return idFieldToSql(field, value, state);
  }

  return defaultFieldToSql(field, value, state);
};
/**
 * Convert a SearchQuery to a SQL WHERE clause
 *
 * A query without an expression produces the always-true clause "1=1" with no
 * bound values. Otherwise the expression tree is rendered by expressionToSql
 * and the values it collected are returned alongside the SQL text.
 *
 * @param query parsed search query
 * @param searchableColumns columns that bare search terms are matched against
 */
const searchQueryToSql = (query: SearchQuery, searchableColumns: string[]): SqlQueryResult => {
  const { expression } = query;
  if (!expression) {
    return { text: "1=1", values: [] };
  }

  // Start from a fresh state: placeholders are numbered from $1 and no values
  // have been collected yet.
  const [text, { values }] = expressionToSql(expression, {
    paramCounter: 1,
    values: [],
    searchableColumns,
  });

  return { text, values };
};
boots" AND (color:black OR color:brown)', 203 | "red boots color:blue date:2024-01-01", 204 | "winter boots ((user_id:123 OR admin_id:456) AND status:active)", 205 | ]; 206 | 207 | for (const query of testQueries) { 208 | console.log("\nSearch query:", query); 209 | try { 210 | const result = searchStringToSql(query); 211 | console.log("SQL:", result.text); 212 | console.log("Values:", result.values); 213 | } catch (error) { 214 | console.error( 215 | "Error converting query:", 216 | error instanceof Error ? error.message : error 217 | ); 218 | } 219 | } 220 | 221 | export { searchQueryToSql, searchStringToSql, type SqlQueryResult }; 222 | -------------------------------------------------------------------------------- /scratchpad/typescript-ast-to-sql.ts.output.txt: -------------------------------------------------------------------------------- 1 | 2 | Search query: comfortable AND (leather OR suede) brand:nike 3 | Valid query: { 4 | isValid: false, 5 | errors: [ 6 | { 7 | field: 'brand', 8 | message: 'Invalid field: "brand". Allowed fields are: color, category, date' 9 | } 10 | ] 11 | } 12 | 13 | Search query: category:"winter boots" AND (color:black OR color:brown) 14 | Valid query: { isValid: true, errors: [] } 15 | SQL: (category ILIKE $1 AND (color ILIKE $2 OR color ILIKE $3)) 16 | Values: [ '%winter boots%', '%black%', '%brown%' ] 17 | 18 | Search query: red boots color:blue date:2024-01-01 19 | Valid query: { isValid: true, errors: [] } 20 | SQL: ((((title ILIKE $1 OR description ILIKE $1 OR content ILIKE $1 OR name ILIKE $1) AND (title ILIKE $2 OR description ILIKE $2 OR content ILIKE $2 OR name ILIKE $2)) AND color ILIKE $3) AND date::date = $4::date) 21 | Values: [ '%red%', '%boots%', '%blue%', '2024-01-01' ] 22 | 23 | Search query: winter boots ((user_id:123 OR admin_id:456) AND status:active) 24 | Valid query: { 25 | isValid: false, 26 | errors: [ 27 | { 28 | field: 'user_id', 29 | message: 'Invalid field: "user_id". 
/** A single rejected field reference. */
export interface ValidationError {
  field: string;
  message: string;
}

/** Outcome of validating a whole query. */
export interface ValidationResult {
  isValid: boolean;
  errors: ValidationError[];
}

/**
 * Build the error reported for a field that is not in the allow-list.
 * The message lists the allowed names in the set's iteration order.
 */
const createFieldError = (
  field: string,
  allowedColumns: Set<string>
): ValidationError => ({
  field,
  message: `Invalid field: "${field}". Allowed fields are: ${[
    ...allowedColumns,
  ].join(", ")}`,
});

/**
 * Check one field name against the allow-list.
 * The name is lower-cased before the lookup, so `allowedColumns` is expected
 * to hold lower-case names; the error keeps the name as the user wrote it.
 *
 * @returns an empty array when the field is allowed, otherwise one error
 */
const validateField = (
  field: string,
  allowedColumns: Set<string>
): ValidationError[] => {
  if (!allowedColumns.has(field.toLowerCase())) {
    return [createFieldError(field, allowedColumns)];
  }
  return [];
};

/**
 * Recursively collect field errors from an expression tree.
 * FIELD nodes are validated, AND/OR nodes contribute the errors of their left
 * operand followed by those of their right operand, TERM nodes never fail.
 */
const validateExpression = (
  expr: Expression,
  allowedColumns: Set<string>
): ValidationError[] => {
  switch (expr.type) {
    case "FIELD":
      return validateField(expr.key, allowedColumns);
    case "AND":
    case "OR":
      return [
        ...validateExpression(expr.left, allowedColumns),
        ...validateExpression(expr.right, allowedColumns),
      ];
    case "TERM":
      return [];
  }
};
// Type definitions

/** Immutable cursor over the text being parsed. */
interface ParserState {
  readonly input: string;
  readonly position: number;
}

/** A produced value together with the state at which parsing resumes. */
interface ParseResult<T> {
  readonly state: ParserState;
  readonly value: T;
}

interface SearchQuery {
  readonly searchTerms: string[];
}

/** A parser consumes input starting at `state` and yields a value of type T. */
type Parser<T> = (state: ParserState) => ParseResult<T>;

// Parser state constructor
const makeState = (input: string, position: number = 0): ParserState => ({
  input,
  position,
});

// Core parsing primitives

/** True once the cursor has moved past the last character. */
const atEnd = (state: ParserState): boolean =>
  state.position >= state.input.length;

/** Character under the cursor, or null at end of input. */
const currentChar = (state: ParserState): string | null =>
  atEnd(state) ? null : state.input[state.position];

/** New state one character further on; the argument is left untouched. */
const advance = (state: ParserState): ParserState =>
  makeState(state.input, state.position + 1);

/** Build a function that moves the cursor past every leading character accepted by `predicate`. */
const skipWhile =
  (predicate: (char: string) => boolean) =>
  (state: ParserState): ParserState => {
    let current = state;
    while (!atEnd(current) && predicate(currentChar(current)!)) {
      current = advance(current);
    }
    return current;
  };

const skipWhitespace = skipWhile((char) => /\s/.test(char));

// Parsing combinators

/**
 * Build a parser that consumes the longest run of characters accepted by
 * `predicate` and returns it (possibly the empty string).
 */
const takeWhile =
  (predicate: (char: string) => boolean): Parser<string> =>
  (state: ParserState): ParseResult<string> => {
    const end = skipWhile(predicate)(state);
    return {
      state: end,
      // One slice instead of appending character by character.
      value: state.input.slice(state.position, end.position),
    };
  };

// Specific parsers

/**
 * Parse a double-quoted string. Inside the quotes a backslash makes the next
 * character literal (so `\"` yields a quote and `\\` a backslash).
 *
 * @throws Error when the opening or closing quote is missing, or when the
 *         input ends right after a backslash
 */
const parseQuotedString: Parser<string> = (
  state: ParserState
): ParseResult<string> => {
  const start = skipWhitespace(state);
  // currentChar is null at end of input, so this also covers empty input.
  if (currentChar(start) !== '"') {
    throw new Error("Expected opening quote");
  }

  let current = advance(start); // Skip opening quote
  let result = "";

  while (!atEnd(current) && currentChar(current) !== '"') {
    if (currentChar(current) === "\\") {
      // Drop the backslash and take the following character verbatim.
      current = advance(current);
      if (atEnd(current)) {
        throw new Error("Unexpected end of input after escape character");
      }
    }
    result += currentChar(current);
    current = advance(current);
  }

  if (atEnd(current)) {
    throw new Error("Expected closing quote");
  }

  return {
    state: advance(current), // Skip closing quote
    value: result,
  };
};

/**
 * Parse a run of non-whitespace characters.
 *
 * @throws Error when no such character is available
 */
const parseWord: Parser<string> = (state: ParserState): ParseResult<string> => {
  const result = takeWhile((char) => /[^\s]/.test(char))(skipWhitespace(state));

  if (result.value.length === 0) {
    throw new Error("Expected a word");
  }

  return result;
};

/** Parse one search term: a quoted string if it opens with `"`, otherwise a word. */
const parseTerm: Parser<string> = (state: ParserState): ParseResult<string> => {
  const start = skipWhitespace(state);

  if (atEnd(start)) {
    throw new Error("Unexpected end of input");
  }

  return currentChar(start) === '"'
    ? parseQuotedString(start)
    : parseWord(start);
};

// Main parser

/**
 * Split a search string into terms; double-quoted phrases count as one term.
 *
 * Parsing is best-effort: at the first term that cannot be parsed (for
 * example an unterminated quote) the loop stops and the terms collected so far
 * are returned. Any other failure is rethrown prefixed with "Parse error: ".
 */
const parseSearchQuery = (input: string): SearchQuery => {
  try {
    const searchTerms: string[] = [];
    let state = skipWhitespace(makeState(input));

    while (!atEnd(state)) {
      try {
        const result = parseTerm(state);
        searchTerms.push(result.value);
        state = skipWhitespace(result.state);
      } catch {
        // If we can't parse a term, keep what we have and stop.
        break;
      }
    }

    return {
      searchTerms,
    };
  } catch (error) {
    if (error instanceof Error) {
      throw new Error(`Parse error: ${error.message}`);
    }
    throw error;
  }
};
// Parser state constructor
/**
 * Create a parser state: the input text plus a cursor position (default 0).
 */
var makeState = function (input, position) {
    if (position === void 0) { position = 0; }
    return ({
        input: input,
        position: position,
    });
};
// Core parsing primitives
/** True once the cursor has moved past the last character. */
var atEnd = function (state) {
    return state.position >= state.input.length;
};
/** Character under the cursor, or null at end of input. */
var currentChar = function (state) {
    return atEnd(state) ? null : state.input[state.position];
};
/** New state one character further on; the argument is left untouched. */
var advance = function (state) {
    return makeState(state.input, state.position + 1);
};
/** Build a function that moves the cursor past every leading character accepted by `predicate`. */
var skipWhile = function (predicate) {
    return function (state) {
        var current = state;
        while (!atEnd(current) && predicate(currentChar(current))) {
            current = advance(current);
        }
        return current;
    };
};
var skipWhitespace = skipWhile(function (char) { return /\s/.test(char); });
// Parsing combinators
/**
 * Build a parser that consumes the longest run of characters accepted by
 * `predicate` and returns it (possibly the empty string).
 */
var takeWhile = function (predicate) {
    return function (state) {
        var end = skipWhile(predicate)(state);
        return {
            state: end,
            value: state.input.slice(state.position, end.position),
        };
    };
};
// Specific parsers
/**
 * Parse a double-quoted string; a backslash makes the next character literal.
 * Throws when a quote is missing or the input ends right after a backslash.
 */
var parseQuotedString = function (state) {
    state = skipWhitespace(state);
    if (atEnd(state) || currentChar(state) !== '"') {
        throw new Error("Expected opening quote");
    }
    var current = advance(state); // Skip opening quote
    var result = "";
    while (!atEnd(current) && currentChar(current) !== '"') {
        if (currentChar(current) === "\\") {
            // Drop the backslash and take the following character verbatim.
            current = advance(current);
            if (atEnd(current)) {
                throw new Error("Unexpected end of input after escape character");
            }
        }
        result += currentChar(current);
        current = advance(current);
    }
    if (atEnd(current)) {
        throw new Error("Expected closing quote");
    }
    return {
        state: advance(current), // Skip closing quote
        value: result,
    };
};
/**
 * Parse a run of characters that are neither whitespace nor ':'.
 * Throws when no such character is available.
 */
var parseWord = function (state) {
    state = skipWhitespace(state);
    var result = takeWhile(function (char) { return /[^\s:]/.test(char); })(state);
    if (result.value.length === 0) {
        throw new Error("Expected a word");
    }
    return result;
};
/** Parse one term: a quoted string if it opens with '"', otherwise a word. */
var parseTerm = function (state) {
    state = skipWhitespace(state);
    if (atEnd(state)) {
        throw new Error("Unexpected end of input");
    }
    if (currentChar(state) === '"') {
        return parseQuotedString(state);
    }
    else {
        return parseWord(state);
    }
};
/**
 * Parse `key:value`, where the value is a word or a quoted string.
 * The key is lower-cased. Throws when the key is not followed by a colon.
 */
var parseKeyValuePair = function (state) {
    state = skipWhitespace(state);
    var _a = parseWord(state), afterKey = _a.state, key = _a.value;
    if (atEnd(afterKey) || currentChar(afterKey) !== ":") {
        throw new Error("Expected colon after key");
    }
    var afterColon = advance(afterKey); // Skip colon
    var _b = parseTerm(afterColon), afterValue = _b.state, value = _b.value;
    return {
        state: afterValue,
        value: { key: key.toLowerCase(), value: value },
    };
};
/**
 * Parse the next token: a key:value pair when possible, otherwise a term.
 * Throws "Failed to parse token" when neither form matches.
 */
var parseToken = function (state) {
    var parsed = null;
    // A token that opens with a quote is always a search term. Without this
    // check parseWord accepted the quote as part of a key, so '"16:9 display"'
    // was split into the field '"16' = '9' plus a stray term 'display"'.
    if (currentChar(skipWhitespace(state)) !== '"') {
        try {
            parsed = parseKeyValuePair(state);
        }
        catch (e) {
            // Not a key:value pair; retry from the same state as a term.
            parsed = null;
        }
    }
    if (parsed === null) {
        try {
            parsed = parseTerm(state);
        }
        catch (e) {
            throw new Error("Failed to parse token");
        }
    }
    return {
        state: parsed.state,
        value: parsed.value,
    };
};
// Main parser
/**
 * Split a search string into free-text terms and key:value fields.
 *
 * Parsing is best-effort: at the first token that cannot be parsed the loop
 * stops and whatever was collected so far is returned. A later occurrence of
 * a key overwrites an earlier one. Any other failure is rethrown prefixed
 * with "Parse error: ".
 */
var parseSearchQuery = function (input) {
    try {
        var state = makeState(input);
        var searchTerms = [];
        var fields = {};
        state = skipWhitespace(state);
        while (!atEnd(state)) {
            // Try to parse a token (either field-value pair or search term)
            try {
                var result = parseToken(state);
                if (typeof result.value === "object" && "key" in result.value) {
                    // It's a KeyValuePair
                    fields[result.value.key] = result.value.value;
                }
                else {
                    // It's a search term
                    searchTerms.push(result.value);
                }
                state = skipWhitespace(result.state);
            }
            catch (e) {
                // If we can't parse a token, break
                break;
            }
        }
        return {
            searchTerms: searchTerms,
            fields: fields,
        };
    }
    catch (error) {
        if (error instanceof Error) {
            throw new Error("Parse error: ".concat(error.message));
        }
        throw error;
    }
};
// Type definitions

/** Immutable cursor over the text being parsed. */
interface ParserState {
  readonly input: string;
  readonly position: number;
}

/** A produced value together with the state at which parsing resumes. */
interface ParseResult<T> {
  readonly state: ParserState;
  readonly value: T;
}

interface SearchQuery {
  readonly searchTerms: string[];
}

/** A parser consumes input starting at `state` and yields a value of type T. */
type Parser<T> = (state: ParserState) => ParseResult<T>;

// Parser state constructor
const makeState = (input: string, position: number = 0): ParserState => ({
  input,
  position,
});

// Core parsing primitives

/** True once the cursor has moved past the last character. */
const atEnd = (state: ParserState): boolean =>
  state.position >= state.input.length;

/** Character under the cursor, or null at end of input. */
const currentChar = (state: ParserState): string | null =>
  atEnd(state) ? null : state.input[state.position];

/** New state one character further on; the argument is left untouched. */
const advance = (state: ParserState): ParserState =>
  makeState(state.input, state.position + 1);

/** Build a function that moves the cursor past every leading character accepted by `predicate`. */
const skipWhile =
  (predicate: (char: string) => boolean) =>
  (state: ParserState): ParserState => {
    let current = state;
    while (!atEnd(current) && predicate(currentChar(current)!)) {
      current = advance(current);
    }
    return current;
  };

const skipWhitespace = skipWhile((char) => /\s/.test(char));

// Parsing combinators

/**
 * Build a parser that consumes the longest run of characters accepted by
 * `predicate` and returns it (possibly the empty string).
 */
const takeWhile =
  (predicate: (char: string) => boolean): Parser<string> =>
  (state: ParserState): ParseResult<string> => {
    let result = "";
    let current = state;

    while (!atEnd(current) && predicate(currentChar(current)!)) {
      result += currentChar(current);
      current = advance(current);
    }

    return {
      state: current,
      value: result,
    };
  };

/**
 * Parse one whitespace-delimited word, skipping any leading whitespace.
 *
 * @throws Error when no non-whitespace character is available
 */
const parseTerm: Parser<string> = (state: ParserState): ParseResult<string> => {
  const start = skipWhitespace(state);

  const result = takeWhile((char) => /[^\s]/.test(char))(start);

  if (result.value.length === 0) {
    throw new Error("Expected a word");
  }

  return result;
};

// Main parser

/**
 * Split a search string into its whitespace-separated terms.
 *
 * A term that fails to parse ends the loop and the terms collected so far are
 * returned; any other failure is rethrown prefixed with "Parse error: ".
 */
const parseSearchQuery = (input: string): SearchQuery => {
  try {
    let state = makeState(input);
    const searchTerms: string[] = [];

    state = skipWhitespace(state);

    while (!atEnd(state)) {
      try {
        const result = parseTerm(state);

        // The restored Parser<string> type makes the former cast unnecessary.
        searchTerms.push(result.value);

        state = skipWhitespace(result.state);
      } catch (e) {
        // If we can't parse a token, break
        break;
      }
    }

    return {
      searchTerms,
    };
  } catch (error) {
    if (error instanceof Error) {
      throw new Error(`Parse error: ${error.message}`);
    }
    throw error;
  }
};
// Type definitions

/** Immutable cursor over the text being parsed. */
interface ParserState {
  readonly input: string;
  readonly position: number;
}

/** A produced value together with the state at which parsing resumes. */
interface ParseResult<T> {
  readonly state: ParserState;
  readonly value: T;
}

interface SearchQuery {
  readonly searchTerms: string[];
}

/** A parser consumes input starting at `state` and yields a value of type T. */
type Parser<T> = (state: ParserState) => ParseResult<T>;

// Parser state constructor
const makeState = (input: string, position: number = 0): ParserState => ({
  input,
  position,
});

// Core parsing primitives

/** True once the cursor has moved past the last character. */
const atEnd = (state: ParserState): boolean =>
  state.position >= state.input.length;

/** Character under the cursor, or null at end of input. */
const currentChar = (state: ParserState): string | null =>
  atEnd(state) ? null : state.input[state.position];

/** New state one character further on; the argument is left untouched. */
const advance = (state: ParserState): ParserState =>
  makeState(state.input, state.position + 1);

/** Build a function that moves the cursor past every leading character accepted by `predicate`. */
const skipWhile =
  (predicate: (char: string) => boolean) =>
  (state: ParserState): ParserState => {
    let current = state;
    while (!atEnd(current) && predicate(currentChar(current)!)) {
      current = advance(current);
    }
    return current;
  };

const skipWhitespace = skipWhile((char) => /\s/.test(char));

// Parsing combinators

/**
 * Build a parser that consumes the longest run of characters accepted by
 * `predicate` and returns it (possibly the empty string).
 */
const takeWhile =
  (predicate: (char: string) => boolean): Parser<string> =>
  (state: ParserState): ParseResult<string> => {
    let result = "";
    let current = state;

    while (!atEnd(current) && predicate(currentChar(current)!)) {
      result += currentChar(current);
      current = advance(current);
    }

    return {
      state: current,
      value: result,
    };
  };

// Type definitions for AST nodes
type Atom = string;
type List = Expression[];
type Expression = Atom | List;

// Parser combinators for specific syntax elements

/**
 * Parse an atom: a run of characters that are neither whitespace nor a
 * parenthesis.
 *
 * @throws Error when the next character cannot start an atom
 */
const atom: Parser<Atom> = (state: ParserState): ParseResult<Atom> => {
  state = skipWhitespace(state);
  const result = takeWhile((char) => /[^\s()]/.test(char))(state);

  if (result.value.length === 0) {
    throw new Error(`Expected atom at position ${state.position}`);
  }

  return result;
};

/**
 * Parse a parenthesised list of zero or more expressions.
 *
 * @throws Error when the opening or the matching closing parenthesis is missing
 */
const list: Parser<List> = (state: ParserState): ParseResult<List> => {
  state = skipWhitespace(state);

  // Check opening parenthesis
  if (currentChar(state) !== "(") {
    throw new Error(`Expected '(' at position ${state.position}`);
  }
  state = advance(state);

  // Parse expressions until closing parenthesis
  const expressions: Expression[] = [];
  state = skipWhitespace(state);

  while (!atEnd(state) && currentChar(state) !== ")") {
    const result = expression(state);
    expressions.push(result.value);
    state = skipWhitespace(result.state);
  }

  // Check closing parenthesis (currentChar is null when input ran out)
  if (currentChar(state) !== ")") {
    throw new Error(`Expected ')' at position ${state.position}`);
  }
  state = advance(state);

  return {
    state,
    value: expressions,
  };
};

/**
 * Parse either a list (when the next character is "(") or an atom.
 *
 * @throws Error on end of input, or whatever `list` / `atom` throw
 */
const expression: Parser<Expression> = (
  state: ParserState
): ParseResult<Expression> => {
  state = skipWhitespace(state);

  if (atEnd(state)) {
    throw new Error(`Unexpected end of input at position ${state.position}`);
  }

  // Check if we're starting a list or an atom
  if (currentChar(state) === "(") {
    return list(state);
  } else {
    return atom(state);
  }
};

// Main parse function

/**
 * Parse exactly one expression from `input`. Only whitespace may follow it.
 *
 * @throws Error prefixed with "Parse error: " for any syntax problem
 */
const parse = (input: string): Expression => {
  try {
    const initialState = makeState(input);
    const result = expression(initialState);

    // Check if we've consumed all input
    const finalState = skipWhitespace(result.state);
    if (!atEnd(finalState)) {
      throw new Error(`Unexpected input at position ${finalState.position}`);
    }

    return result.value;
  } catch (error) {
    if (error instanceof Error) {
      throw new Error(`Parse error: ${error.message}`);
    }
    throw error;
  }
};

// Helper function to stringify the parsed result
/** Render an expression back to text: lists in parentheses, items separated by one space. */
const stringify = (expr: Expression): string => {
  if (Array.isArray(expr)) {
    return `(${expr.map(stringify).join(" ")})`;
  }
  return expr;
};
error.message); 171 | } 172 | } 173 | }; 174 | 175 | // Test cases showing paren matching 176 | const testCases = [ 177 | "((one) two three)", 178 | "((a b) (c d) e)", 179 | "(a)", 180 | "((a (b c)) d)", 181 | "(((deeply) nested) parens)" 182 | ]; 183 | 184 | // Run tests 185 | testCases.forEach(testParse); -------------------------------------------------------------------------------- /search-input-query-demo/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 25 | 26 | .vite/ 27 | -------------------------------------------------------------------------------- /search-input-query-demo/README.md: -------------------------------------------------------------------------------- 1 | # Search Input Query 2 | 3 | ## Demo 4 | 5 | ![sqi-demo](https://github.com/user-attachments/assets/1463555b-91a3-4f7b-9e0e-4172dd78afdd) 6 | 7 | ### Running Demo 8 | 9 | ```bash 10 | cd search-input-query-demo 11 | npm install 12 | npm run dev 13 | ``` 14 | -------------------------------------------------------------------------------- /search-input-query-demo/eslint.config.js: -------------------------------------------------------------------------------- 1 | import js from '@eslint/js' 2 | import globals from 'globals' 3 | import reactHooks from 'eslint-plugin-react-hooks' 4 | import reactRefresh from 'eslint-plugin-react-refresh' 5 | import tseslint from 'typescript-eslint' 6 | 7 | export default tseslint.config( 8 | { ignores: ['dist'] }, 9 | { 10 | extends: [js.configs.recommended, ...tseslint.configs.recommended], 11 | files: ['**/*.{ts,tsx}'], 12 | languageOptions: { 13 | 
ecmaVersion: 2020, 14 | globals: globals.browser, 15 | }, 16 | plugins: { 17 | 'react-hooks': reactHooks, 18 | 'react-refresh': reactRefresh, 19 | }, 20 | rules: { 21 | ...reactHooks.configs.recommended.rules, 22 | 'react-refresh/only-export-components': [ 23 | 'warn', 24 | { allowConstantExport: true }, 25 | ], 26 | }, 27 | }, 28 | ) 29 | -------------------------------------------------------------------------------- /search-input-query-demo/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Search Input Query 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /search-input-query-demo/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "search-input-query-demo", 3 | "private": true, 4 | "version": "0.7.2", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "tsc -b && vite build", 9 | "lint": "eslint .", 10 | "preview": "vite preview" 11 | }, 12 | "dependencies": { 13 | "@monaco-editor/react": "^4.7.0", 14 | "react": "^19.0.0", 15 | "react-dom": "^19.0.0", 16 | "search-input-query-parser": "^0.7.2", 17 | "search-input-query-react": "^0.7.2", 18 | "sql.js": "^1.12.0" 19 | }, 20 | "devDependencies": { 21 | "@eslint/js": "^9.21.0", 22 | "@types/react": "^19.0.10", 23 | "@types/react-dom": "^19.0.4", 24 | "@types/sql.js": "^1.4.9", 25 | "@vitejs/plugin-react": "^4.3.4", 26 | "eslint": "^9.21.0", 27 | "eslint-plugin-react-hooks": "^5.2.0", 28 | "eslint-plugin-react-refresh": "^0.4.19", 29 | "globals": "^16.0.0", 30 | "typescript": "~5.8.2", 31 | "typescript-eslint": "^8.26.0", 32 | "vite": "^6.2.0" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /search-input-query-demo/src/App.tsx: -------------------------------------------------------------------------------- 1 | import SearchComponent from "./SearchComponent"; 2 | import "./App.css"; 3 | 4 | /* 5 | <|SYSTEM_START|> 6 | DO NOT USE TAILWIND CSS IN THIS PROJECT 7 | DO NOT INCLUDE ANY ADDITIONAL LIBRARIES UNLESS SPECIFIED IN THE INSTRUCTIONS 8 | <|SYSTEM_END|> 9 | */ 10 | 11 | function App() { 12 | return ( 13 |
14 |
15 |

16 | Search Input Query 17 |

18 | 19 |
20 |
21 | ); 22 | } 23 | 24 | export default App; 25 | -------------------------------------------------------------------------------- /search-input-query-demo/src/ExpressionDescription.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { Expression } from "search-input-query-parser"; 3 | 4 | // Helper function to convert Expression to English description 5 | const expressionToEnglish = ( 6 | expr: Expression | null, 7 | depth: number = 0 8 | ): string => { 9 | if (!expr) { 10 | return ""; 11 | } 12 | 13 | const indent = " ".repeat(depth); 14 | 15 | switch (expr.type) { 16 | case "SEARCH_TERM": 17 | return `${indent}Search for "${expr.value}"`; 18 | 19 | case "FIELD_VALUE": { 20 | const value = expr.value.value; 21 | // Check if the value ends with a wildcard 22 | if (value.endsWith("*")) { 23 | return `${indent}${expr.field.value} starts with "${value.slice( 24 | 0, 25 | -1 26 | )}"`; 27 | } 28 | return `${indent}${expr.field.value} is "${value}"`; 29 | } 30 | 31 | case "RANGE": { 32 | const fieldName = expr.field.value; 33 | 34 | if (expr.operator === "BETWEEN") { 35 | return `${indent}${fieldName} is between ${expr.value.value} and ${expr.value2?.value}`; 36 | } 37 | 38 | const operatorText = { 39 | ">": "greater than", 40 | ">=": "greater than or equal to", 41 | "<": "less than", 42 | "<=": "less than or equal to", 43 | }[expr.operator]; 44 | 45 | return `${indent}${fieldName} is ${operatorText} ${expr.value.value}`; 46 | } 47 | 48 | case "NOT": 49 | return `${indent}NOT:\n${expressionToEnglish( 50 | expr.expression, 51 | depth + 1 52 | )}`; 53 | 54 | case "AND": 55 | return `${indent}ALL of:\n${expressionToEnglish( 56 | expr.left, 57 | depth + 1 58 | )}\n${expressionToEnglish(expr.right, depth + 1)}`; 59 | 60 | case "OR": 61 | return `${indent}ANY of:\n${expressionToEnglish( 62 | expr.left, 63 | depth + 1 64 | )}\n${expressionToEnglish(expr.right, depth + 1)}`; 65 | 66 | case "WILDCARD": 
67 | if (expr.prefix === "") { 68 | return `${indent}Match all records`; 69 | } 70 | return `${indent}starts with "${expr.prefix}"`; 71 | 72 | case "IN": { 73 | const values = expr.values.map((v) => `"${v.value}"`).join(", "); 74 | return `${indent}${expr.field.value} is in [${values}]`; 75 | } 76 | } 77 | }; 78 | 79 | interface ExpressionDescriptionProps { 80 | expression: Expression | null; 81 | } 82 | 83 | export const ExpressionDescription: React.FC = ({ 84 | expression, 85 | }) => { 86 | const description = expressionToEnglish(expression); 87 | 88 | if (!description) { 89 | return null; 90 | } 91 | 92 | return ( 93 |
94 |

Query Description:

95 |
{description}
96 |
97 | ); 98 | }; 99 | -------------------------------------------------------------------------------- /search-input-query-demo/src/SearchTypeSelector.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { SearchType } from "search-input-query-parser/search-query-to-sql"; 3 | 4 | interface SearchTypeOption { 5 | value: SearchType; 6 | label: string; 7 | description: string; 8 | } 9 | 10 | const searchTypes: SearchTypeOption[] = [ 11 | { 12 | value: "ilike", 13 | label: "ILIKE", 14 | description: "Case-insensitive pattern matching using LIKE", 15 | }, 16 | { 17 | value: "tsvector", 18 | label: "Full Text Search", 19 | description: "PostgreSQL native full text search using tsvector/tsquery", 20 | }, 21 | { 22 | value: "paradedb", 23 | label: "ParadeDB", 24 | description: "Full text search using ParadeDB's @@@ operator", 25 | }, 26 | ]; 27 | 28 | interface SearchTypeSelectorProps { 29 | searchType: SearchType; 30 | onSearchTypeChange: (type: SearchType) => void; 31 | } 32 | 33 | const SearchTypeSelector: React.FC = ({ 34 | searchType, 35 | onSearchTypeChange, 36 | }) => { 37 | return ( 38 |
39 |

Search Type:

40 |
41 | {searchTypes.map((type) => ( 42 |
43 | onSearchTypeChange(e.target.value as SearchType)} 50 | /> 51 | 55 |
56 | ))} 57 |
58 |
59 | ); 60 | }; 61 | 62 | export default SearchTypeSelector; 63 | -------------------------------------------------------------------------------- /search-input-query-demo/src/db-service.ts: -------------------------------------------------------------------------------- 1 | // db-service.ts 2 | import type { Database, SqlJsStatic } from "sql.js"; 3 | import initSqlJs from "sql.js"; 4 | 5 | export interface Product { 6 | id: number; 7 | title: string; 8 | description: string; 9 | status: string; 10 | category: string; 11 | price: number; 12 | date: string; 13 | } 14 | 15 | type QueryResult = { 16 | columns: string[]; 17 | values: unknown[][]; 18 | }[]; 19 | 20 | interface DatabaseState { 21 | db: Database | null; 22 | initialized: boolean; 23 | initializationPromise: Promise | null; 24 | } 25 | 26 | const state: DatabaseState = { 27 | db: null, 28 | initialized: false, 29 | initializationPromise: null, 30 | }; 31 | 32 | const sampleData: Omit[] = [ 33 | { 34 | title: "Winter Boots", 35 | description: "Comfortable winter boots for extreme cold", 36 | status: "in stock", 37 | category: "winter boots", 38 | price: 129.99, 39 | date: "2024-01-15", 40 | }, 41 | { 42 | title: "Summer Sandals", 43 | description: "Light and breathable sandals for beach", 44 | status: "in stock", 45 | category: "summer shoes", 46 | price: 49.99, 47 | date: "2024-03-01", 48 | }, 49 | { 50 | title: "Running Shoes Nike Air", 51 | description: "Professional running shoes with air cushioning", 52 | status: "low stock", 53 | category: "athletic shoes", 54 | price: 159.99, 55 | date: "2024-02-15", 56 | }, 57 | { 58 | title: "Leather Boots", 59 | description: "Classic leather boots for all occasions", 60 | status: "out of stock", 61 | category: "boots", 62 | price: 199.99, 63 | date: "2023-12-01", 64 | }, 65 | { 66 | title: "Hiking Boots", 67 | description: "Waterproof hiking boots for mountain terrain", 68 | status: "in stock", 69 | category: "outdoor boots", 70 | price: 179.99, 71 | date: 
"2024-01-20", 72 | }, 73 | { 74 | title: "Dress Shoes Black", 75 | description: "Elegant black dress shoes for formal occasions", 76 | status: "in stock", 77 | category: "formal shoes", 78 | price: 149.99, 79 | date: "2024-02-01", 80 | }, 81 | { 82 | title: "Canvas Sneakers", 83 | description: "Casual canvas sneakers for everyday wear", 84 | status: "in stock", 85 | category: "casual shoes", 86 | price: 39.99, 87 | date: "2024-03-10", 88 | }, 89 | { 90 | title: "Kids Sport Shoes", 91 | description: "Durable sports shoes for active kids", 92 | status: "low stock", 93 | category: "children shoes", 94 | price: 49.99, 95 | date: "2024-02-20", 96 | }, 97 | ]; 98 | 99 | export async function initialize(): Promise { 100 | if (state.initialized) return; 101 | if (state.initializationPromise) return state.initializationPromise; 102 | 103 | state.initializationPromise = (async () => { 104 | const SQL: SqlJsStatic = await initSqlJs({ 105 | locateFile: (file: string) => `https://sql.js.org/dist/${file}`, 106 | }); 107 | 108 | state.db = new SQL.Database(); 109 | 110 | // Create the products table 111 | state.db.run(` 112 | CREATE TABLE products ( 113 | id INTEGER PRIMARY KEY, 114 | title TEXT, 115 | description TEXT, 116 | status TEXT, 117 | category TEXT, 118 | price REAL, 119 | date TEXT 120 | ); 121 | `); 122 | 123 | const stmt = state.db.prepare(` 124 | INSERT INTO products (title, description, status, category, price, date) 125 | VALUES (?, ?, ?, ?, ?, ?); 126 | `); 127 | 128 | sampleData.forEach((product) => { 129 | stmt.run([ 130 | product.title, 131 | product.description, 132 | product.status, 133 | product.category, 134 | product.price, 135 | product.date, 136 | ]); 137 | }); 138 | 139 | stmt.free(); 140 | state.initialized = true; 141 | })(); 142 | 143 | return state.initializationPromise; 144 | } 145 | 146 | export async function executeQuery( 147 | sqlQuery: string, 148 | params: (string | number)[] = [] 149 | ): Promise { 150 | await initialize(); 151 | 152 | if 
(!state.db) { 153 | throw new Error("Database not initialized"); 154 | } 155 | 156 | try { 157 | const results: QueryResult = state.db.exec(sqlQuery, params); 158 | 159 | if (!results || results.length === 0) { 160 | return []; 161 | } 162 | 163 | const columns = results[0].columns; 164 | return results[0].values.map((row) => { 165 | const product = columns.reduce>( 166 | (acc, col, index) => ({ 167 | ...acc, 168 | [col]: row[index], 169 | }), 170 | {} 171 | ); 172 | 173 | return { 174 | id: Number(product.id), 175 | title: String(product.title), 176 | description: String(product.description), 177 | status: String(product.status), 178 | category: String(product.category), 179 | price: Number(product.price), 180 | date: String(product.date), 181 | }; 182 | }); 183 | } catch (error) { 184 | if (error instanceof Error) { 185 | throw new Error(`Database query error: ${error.message}`); 186 | } 187 | throw new Error("Unknown database error occurred"); 188 | } 189 | } 190 | 191 | export async function searchProducts( 192 | whereClause: string, 193 | params: (string | number)[] = [] 194 | ): Promise { 195 | const query = ` 196 | SELECT * 197 | FROM products 198 | WHERE ${whereClause || "1=1"} 199 | `; 200 | const result = await executeQuery(query, params); 201 | return result; 202 | } 203 | -------------------------------------------------------------------------------- /search-input-query-demo/src/index.css: -------------------------------------------------------------------------------- 1 | :root { 2 | font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif; 3 | line-height: 1.5; 4 | font-weight: 400; 5 | 6 | color-scheme: light dark; 7 | color: rgba(255, 255, 255, 0.87); 8 | background-color: #242424; 9 | 10 | font-synthesis: none; 11 | text-rendering: optimizeLegibility; 12 | -webkit-font-smoothing: antialiased; 13 | -moz-osx-font-smoothing: grayscale; 14 | } 15 | 16 | a { 17 | font-weight: 500; 18 | color: #646cff; 19 | text-decoration: inherit; 20 | } 21 
| a:hover { 22 | color: #535bf2; 23 | } 24 | 25 | body { 26 | margin: 0; 27 | display: flex; 28 | /* place-items: center; */ 29 | min-width: 320px; 30 | min-height: 100vh; 31 | } 32 | 33 | h1 { 34 | font-size: 3.2em; 35 | line-height: 1.1; 36 | } 37 | 38 | button { 39 | border-radius: 8px; 40 | border: 1px solid transparent; 41 | padding: 0.6em 1.2em; 42 | font-size: 1em; 43 | font-weight: 500; 44 | font-family: inherit; 45 | background-color: #1a1a1a; 46 | cursor: pointer; 47 | transition: border-color 0.25s; 48 | } 49 | button:hover { 50 | border-color: #646cff; 51 | } 52 | button:focus, 53 | button:focus-visible { 54 | outline: 4px auto -webkit-focus-ring-color; 55 | } 56 | 57 | @media (prefers-color-scheme: light) { 58 | :root { 59 | color: #213547; 60 | background-color: #ffffff; 61 | } 62 | a:hover { 63 | color: #747bff; 64 | } 65 | button { 66 | background-color: #f9f9f9; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /search-input-query-demo/src/main.tsx: -------------------------------------------------------------------------------- 1 | import { StrictMode } from 'react' 2 | import { createRoot } from 'react-dom/client' 3 | import './index.css' 4 | import App from './App.tsx' 5 | 6 | createRoot(document.getElementById('root')!).render( 7 | 8 | 9 | , 10 | ) 11 | -------------------------------------------------------------------------------- /search-input-query-demo/src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /search-input-query-demo/tsconfig.app.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "incremental": true, 4 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo", 5 | "target": "ES2020", 6 | "useDefineForClassFields": true, 7 | "lib": ["ES2020", "DOM", 
"DOM.Iterable"], 8 | "module": "ESNext", 9 | "skipLibCheck": true, 10 | /* Bundler mode */ 11 | "moduleResolution": "bundler", 12 | "allowImportingTsExtensions": true, 13 | "isolatedModules": true, 14 | "moduleDetection": "force", 15 | "noEmit": true, 16 | "jsx": "react-jsx", 17 | /* Linting */ 18 | "strict": true, 19 | "noUnusedLocals": true, 20 | "noUnusedParameters": true, 21 | "noFallthroughCasesInSwitch": true 22 | }, 23 | "include": ["src"] 24 | } -------------------------------------------------------------------------------- /search-input-query-demo/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [], 3 | "references": [ 4 | { "path": "./tsconfig.app.json" }, 5 | { "path": "./tsconfig.node.json" } 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /search-input-query-demo/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "incremental": true, 4 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo", 5 | "target": "ES2022", 6 | "lib": ["ES2023"], 7 | "module": "ESNext", 8 | "skipLibCheck": true, 9 | /* Bundler mode */ 10 | "moduleResolution": "Bundler", 11 | "allowImportingTsExtensions": true, 12 | "isolatedModules": true, 13 | "moduleDetection": "force", 14 | "noEmit": true, 15 | /* Linting */ 16 | "strict": true, 17 | "noUnusedLocals": true, 18 | "noUnusedParameters": true, 19 | "noFallthroughCasesInSwitch": true 20 | }, 21 | "include": ["vite.config.ts"] 22 | } 23 | -------------------------------------------------------------------------------- /search-input-query-demo/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite' 2 | import react from '@vitejs/plugin-react' 3 | 4 | // https://vite.dev/config/ 5 | export default defineConfig({ 6 | plugins: [react()], 7 | }) 
8 | -------------------------------------------------------------------------------- /search-input-query-parser/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | dist/ 3 | .DS_Store -------------------------------------------------------------------------------- /search-input-query-parser/README.md: -------------------------------------------------------------------------------- 1 | # Search Input Query 2 | 3 | ## Parser 4 | 5 | ### Installation 6 | 7 | ```bash 8 | npm install search-input-query-parser 9 | ``` 10 | 11 | ### Basic Usage 12 | 13 | ```typescript 14 | import { 15 | parseSearchInputQuery, 16 | type FieldSchema 17 | } from 'search-input-query-parser'; 18 | 19 | // Define your field schemas 20 | const schemas: FieldSchema[] = [ 21 | { name: 'title', type: 'string' }, 22 | { name: 'price', type: 'number' }, 23 | { name: 'date', type: 'date' }, 24 | { name: 'in_stock', type: 'boolean' } 25 | ]; 26 | 27 | // Parse a search query 28 | const query = 'title:"winter boots" AND price:>100'; 29 | const result = parseSearchInputQuery(query, schemas); 30 | 31 | if (result.type === 'SEARCH_QUERY') { 32 | // Handle successful parse where the expression is in AST format 33 | console.log(result.expression); 34 | } else { 35 | // Handle validation errors 36 | console.log(result.errors); 37 | } 38 | ``` 39 | ## SQL Conversion 40 | 41 | The parser can convert search queries into SQL WHERE clauses using three different search strategies: 42 | 43 | 1. ILIKE - Case-insensitive pattern matching 44 | 2. tsvector - PostgreSQL full-text search 45 | 3. 
ParadeDB - BM25-based full-text search 46 | 47 | ### Basic Usage 48 | 49 | ```typescript 50 | import { 51 | searchStringToIlikeSql, 52 | searchStringToTsVectorSql, 53 | searchStringToParadeDbSql, 54 | type FieldSchema 55 | } from 'search-input-query-parser'; 56 | 57 | // Define searchable columns and schemas 58 | const searchableColumns = ['title', 'description']; 59 | const schemas: FieldSchema[] = [ 60 | { name: 'title', type: 'string' }, 61 | { name: 'price', type: 'number' }, 62 | { name: 'date', type: 'date' }, 63 | { name: 'in_stock', type: 'boolean' } 64 | ]; 65 | 66 | // Convert a search query to SQL 67 | const query = 'winter boots AND price:>100'; 68 | 69 | // Using ILIKE (case-insensitive pattern matching) 70 | const ilikeResult = searchStringToIlikeSql(query, searchableColumns, schemas); 71 | // Result: 72 | // { 73 | // text: "((lower(title) LIKE lower($1) OR lower(description) LIKE lower($1)) AND price > $2)", 74 | // values: ["%winter boots%", 100] 75 | // } 76 | 77 | // Using tsvector (PostgreSQL full-text search) 78 | const tsvectorResult = searchStringToTsVectorSql(query, searchableColumns, schemas); 79 | // Result: 80 | // { 81 | // text: "((to_tsvector('english', title) || to_tsvector('english', description)) @@ plainto_tsquery('english', $1) AND price > $2)", 82 | // values: ["winter boots", 100] 83 | // } 84 | 85 | // Using ParadeDB (BM25 search) 86 | const paradedbResult = searchStringToParadeDbSql(query, searchableColumns, schemas); 87 | // Result: 88 | // { 89 | // text: "((title @@@ $1 OR description @@@ $1) AND price @@@ '>' || $2)", 90 | // values: ['"winter boots"', 100] 91 | // } 92 | ``` 93 | 94 | ### Search Types Comparison 95 | 96 | | Feature | ILIKE | tsvector | ParadeDB | 97 | |---------|-------|----------|----------| 98 | | Case Sensitivity | Case-insensitive | Case-insensitive | Case-sensitive | 99 | | Pattern Matching | Simple wildcards | Language-aware tokens | BM25 ranking | 100 | | Performance | Slower on large datasets | 
Fast with proper indexes | Fast with proper indexes | 101 | | Setup Required | None | None (built into PostgreSQL) | ParadeDB extension | 102 | | Best For | Simple searches, small datasets | Advanced text search | Relevance-based search | 103 | 104 | ### Configuration Options 105 | 106 | ```typescript 107 | // Common options for all search types 108 | interface SearchQueryOptions { 109 | language?: string; // Language for text analysis (default: 'english') 110 | } 111 | 112 | // Example with options 113 | const result = searchStringToTsVectorSql( 114 | query, 115 | searchableColumns, 116 | schemas, 117 | { 118 | language: 'spanish' 119 | } 120 | ); 121 | ``` 122 | 123 | ### Using with Raw SQL 124 | 125 | The converters return objects with `text` (the WHERE clause) and `values` (the parameter values): 126 | 127 | ```typescript 128 | import { searchStringToIlikeSql } from 'search-input-query-parser'; 129 | 130 | const base = 'SELECT * FROM products'; 131 | const { text, values } = searchStringToIlikeSql(query, searchableColumns, schemas); 132 | const fullQuery = `${base} WHERE ${text}`; 133 | 134 | // Using with node-postgres 135 | const result = await client.query(fullQuery, values); 136 | 137 | // Using with Prisma 138 | const result = await prisma.$queryRawUnsafe(fullQuery, ...values); 139 | ``` 140 | -------------------------------------------------------------------------------- /search-input-query-parser/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "search-input-query-parser", 3 | "version": "0.7.5", 4 | "description": "A parser for advanced search query syntax with field:value support", 5 | "keywords": [ 6 | "search", 7 | "parser", 8 | "query", 9 | "typescript" 10 | ], 11 | "author": "William Cotton", 12 | "repository": { 13 | "type": "git", 14 | "url": "https://github.com/williamcotton/search-input-query" 15 | }, 16 | "files": [ 17 | "dist", 18 | "src" 19 | ], 20 | "exports": { 21 
| ".": { 22 | "types": "./dist/types/index.d.ts", 23 | "import": "./dist/esm/index.js", 24 | "require": "./dist/cjs/index.js" 25 | }, 26 | "./parser": { 27 | "types": "./dist/types/parser.d.ts", 28 | "import": "./dist/esm/parser.js", 29 | "require": "./dist/cjs/parser.js" 30 | }, 31 | "./validator": { 32 | "types": "./dist/types/validator.d.ts", 33 | "import": "./dist/esm/validator.js", 34 | "require": "./dist/cjs/validator.js" 35 | }, 36 | "./search-query-to-sql": { 37 | "types": "./dist/types/search-query-to-sql.d.ts", 38 | "import": "./dist/esm/search-query-to-sql.js", 39 | "require": "./dist/cjs/search-query-to-sql.js" 40 | } 41 | }, 42 | "scripts": { 43 | "test": "jest", 44 | "test:watch": "jest --watch", 45 | "type-check": "tsc --noEmit", 46 | "build": "npm run build:esm && npm run build:cjs && npm run build:types", 47 | "build:esm": "tsc -p tsconfig.esm.json", 48 | "build:cjs": "tsc -p tsconfig.cjs.json", 49 | "build:types": "tsc -p tsconfig.types.json", 50 | "prepublishOnly": "npm run build" 51 | }, 52 | "jest": { 53 | "preset": "ts-jest", 54 | "testEnvironment": "node", 55 | "testMatch": [ 56 | "**/*.test.ts" 57 | ] 58 | }, 59 | "license": "ISC", 60 | "devDependencies": { 61 | "@types/jest": "^29.5.14", 62 | "@types/node": "^22.15.3", 63 | "jest": "^29.7.0", 64 | "npm-check-updates": "^18.0.1", 65 | "ts-jest": "^29.3.2", 66 | "typescript": "^5.8.3" 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /search-input-query-parser/src/first-pass-parser.ts: -------------------------------------------------------------------------------- 1 | import { TokenType, TokenStream, currentToken, advanceStream } from "./lexer"; 2 | import { parsePrimary } from "./parse-primary"; 3 | import { SearchQueryErrorCode } from "./validator"; 4 | 5 | // First Pass AST types (from tokenizer/parser) 6 | export type PositionLength = { 7 | position: number; 8 | length: number; 9 | }; 10 | 11 | export type StringLiteral = { 12 | readonly 
type: "STRING"; 13 | readonly value: string; 14 | readonly quoted: boolean; 15 | } & PositionLength; 16 | 17 | export type WildcardPattern = { 18 | readonly type: "WILDCARD"; 19 | readonly prefix: string; 20 | readonly quoted: boolean; 21 | } & PositionLength; 22 | 23 | export type AndExpression = { 24 | readonly type: "AND"; 25 | readonly left: FirstPassExpression; 26 | readonly right: FirstPassExpression; 27 | } & PositionLength; 28 | 29 | export type OrExpression = { 30 | readonly type: "OR"; 31 | readonly left: FirstPassExpression; 32 | readonly right: FirstPassExpression; 33 | } & PositionLength; 34 | 35 | export type NotExpression = { 36 | readonly type: "NOT"; 37 | readonly expression: FirstPassExpression; 38 | } & PositionLength; 39 | 40 | export type InExpression = { 41 | readonly type: "IN"; 42 | readonly field: string; 43 | readonly values: string[]; 44 | } & PositionLength; 45 | 46 | export type FirstPassExpression = 47 | | StringLiteral 48 | | WildcardPattern 49 | | AndExpression 50 | | OrExpression 51 | | NotExpression 52 | | InExpression; 53 | 54 | // Parser functions 55 | export interface ParseResult { 56 | readonly result: T; 57 | readonly stream: TokenStream; 58 | } 59 | 60 | const getOperatorPrecedence = (type: TokenType): number => 61 | type === TokenType.AND ? 2 : type === TokenType.OR ? 
1 : 0; 62 | 63 | export const parseExpression = ( 64 | stream: TokenStream, 65 | minPrecedence: number = 0 66 | ): ParseResult => { 67 | const token = currentToken(stream); 68 | if (token.type === TokenType.STRING && token.value === "*") { 69 | return { 70 | result: { 71 | type: "WILDCARD", 72 | prefix: "", 73 | quoted: false, 74 | position: token.position, 75 | length: token.length, 76 | }, 77 | stream: advanceStream(stream), 78 | }; 79 | } 80 | 81 | let result = parsePrimary(stream); 82 | 83 | while (true) { 84 | const token = currentToken(result.stream); 85 | if (token.type === TokenType.EOF) break; 86 | 87 | if (token.type === TokenType.AND || token.type === TokenType.OR) { 88 | const precedence = getOperatorPrecedence(token.type); 89 | if (precedence < minPrecedence) break; 90 | 91 | const operator = token.type; 92 | const nextStream = advanceStream(result.stream); 93 | const nextToken = currentToken(nextStream); 94 | if (nextToken.type === TokenType.EOF) { 95 | throw { 96 | message: `Unexpected token: ${token.value}`, 97 | code: SearchQueryErrorCode.SYNTAX_TOKEN_UNEXPECTED, 98 | value: token.value, 99 | position: token.position, 100 | length: token.length, 101 | }; 102 | } 103 | 104 | const right = parseExpression(nextStream, precedence + 1); 105 | 106 | result = { 107 | result: { 108 | type: operator, 109 | left: result.result, 110 | right: right.result, 111 | position: token.position, 112 | length: token.length, 113 | }, 114 | stream: right.stream, 115 | }; 116 | continue; 117 | } 118 | 119 | if ( 120 | token.type === TokenType.STRING || 121 | token.type === TokenType.QUOTED_STRING || 122 | token.type === TokenType.LPAREN || 123 | token.type === TokenType.NOT 124 | ) { 125 | const precedence = getOperatorPrecedence(TokenType.AND); 126 | if (precedence < minPrecedence) break; 127 | 128 | const right = parseExpression(result.stream, precedence + 1); 129 | result = { 130 | result: { 131 | type: TokenType.AND, 132 | left: result.result, 133 | right: 
right.result, 134 | position: token.position, 135 | length: token.length, 136 | }, 137 | stream: right.stream, 138 | }; 139 | continue; 140 | } 141 | 142 | break; 143 | } 144 | 145 | return result; 146 | }; 147 | -------------------------------------------------------------------------------- /search-input-query-parser/src/index.ts: -------------------------------------------------------------------------------- 1 | // Re-export everything from parser.ts 2 | export * from "./parser"; 3 | 4 | // Re-export everything from validator.ts 5 | export * from "./validator"; 6 | 7 | // Re-export everything from lexer.ts 8 | export * from "./lexer"; 9 | 10 | // Re-export everything from first-pass-parser.ts with renamed types to avoid conflicts 11 | export { 12 | parseExpression, 13 | type PositionLength, 14 | type StringLiteral, 15 | type WildcardPattern as FirstPassWildcardPattern, 16 | type AndExpression, 17 | type OrExpression, 18 | type NotExpression, 19 | type InExpression as FirstPassInExpression, 20 | type FirstPassExpression, 21 | type ParseResult, 22 | } from "./first-pass-parser"; 23 | 24 | // Re-export SQL-related files with renamed types to avoid conflicts 25 | export { 26 | searchQueryToSql, 27 | searchStringToSql, 28 | type SqlQueryResult as SqlQueryResultBase, 29 | type SearchType, 30 | type SearchQueryOptions as SearchQueryOptionsBase, 31 | } from "./search-query-to-sql"; 32 | 33 | export { 34 | searchQueryToTsVectorSql, 35 | searchStringToTsVectorSql, 36 | type SqlQueryResult as TsVectorSqlQueryResult, 37 | type SearchQueryOptions as TsVectorSearchQueryOptions, 38 | } from "./search-query-to-tsvector-sql"; 39 | 40 | export { 41 | searchQueryToIlikeSql, 42 | searchStringToIlikeSql, 43 | type SqlQueryResult as IlikeSqlQueryResult, 44 | type SearchQueryOptions as IlikeSearchQueryOptions, 45 | } from "./search-query-to-ilike-sql"; 46 | 47 | export { 48 | searchQueryToParadeDbSql, 49 | searchStringToParadeDbSql, 50 | type SqlQueryResult as ParadeDbSqlQueryResult, 
51 | type SearchQueryOptions as ParadeDbSearchQueryOptions, 52 | } from "./search-query-to-paradedb-sql"; 53 | 54 | // Re-export utility functions 55 | export * from "./validate-expression-fields"; 56 | export * from "./validate-string"; 57 | export * from "./validate-wildcard"; 58 | export * from "./validate-in-expression"; 59 | export * from "./parse-in-values"; 60 | export * from "./parse-primary"; 61 | export * from "./parse-range-expression"; 62 | export * from "./transform-to-expression"; -------------------------------------------------------------------------------- /search-input-query-parser/src/parse-in-values.ts: -------------------------------------------------------------------------------- 1 | import { ParseResult } from "./first-pass-parser"; 2 | import { TokenStream, currentToken, TokenType, advanceStream } from "./lexer"; 3 | import { SearchQueryErrorCode } from "./validator"; 4 | 5 | export const parseInValues = ( 6 | stream: TokenStream, 7 | inValuePosition: number 8 | ): ParseResult => { 9 | const values: string[] = []; 10 | let currentStream = stream; 11 | 12 | // Expect opening parenthesis 13 | if (currentToken(currentStream).type !== TokenType.LPAREN) { 14 | throw { 15 | message: "Expected '(' after IN", 16 | code: SearchQueryErrorCode.IN_LPAREN_MISSING, 17 | position: inValuePosition, // Use the position passed from the caller 18 | length: 1, 19 | }; 20 | } 21 | currentStream = advanceStream(currentStream); 22 | 23 | while (true) { 24 | const token = currentToken(currentStream); 25 | 26 | if (token.type === TokenType.RPAREN) { 27 | if (values.length === 0) { 28 | throw { 29 | message: "IN operator requires at least one value", 30 | code: SearchQueryErrorCode.IN_LIST_EMPTY, 31 | position: token.position, 32 | length: 1, 33 | }; 34 | } 35 | return { 36 | result: values, 37 | stream: advanceStream(currentStream), 38 | }; 39 | } 40 | 41 | if (token.type === TokenType.EOF || 42 | (token.type !== TokenType.STRING && 43 | token.type !== 
TokenType.QUOTED_STRING && 44 | token.type !== TokenType.NUMBER && 45 | token.type !== TokenType.COMMA)) { 46 | throw { 47 | message: "Expected ',' or ')' after IN value", 48 | code: SearchQueryErrorCode.IN_SEPARATOR_MISSING, 49 | position: token.position, 50 | length: 1, 51 | }; 52 | } 53 | 54 | if (token.type === TokenType.STRING || 55 | token.type === TokenType.QUOTED_STRING || 56 | token.type === TokenType.NUMBER) { 57 | values.push(token.value); 58 | currentStream = advanceStream(currentStream); 59 | 60 | const nextToken = currentToken(currentStream); 61 | if (nextToken.type === TokenType.COMMA) { 62 | currentStream = advanceStream(currentStream); 63 | continue; 64 | } 65 | if (nextToken.type === TokenType.RPAREN) { 66 | continue; 67 | } 68 | throw { 69 | message: "Expected ',' or ')' after IN value", 70 | code: SearchQueryErrorCode.IN_SEPARATOR_MISSING, 71 | position: nextToken.position, 72 | length: 1, 73 | }; 74 | } 75 | 76 | currentStream = advanceStream(currentStream); 77 | } 78 | }; 79 | -------------------------------------------------------------------------------- /search-input-query-parser/src/parse-primary.ts: -------------------------------------------------------------------------------- 1 | import { ParseResult, FirstPassExpression, parseExpression } from "./first-pass-parser"; 2 | import { parseInValues } from "./parse-in-values"; 3 | import { TokenStream, currentToken, TokenType, advanceStream } from "./lexer"; 4 | import { SearchQueryErrorCode } from "./validator"; 5 | 6 | export const expectToken = ( 7 | stream: TokenStream, 8 | type: TokenType, 9 | message?: string 10 | ): TokenStream => { 11 | const token = currentToken(stream); 12 | if (token.type !== type) { 13 | throw { 14 | message: message ? 
/**
 * Reports whether a raw token value contains a `:` and therefore has the
 * shape of a `field:value` pair.
 */
export const isFieldValuePattern = (value: string): boolean =>
  value.indexOf(":") !== -1;

/**
 * Splits a raw token value at its first `:`.
 *
 * @returns `[field, value]`; everything after the first colon (including any
 *          further colons) is kept in `value`. When there is no colon the
 *          whole input is returned as the field and the value is `""`.
 */
export const extractFieldValue = (value: string): [string, string] => {
  const separatorAt = value.indexOf(":");
  if (separatorAt === -1) {
    return [value, ""];
  }
  return [value.slice(0, separatorAt), value.slice(separatorAt + 1)];
};

// Parses what follows a NOT keyword: either a parenthesised expression
// (whose closing ")" is required via expectToken) or a single primary.
const parseNegatedOperand = (afterNot: TokenStream) => {
  if (currentToken(afterNot).type !== TokenType.LPAREN) {
    return parsePrimary(afterNot);
  }
  const grouped = parseExpression(advanceStream(afterNot));
  const afterGroup = expectToken(
    grouped.stream,
    TokenType.RPAREN,
    "Expected ')'"
  );
  return { result: grouped.result, stream: afterGroup };
};

/**
 * Parses one primary expression at the current stream position: a NOT
 * expression, a parenthesised group, a `field:IN` list, a trailing-wildcard
 * term, or a plain (optionally quoted) string.
 *
 * @param stream token stream positioned at the first token of the primary
 * @returns the first-pass node together with the stream positioned after it
 * @throws a plain `{ message, code, position, length }` object when the
 *         current token is AND/OR, an unmatched ")", or any other token that
 *         cannot start a primary
 */
export const parsePrimary = (
  stream: TokenStream
): ParseResult => {
  const token = currentToken(stream);
  const { position, length } = token;

  if (token.type === TokenType.NOT) {
    const operand = parseNegatedOperand(advanceStream(stream));
    return {
      result: {
        type: "NOT",
        expression: operand.result,
        position,
        length,
      },
      stream: operand.stream,
    };
  }

  if (token.type === TokenType.LPAREN) {
    const inner = parseExpression(advanceStream(stream));
    const afterGroup = expectToken(
      inner.stream,
      TokenType.RPAREN,
      "Expected ')'"
    );
    return { result: inner.result, stream: afterGroup };
  }

  if (
    token.type === TokenType.STRING ||
    token.type === TokenType.QUOTED_STRING
  ) {
    const { value } = token;
    const quoted = token.type === TokenType.QUOTED_STRING;
    const colonIndex = value.indexOf(":");

    // field:IN — the value list is parsed from the tokens that follow.
    if (colonIndex !== -1) {
      const segments = value.split(":");
      if (segments[1].toUpperCase() === "IN") {
        const listStream = advanceStream(stream);
        const inValues = parseInValues(listStream, position + colonIndex + 2);
        return {
          result: {
            type: "IN",
            field: segments[0],
            values: inValues.result,
            position,
            length: length + inValues.stream.position - listStream.position,
          },
          stream: inValues.stream,
        };
      }
    }

    // Trailing wildcard. For a `field:value*` token, re-joining the field and
    // the value without its final "*" yields exactly `value` minus its last
    // character, so fielded and plain terms share this single branch.
    if (value.endsWith("*")) {
      return {
        result: {
          type: "WILDCARD",
          prefix: value.slice(0, -1),
          quoted,
          position,
          length,
        },
        stream: advanceStream(stream),
      };
    }

    // Plain term (or field:value pair) without wildcards.
    return {
      result: {
        type: "STRING",
        value,
        quoted,
        position,
        length,
      },
      stream: advanceStream(stream),
    };
  }

  if (token.type === TokenType.AND || token.type === TokenType.OR) {
    throw {
      message: `${token.value} is a reserved word`,
      code: SearchQueryErrorCode.SYNTAX_KEYWORD_RESERVED,
      value: token.value,
      position,
      length,
    };
  }

  if (token.type === TokenType.RPAREN) {
    throw {
      message: 'Unexpected ")"',
      code: SearchQueryErrorCode.SYNTAX_PARENTHESIS_UNEXPECTED,
      position,
      length,
    };
  }

  throw {
    message: "Unexpected token",
    code: SearchQueryErrorCode.SYNTAX_TOKEN_UNEXPECTED,
    position,
    length,
  };
};
// Comparison operators that may prefix a range value. BETWEEN has no prefix
// form; it is written with ".." between two bounds.
const COMPARISON_OPERATORS = [">=", ">", "<=", "<"];

const isRangeOperator = (str: string): str is RangeOperator =>
  COMPARISON_OPERATORS.some((op) => op === str);

/**
 * Interprets the value part of a `field:value` term as a range where possible.
 *
 * Recognised forms, tried in this order:
 *   `..20`                     -> `<=`
 *   `10..`                     -> `>=`
 *   `YYYY-MM-DD..YYYY-MM-DD`   -> BETWEEN (only when `schema.type` is "date")
 *   `10..20`                   -> BETWEEN (signed integers / decimals)
 *   `>=x`, `<=x`, `>x`, `<x`   -> the corresponding comparison
 * Anything else is returned as a plain FIELD_VALUE node.
 *
 * @param fieldName  field name as written in the query
 * @param value      text after the colon
 * @param schema     schema of the field, if known (only `type` is read)
 * @param position   offset of the whole `field:value` term in the input
 * @param colonIndex offset of the colon within the term (i.e. the field length)
 */
export const parseRangeExpression = (
  fieldName: string,
  value: string,
  schema: FieldSchema | undefined,
  position: number,
  colonIndex: number
): RangeExpression | FieldValue => {
  // Offset of the first character after the colon.
  const valueStart = position + colonIndex + 1;
  // Length of the whole term: field, colon and raw value.
  const termLength = colonIndex + 1 + value.length;

  const fieldNode = (): RangeExpression["field"] => ({
    type: "FIELD",
    value: fieldName,
    position,
    length: colonIndex,
  });

  // `offset` is measured from the first character after the colon.
  const valueNode = (text: string, offset: number): RangeExpression["value"] => ({
    type: "VALUE",
    value: text,
    position: valueStart + offset,
    length: text.length,
  });

  const comparison = (
    operator: RangeOperator,
    operand: string,
    offset: number
  ): RangeExpression => ({
    type: "RANGE",
    field: fieldNode(),
    operator,
    value: valueNode(operand, offset),
    position,
    length: termLength,
  });

  // Open-ended ranges: "..20" and "10..".
  if (value.startsWith("..")) {
    return comparison("<=", value.slice(2), 2);
  }
  if (value.endsWith("..")) {
    return comparison(">=", value.slice(0, -2), 0);
  }

  // Closed ranges: the date form is only attempted for date fields, the
  // numeric form (floats and negatives allowed) is attempted for every field.
  const bounds =
    (schema?.type === "date"
      ? value.match(/^(\d{4}-\d{2}-\d{2})\.\.(\d{4}-\d{2}-\d{2})$/)
      : null) || value.match(/^(-?\d*\.?\d+)\.\.(-?\d*\.?\d+)$/);
  if (bounds) {
    const lower = bounds[1];
    const upper = bounds[2];
    return {
      type: "RANGE",
      field: fieldNode(),
      operator: "BETWEEN",
      value: valueNode(lower, 0),
      // The upper bound starts after the lower bound and the ".." separator.
      value2: valueNode(upper, lower.length + 2),
      position,
      length: termLength,
    };
  }

  // Prefix operators: two-character operators are tried before one-character
  // ones so that ">=" is not read as ">" followed by "=...".
  for (const width of [2, 1]) {
    const candidate = value.slice(0, width);
    if (value.length >= width && isRangeOperator(candidate)) {
      return comparison(candidate, value.slice(width), width);
    }
  }

  // No range syntax matched: plain field/value pair.
  return {
    type: "FIELD_VALUE",
    field: fieldNode(),
    value: valueNode(value, 0),
  };
};
// Re-export these types from validator for backward compatibility
export { ValidationError, SearchQueryErrorCode };

// Schema types for range queries
export interface FieldSchema {
  name: string;
  type: "string" | "number" | "date" | "boolean";
}

// Second Pass AST types (semantic analysis)

/** A bare search term that is not attached to any field. */
export type SearchTerm = {
  readonly type: "SEARCH_TERM";
  readonly value: string;
} & PositionLength;

/** A term ending in `*`; `prefix` is the text before the wildcard. */
export type WildcardPattern = {
  readonly type: "WILDCARD";
  readonly prefix: string;
  readonly quoted: boolean;
} & PositionLength;

export type Field = {
  readonly type: "FIELD";
  readonly value: string;
} & PositionLength;

export type Value = {
  readonly type: "VALUE";
  readonly value: string;
} & PositionLength;

export type RangeOperator = ">=" | ">" | "<=" | "<" | "BETWEEN";

export type RangeExpression = {
  readonly type: "RANGE";
  readonly field: Field;
  readonly operator: RangeOperator;
  readonly value: Value;
  readonly value2?: Value; // For BETWEEN
} & PositionLength;

export type FieldValue = {
  readonly type: "FIELD_VALUE";
  readonly field: Field;
  readonly value: Value;
};

export type And = {
  readonly type: "AND";
  readonly left: Expression;
  readonly right: Expression;
} & PositionLength;

export type Or = {
  readonly type: "OR";
  readonly left: Expression;
  readonly right: Expression;
} & PositionLength;

export type Not = {
  readonly type: "NOT";
  readonly expression: Expression;
} & PositionLength;

export type InExpression = {
  readonly type: "IN";
  readonly field: Field;
  readonly values: Value[];
} & PositionLength;

export type Expression =
  | SearchTerm
  | WildcardPattern
  | FieldValue
  | RangeExpression
  | And
  | Or
  | Not
  | InExpression;

/** Successful parse; `expression` is null for an empty query. */
export type SearchQuery = {
  readonly type: "SEARCH_QUERY";
  readonly expression: Expression | null;
};

/** Failed parse; carries every syntax and field error found. */
export type SearchQueryError = {
  readonly type: "SEARCH_QUERY_ERROR";
  readonly expression: null;
  readonly errors: ValidationError[];
};

/**
 * Renders an expression back into query-like text. AND/OR are always
 * parenthesised and NOT always wraps its operand in parentheses.
 */
export const stringify = (expr: Expression): string => {
  switch (expr.type) {
    case "SEARCH_TERM":
      return expr.value;
    case "WILDCARD":
      return `${expr.prefix}*`;
    case "FIELD_VALUE":
      return `${expr.field.value}:${expr.value.value}`;
    case "RANGE":
      if (expr.operator === "BETWEEN") {
        return `${expr.field.value}:${expr.value.value}..${expr.value2?.value}`;
      }
      return `${expr.field.value}:${expr.operator}${expr.value.value}`;
    case "NOT":
      return `NOT (${stringify(expr.expression)})`;
    case "AND":
      return `(${stringify(expr.left)} AND ${stringify(expr.right)})`;
    case "OR":
      return `(${stringify(expr.left)} OR ${stringify(expr.right)})`;
    case "IN": {
      const values = expr.values.map((v: { value: string }) => v.value).join(",");
      return `${expr.field.value}:IN(${values})`;
    }
  }
};

// Turns whatever was thrown during parsing into a ValidationError. Structured
// errors thrown by the lexer/parser (objects that already carry message,
// position and length) are passed through untouched; anything else, such as a
// runtime Error, is reported as UNKNOWN spanning the whole input so callers
// never receive an entry without position/length.
const toValidationError = (error: unknown, input: string): ValidationError => {
  if (typeof error === "object" && error !== null) {
    const candidate = error as Partial<ValidationError>;
    if (
      typeof candidate.message === "string" &&
      typeof candidate.position === "number" &&
      typeof candidate.length === "number"
    ) {
      return error as ValidationError;
    }
  }
  return {
    message: error instanceof Error ? error.message : String(error),
    code: SearchQueryErrorCode.UNKNOWN,
    position: 0,
    length: input.length,
  };
};

/**
 * Main parse function: tokenizes, parses, validates and transforms a search
 * query.
 *
 * @param input        raw query text
 * @param fieldSchemas known fields; when non-empty, field names and values
 *                     are additionally validated against them
 *                     (names are matched case-insensitively)
 * @returns a SEARCH_QUERY (with a null expression for empty input) or a
 *          SEARCH_QUERY_ERROR listing all errors sorted by position. Never
 *          throws: anything thrown while parsing is returned as an error.
 */
export const parseSearchInputQuery = (
  input: string,
  fieldSchemas: FieldSchema[] = []
): SearchQuery | SearchQueryError => {
  try {
    const tokens = tokenize(input);
    const stream = createStream(tokens);

    if (currentToken(stream).type === TokenType.EOF) {
      return { type: "SEARCH_QUERY", expression: null };
    }

    const result = parseExpression(stream);

    // Anything left over after a complete expression is reported as a stray ")".
    const finalToken = currentToken(result.stream);
    if (finalToken.type !== TokenType.EOF) {
      throw {
        message: 'Unexpected ")"',
        code: SearchQueryErrorCode.SYNTAX_PARENTHESIS_UNEXPECTED,
        position: finalToken.position,
        length: finalToken.length,
      };
    }

    const errors = validateSearchQuery(result.result);

    // Built once and shared by field validation and the final transform.
    const schemaMap = new Map<string, FieldSchema>(
      fieldSchemas.map((s): [string, FieldSchema] => [s.name.toLowerCase(), s])
    );

    const fieldErrors: ValidationError[] = [];
    if (schemaMap.size > 0) {
      validateExpressionFields(
        result.result,
        new Set(schemaMap.keys()),
        fieldErrors,
        schemaMap
      );
    }

    // Drop field errors that cover exactly the same span as a syntax error so
    // the same piece of input is not reported twice.
    const fieldErrorsFiltered = fieldErrors.filter(
      ({ position, length }) =>
        !errors.some(
          (error) => error.position === position && error.length === length
        )
    );

    const allErrors = [...errors, ...fieldErrorsFiltered].sort(
      (a, b) => a.position - b.position
    );

    if (allErrors.length > 0) {
      return {
        type: "SEARCH_QUERY_ERROR",
        expression: null,
        errors: allErrors,
      };
    }

    const expression = transformToExpression(result.result, schemaMap);

    return { type: "SEARCH_QUERY", expression };
  } catch (error: unknown) {
    return {
      type: "SEARCH_QUERY_ERROR",
      expression: null,
      errors: [toValidationError(error, input)],
    };
  }
};
import { describe, expect, test } from "@jest/globals";
import { searchStringToParadeDbSql } from "./search-query-to-paradedb-sql";
import { FieldSchema } from "./parser";

// Tests for the ParadeDB SQL converter: each case converts a query string and
// compares the generated SQL text and bound parameter values.
describe("ParadeDB SQL Converter", () => {
  // Field schemas handed to the converter for every test.
  const schemas: FieldSchema[] = [
    { name: "title", type: "string" },
    { name: "description", type: "string" },
    { name: "content", type: "string" },
    { name: "price", type: "number" },
    { name: "date", type: "date" },
    { name: "in_stock", type: "boolean" },
  ];

  // Columns passed as the converter's second argument.
  const searchableColumns = ["title", "description", "content"];

  // Converts `query` and asserts both the SQL text and the parameter list.
  const testParadeDBConversion = (
    query: string,
    expectedSql: string,
    expectedValues: any[]
  ) => {
    const result = searchStringToParadeDbSql(
      query,
      searchableColumns,
      schemas
    );
    expect(result.text).toBe(expectedSql);
    expect(result.values).toEqual(expectedValues);
  };

  describe("ParadeDB Date Handling", () => {
    // A bare year is expected to expand to the inclusive range Jan 1 .. Dec 31.
    test("handles date year shorthand format", () => {
      testParadeDBConversion(
        "date:2024",
        "date @@@ '[' || $1 || ' TO ' || $2 || ']'",
        ["2024-01-01", "2024-12-31"]
      );
    });

    // A year-month is expected to expand to the first and last day of that month.
    test("handles date month shorthand format", () => {
      testParadeDBConversion(
        "date:2024-02",
        "date @@@ '[' || $1 || ' TO ' || $2 || ']'",
        ["2024-02-01", "2024-02-29"] // 2024 is a leap year
      );

      testParadeDBConversion(
        "date:2023-04",
        "date @@@ '[' || $1 || ' TO ' || $2 || ']'",
        ["2023-04-01", "2023-04-30"]
      );
    });

    // NOTE(review): for `date:<2025` the expected parameter is the END of the
    // year ("2025-12-31"), whereas `>=2024` uses the start of the year. That
    // makes `<2025` include almost all of 2025 — confirm this is intended.
    test("handles date shorthand formats with comparison operators", () => {
      testParadeDBConversion(
        "date:>=2024 AND date:<2025",
        "(date @@@ '>=' || $1 AND date @@@ '<' || $2)",
        ["2024-01-01", "2025-12-31"]
      );
    });
  });
});
/**
 * Transforms a first-pass (syntactic) expression into the second-pass
 * expression tree.
 *
 * - `field:value` strings become FIELD_VALUE nodes, or RANGE nodes when the
 *   field's schema type is "number" or "date" and the value uses range syntax.
 * - `field:prefix*` wildcards become FIELD_VALUE nodes whose value keeps the
 *   trailing `*`; wildcards without a field stay WILDCARD nodes.
 * - IN values are normalised through `Number` when the field is numeric.
 *
 * @param expr    first-pass expression to transform
 * @param schemas field schemas keyed by lower-cased field name
 */
export const transformToExpression = (
  expr: FirstPassExpression,
  schemas: Map<string, FieldSchema>
): Expression => {
  switch (expr.type) {
    case "NOT":
      return {
        type: "NOT",
        expression: transformToExpression(expr.expression, schemas),
        position: expr.position,
        length: expr.length,
      };

    case "WILDCARD": {
      // Check if this is part of a field:value pattern by looking at the prefix
      const colonIndex = expr.prefix.indexOf(":");
      if (colonIndex !== -1) {
        const field = expr.prefix.substring(0, colonIndex).trim();
        const prefix = expr.prefix.substring(colonIndex + 1).trim();

        // expr.position is the start of the whole `field:prefix*` token, so
        // the field starts there and the value starts right after the colon
        // (same layout as the STRING case below).
        return {
          type: "FIELD_VALUE",
          field: {
            type: "FIELD",
            value: field,
            position: expr.position,
            length: colonIndex,
          },
          value: {
            type: "VALUE",
            value: prefix + "*", // Preserve the wildcard in the value
            position: expr.position + colonIndex + 1,
            length: prefix.length + 1,
          },
        };
      }

      // If not a field:value pattern, return as a wildcard search term
      return {
        type: "WILDCARD",
        prefix: expr.prefix,
        quoted: expr.quoted,
        position: expr.position,
        length: expr.length,
      };
    }

    case "STRING": {
      // Check if the string is a field:value pattern
      const colonIndex = expr.value.indexOf(":");
      if (colonIndex !== -1) {
        const field = expr.value.substring(0, colonIndex).trim();
        let value = expr.value.substring(colonIndex + 1).trim();
        // Remove quotes if present
        value =
          value.startsWith('"') && value.endsWith('"')
            ? value.slice(1, -1)
            : value;

        const schema = schemas.get(field.toLowerCase());

        // Check for range patterns when we have a numeric or date field
        if (schema && (schema.type === "number" || schema.type === "date")) {
          return parseRangeExpression(
            field,
            value,
            schema,
            expr.position,
            colonIndex
          );
        }

        return {
          type: "FIELD_VALUE",
          field: {
            type: "FIELD",
            value: field,
            position: expr.position,
            length: colonIndex,
          },
          value: {
            type: "VALUE",
            value,
            position: expr.position + colonIndex + 1,
            length: value.length,
          },
        };
      }

      return {
        type: "SEARCH_TERM",
        value: expr.value,
        position: expr.position,
        length: expr.length,
      };
    }

    case "AND":
      return {
        type: "AND",
        left: transformToExpression(expr.left, schemas),
        right: transformToExpression(expr.right, schemas),
        position: expr.position,
        length: expr.length,
      };

    case "OR":
      return {
        type: "OR",
        left: transformToExpression(expr.left, schemas),
        right: transformToExpression(expr.right, schemas),
        position: expr.position,
        length: expr.length,
      };

    case "IN": {
      const schema = schemas.get(expr.field.toLowerCase());

      // Running offset of the next value: starts after `field` and ":IN" (+3)
      // and advances past each value plus one separator. Accumulating keeps
      // the offsets right when the values differ in length.
      // NOTE(review): the original whitespace and the opening "(" are not
      // available here, so these positions remain approximate.
      let offset = expr.position + expr.field.length + 3;
      const transformedValues: Value[] = expr.values.map((value) => {
        const node: Value = {
          type: "VALUE",
          // Handle type conversion based on schema
          value: schema?.type === "number" ? String(Number(value)) : value,
          position: offset,
          length: value.length,
        };
        offset += value.length + 1;
        return node;
      });

      return {
        type: "IN",
        field: {
          type: "FIELD",
          value: expr.field,
          position: expr.position,
          length: expr.field.length,
        },
        values: transformedValues,
        position: expr.position,
        length: expr.length,
      };
    }
  }
};
/**
 * Validates a first-pass `field:IN(...)` expression and appends any problems
 * to `errors`.
 *
 * Checks performed:
 *  - the field name starts with a letter and contains only letters, digits,
 *    `_` or `-`;
 *  - the field name is not a reserved word;
 *  - no individual value contains a comma.
 *
 * @param expr   the IN expression to check
 * @param errors accumulator that receives the validation errors (mutated)
 */
export const validateInExpression = (
  expr: InExpression,
  errors: ValidationError[]
): void => {
  // Validate field name pattern
  if (!/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(expr.field)) {
    errors.push({
      message: "Invalid characters in field name",
      code: SearchQueryErrorCode.FIELD_CHARS_INVALID,
      position: expr.position,
      length: expr.field.length,
    });
  }

  // Check for reserved words
  if (reservedWords.has(expr.field.toUpperCase())) {
    errors.push({
      message: `${expr.field} is a reserved word`,
      code: SearchQueryErrorCode.FIELD_NAME_RESERVED,
      value: expr.field,
      position: expr.position,
      length: expr.field.length,
    });
  }

  // Running offset of the current value: starts after `field` and ":IN" (+3)
  // and advances past each value plus one separator, so the reported position
  // stays right when earlier values have a different length.
  let offset = expr.position + expr.field.length + 3;
  for (const value of expr.values) {
    if (value.includes(",")) {
      errors.push({
        message: "Invalid character in IN value",
        code: SearchQueryErrorCode.IN_VALUE_INVALID,
        position: offset,
        length: value.length,
      });
    }
    offset += value.length + 1;
  }
};
/**
 * Validates a single first-pass term (plain term, `field:value` pair or
 * wildcard pattern) and appends any problems to `errors`.
 *
 * Wildcard placement is always checked first. WILDCARD nodes get no further
 * checks. For STRING nodes at most one additional error is reported:
 * missing field value (`field:`), missing field name (`:value`), reserved
 * word used as field name, invalid characters in the field name, or a
 * standalone reserved word.
 */
export const validateString = (
  expr: StringLiteral | WildcardPattern,
  errors: ValidationError[]
) => {
  validateWildcard(expr, errors);

  if (expr.type === "WILDCARD") {
    return;
  }

  const text = expr.value;
  const colonAt = text.indexOf(":");

  // Plain term: the only thing to reject is a bare reserved word.
  if (colonAt === -1) {
    if (reservedWords.has(text.toUpperCase())) {
      errors.push({
        message: `${expr.value} is a reserved word`,
        code: SearchQueryErrorCode.FIELD_NAME_RESERVED,
        value: expr.value,
        position: expr.position,
        length: expr.length,
      });
    }
    return;
  }

  // "field:" — nothing after the (last) colon.
  if (text.endsWith(":")) {
    errors.push({
      message: "Expected field value",
      code: SearchQueryErrorCode.SYNTAX_FIELD_VALUE_MISSING,
      position: expr.position,
      length: expr.length,
    });
    return;
  }

  // ":value" — nothing before the first colon.
  if (colonAt === 0) {
    errors.push({
      message: "Missing field name",
      code: SearchQueryErrorCode.SYNTAX_FIELD_NAME_MISSING,
      position: expr.position,
      length: expr.length,
    });
    return;
  }

  // field:value — validate the part before the first colon.
  const fieldName = text.slice(0, colonAt);

  if (reservedWords.has(fieldName.toUpperCase())) {
    errors.push({
      message: `${fieldName} is a reserved word`,
      code: SearchQueryErrorCode.FIELD_NAME_RESERVED,
      value: fieldName,
      position: expr.position,
      length: fieldName.length,
    });
    return;
  }

  if (!/^[a-zA-Z0-9_-]+$/.test(fieldName)) {
    errors.push({
      message: "Invalid characters in field name",
      code: SearchQueryErrorCode.FIELD_CHARS_INVALID,
      position: expr.position,
      length: fieldName.length,
    });
  }
};
/**
 * Validates `*` usage in a term and appends any problems to `errors`.
 *
 * The text examined is the string value, or `prefix + "*"` for WILDCARD nodes.
 *
 * Quoted terms: only a trailing `**` is rejected (error on the last `*`).
 * Unquoted terms:
 *  - more than one `*` is rejected (error on the second `*`);
 *  - a `*` that is not the last character is rejected (error on the first
 *    `*`), unless the term ends in `**`, in which case only the
 *    multiple-wildcard error above is reported.
 */
export const validateWildcard = (
  expr: StringLiteral | WildcardPattern,
  errors: ValidationError[]
) => {
  const text = expr.type === "STRING" ? expr.value : expr.prefix + "*";
  const endsWithDoubleStar = text.endsWith("**");

  if (expr.quoted) {
    if (endsWithDoubleStar) {
      errors.push({
        message: "Only one trailing wildcard (*) is allowed",
        code: SearchQueryErrorCode.WILDCARD_MULTIPLE_NOT_PERMITTED,
        position: expr.position + text.length - 1,
        length: 1,
      });
    }
    return;
  }

  const firstStar = text.indexOf("*");
  // When there is no first star this searches from index 0 and also yields
  // -1, so "a second star exists" is exactly "the term has more than one *".
  const secondStar = text.indexOf("*", firstStar + 1);

  if (secondStar !== -1) {
    errors.push({
      message: "Only one trailing wildcard (*) is allowed",
      code: SearchQueryErrorCode.WILDCARD_MULTIPLE_NOT_PERMITTED,
      position: expr.position + secondStar,
      length: 1,
    });
  }

  const starBeforeEnd = firstStar !== -1 && firstStar < text.length - 1;
  if (starBeforeEnd && !endsWithDoubleStar) {
    errors.push({
      message: "Wildcard (*) can only appear at the end of a term",
      code: SearchQueryErrorCode.WILDCARD_POSITION_INVALID,
      position: expr.position + firstStar,
      length: 1,
    });
  }
};
/**
 * Numeric error codes attached to every validation/parse error.
 * Codes are grouped in blocks of one thousand by category.
 */
export enum SearchQueryErrorCode {
  UNKNOWN = 0,

  // Syntax Errors (1000-1999)
  SYNTAX_QUOTE_UNTERMINATED = 1001,
  SYNTAX_VALUE_MISSING = 1002,
  SYNTAX_FIELD_NAME_MISSING = 1003,
  SYNTAX_FIELD_VALUE_MISSING = 1004,
  SYNTAX_PARENTHESIS_UNEXPECTED = 1005,
  SYNTAX_PARENTHESIS_MISSING = 1006,
  SYNTAX_TOKEN_UNEXPECTED = 1007,
  SYNTAX_TOKEN_MISSING = 1008,
  SYNTAX_OPERATOR_OR_SPACE_MISSING = 1009,
  SYNTAX_KEYWORD_RESERVED = 1010,

  // Field Validation Errors (2000-2999)
  FIELD_NAME_INVALID = 2001,
  FIELD_CHARS_INVALID = 2002,
  FIELD_NAME_RESERVED = 2003,

  // Value Validation Errors (3000-3999)
  VALUE_NUMERIC_INVALID = 3001,
  VALUE_DATE_FORMAT_INVALID = 3002,
  VALUE_RANGE_FORMAT_INVALID = 3003,
  VALUE_RANGE_OPERATOR_INVALID = 3004,
  VALUE_RANGE_MISSING = 3005,
  VALUE_RANGE_START_EXCEEDS_END = 3006,
  VALUE_WILDCARD_NOT_PERMITTED = 3007,
  VALUE_BOOLEAN_INVALID = 3008,

  // Wildcard Errors (4000-4999)
  WILDCARD_POSITION_INVALID = 4001,
  WILDCARD_MULTIPLE_NOT_PERMITTED = 4002,

  // IN Expression Errors (5000-5999)
  IN_LIST_EMPTY = 5001,
  IN_VALUE_INVALID = 5002,
  IN_SEPARATOR_MISSING = 5003,
  IN_LPAREN_MISSING = 5004,
}

/**
 * A single problem found in the query. `position`/`length` give the span of
 * input the error refers to; `value` optionally carries the offending text.
 */
export type ValidationError = {
  message: string;
  code: SearchQueryErrorCode;
  value?: string;
  position: number;
  length: number;
};

/** Upper-cased keywords that may not be used as terms or field names. */
export const reservedWords = new Set(["AND", "OR"]);

// Depth-first walk over the first-pass tree, dispatching each leaf to its
// validator and recursing through AND / OR / NOT.
const walkExpression = (
  expr: FirstPassExpression,
  errors: ValidationError[]
) => {
  if (expr.type === "STRING" || expr.type === "WILDCARD") {
    validateString(expr, errors);
    return;
  }

  if (expr.type === "AND" || expr.type === "OR") {
    walkExpression(expr.left, errors);
    walkExpression(expr.right, errors);
    return;
  }

  if (expr.type === "NOT") {
    walkExpression(expr.expression, errors);
    return;
  }

  if (expr.type === "IN") {
    validateInExpression(expr, errors);
  }
};

/**
 * Collects every validation error in a first-pass expression tree.
 *
 * @param expression root of the tree; `null` yields an empty list
 * @returns the errors in the order they were encountered during the walk
 */
export const validateSearchQuery = (
  expression: FirstPassExpression
): ValidationError[] => {
  const collected: ValidationError[] = [];

  if (expression !== null) {
    walkExpression(expression, collected);
  }

  return collected;
};
"extends": "./tsconfig.base.json", 3 | "compilerOptions": { 4 | "emitDeclarationOnly": true, 5 | "outDir": "dist/types" 6 | } 7 | } -------------------------------------------------------------------------------- /search-input-query-react/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 25 | -------------------------------------------------------------------------------- /search-input-query-react/README.md: -------------------------------------------------------------------------------- 1 | ## React Component 2 | 3 | ### Features 4 | 5 | - Monaco editor integration with syntax highlighting 6 | - Real-time validation and error highlighting 7 | - Auto-completion for fields and operators 8 | - Support for all query syntax features 9 | - Customizable editor theme 10 | - Error decorations with hover messages 11 | - Auto-closing quotes and brackets 12 | 13 | The React component provides a rich editing experience with immediate feedback on query validity. It handles all parsing and validation internally, providing clean results through the `onSearchResult` callback. 
14 | 15 | ### Installation 16 | 17 | ```bash 18 | npm install search-input-query-react 19 | ``` 20 | 21 | ### Basic Usage 22 | 23 | ```typescript 24 | import { SearchInputQuery, EditorTheme } from 'search-input-query-react'; 25 | import type { Expression, FieldSchema, ValidationError } from 'search-input-query-parser'; 26 | 27 | // Define your schemas 28 | const schemas: FieldSchema[] = [ 29 | { name: 'title', type: 'string' }, 30 | { name: 'price', type: 'number' }, 31 | { name: 'date', type: 'date' }, 32 | { name: 'in_stock', type: 'boolean' } 33 | ]; 34 | 35 | // Define your editor theme (optional, defaults provided) 36 | const editorTheme: EditorTheme = { 37 | base: 'vs', 38 | inherit: true, 39 | rules: [ 40 | { token: 'keyword', foreground: '#794938', fontStyle: 'bold' }, 41 | { token: 'field', foreground: '#234A97', fontStyle: 'bold' }, 42 | { token: 'value', foreground: '#0B6125' } 43 | // Add more token rules as needed 44 | ], 45 | colors: { 46 | 'editor.foreground': '#24292F', 47 | 'editor.background': '#FFFFFF', 48 | // Add more color settings as needed 49 | } 50 | }; 51 | 52 | function SearchComponent() { 53 | const handleSearchResult = (result: { 54 | expression: Expression | null; 55 | parsedResult: string; 56 | errors: ValidationError[]; 57 | }) => { 58 | if (result.errors.length === 0) { 59 | // Handle successful parse 60 | console.log('Parsed expression:', result.expression); 61 | console.log('Stringified result:', result.parsedResult); 62 | } else { 63 | // Handle validation errors 64 | console.log('Parse errors:', result.errors); 65 | } 66 | }; 67 | 68 | return ( 69 | <SearchInputQuery 70 | schemas={schemas} 71 | onSearchResult={handleSearchResult} 72 | editorTheme={editorTheme} 73 | /> 74 | ); 75 | } 76 | ``` 77 | 78 | ### Component Props 79 | 80 | | Prop | Type | Required | Description | 81 | |------|------|----------|-------------| 82 | | `schemas` | `FieldSchema[]` | Yes | Array of field definitions for validation and auto-completion | 83 | | `onSearchResult` | `(result: SearchResult) => void` | Yes | Callback fired on query changes with parse results | 84 |
`editorTheme` | `EditorTheme` | No | Monaco editor theme configuration | 85 | -------------------------------------------------------------------------------- /search-input-query-react/eslint.config.js: -------------------------------------------------------------------------------- 1 | import js from '@eslint/js' 2 | import globals from 'globals' 3 | import reactHooks from 'eslint-plugin-react-hooks' 4 | import reactRefresh from 'eslint-plugin-react-refresh' 5 | import tseslint from 'typescript-eslint' 6 | 7 | export default tseslint.config( 8 | { ignores: ['dist'] }, 9 | { 10 | extends: [js.configs.recommended, ...tseslint.configs.recommended], 11 | files: ['**/*.{ts,tsx}'], 12 | languageOptions: { 13 | ecmaVersion: 2020, 14 | globals: globals.browser, 15 | }, 16 | plugins: { 17 | 'react-hooks': reactHooks, 18 | 'react-refresh': reactRefresh, 19 | }, 20 | rules: { 21 | ...reactHooks.configs.recommended.rules, 22 | 'react-refresh/only-export-components': [ 23 | 'warn', 24 | { allowConstantExport: true }, 25 | ], 26 | }, 27 | }, 28 | ) 29 | -------------------------------------------------------------------------------- /search-input-query-react/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Search Input Query 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /search-input-query-react/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | preset: 'ts-jest', 3 | testEnvironment: 'jsdom', 4 | moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], 5 | transform: { 6 | '^.+\\.(ts|tsx)$': ['ts-jest', { 7 | tsconfig: 'tsconfig.json', 8 | useESM: true, 9 | }], 10 | }, 11 | moduleNameMapper: { 12 | '^(\\.{1,2}/.*)\\.js$': '$1', 13 | }, 14 | extensionsToTreatAsEsm: ['.ts', '.tsx'], 15 | testRegex: 'src/.*\\.(test|spec)\\.(jsx?|tsx?)$', 16 | setupFilesAfterEnv: ['@testing-library/jest-dom'], 17 | testPathIgnorePatterns: ['/node_modules/', '/dist/'], 18 | }; 19 | -------------------------------------------------------------------------------- /search-input-query-react/jest.setup.ts: -------------------------------------------------------------------------------- 1 | import "@testing-library/jest-dom"; 2 | -------------------------------------------------------------------------------- /search-input-query-react/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "search-input-query-react", 3 | "version": "0.7.5", 4 | "keywords": [ 5 | "search", 6 | "parser", 7 | "query", 8 | "typescript" 9 | ], 10 | "author": "William Cotton", 11 | "repository": { 12 | "type": "git", 13 | "url": "https://github.com/williamcotton/search-input-query" 14 | }, 15 | "files": [ 16 | "dist", 17 | "src" 18 | ], 19 | "exports": { 20 | ".": { 21 | "types": "./dist/types/SearchInputQuery.d.ts", 22 | "import": "./dist/esm/SearchInputQuery.js", 23 | "require": "./dist/cjs/SearchInputQuery.js" 24 | } 25 | }, 26 | "scripts": { 27 | "test": "jest", 28 | "type-check": "tsc --noEmit", 29 | "build": "npm run build:esm && npm run build:cjs && npm run build:types", 30 | "build:esm": "tsc -p tsconfig.esm.json", 31 | "build:cjs": 
"tsc -p tsconfig.cjs.json", 32 | "build:types": "tsc -p tsconfig.types.json", 33 | "prepublishOnly": "npm run build" 34 | }, 35 | "peerDependencies": { 36 | "react": "^19.1.0", 37 | "react-dom": "^19.1.0", 38 | "search-input-query-parser": "^0.7.5" 39 | }, 40 | "dependencies": { 41 | "@monaco-editor/react": "^4.7.0", 42 | "search-input-query-parser": "^0.7.5", 43 | "sql.js": "^1.13.0" 44 | }, 45 | "devDependencies": { 46 | "@eslint/js": "^9.25.1", 47 | "@testing-library/jest-dom": "^6.6.3", 48 | "@testing-library/react": "^16.3.0", 49 | "@testing-library/user-event": "^14.6.1", 50 | "@types/jest": "^29.5.14", 51 | "@types/react": "^19.1.2", 52 | "@types/react-dom": "^19.1.2", 53 | "@types/sql.js": "^1.4.9", 54 | "@vitejs/plugin-react": "^4.4.1", 55 | "eslint": "^9.25.1", 56 | "eslint-plugin-react-hooks": "^5.2.0", 57 | "eslint-plugin-react-refresh": "^0.4.20", 58 | "globals": "^16.0.0", 59 | "jest": "^29.7.0", 60 | "jest-environment-jsdom": "^29.7.0", 61 | "npm-check-updates": "^18.0.1", 62 | "react": "^19.1.0", 63 | "react-dom": "^19.1.0", 64 | "ts-jest": "^29.3.2", 65 | "typescript": "~5.8.3", 66 | "typescript-eslint": "^8.31.1", 67 | "vite": "^6.3.3" 68 | }, 69 | "license": "MIT" 70 | } 71 | -------------------------------------------------------------------------------- /search-input-query-react/src/PlaceholderContentWidget.tsx: -------------------------------------------------------------------------------- 1 | import { editor } from "monaco-editor"; 2 | 3 | export class PlaceholderContentWidget implements editor.IContentWidget { 4 | private static readonly ID = "editor.widget.placeholderHint"; 5 | private domNode: HTMLElement | undefined; 6 | 7 | constructor( 8 | private readonly placeholder: string, 9 | private readonly editor: editor.ICodeEditor 10 | ) { 11 | editor.onDidChangeModelContent(() => this.onDidChangeModelContent()); 12 | this.onDidChangeModelContent(); 13 | } 14 | 15 | private onDidChangeModelContent(): void { 16 | if (this.editor.getValue() === 
import type { editor, languages, Position, IRange } from "monaco-editor";

import { FieldSchema } from "search-input-query-parser";

import { Monaco } from "./SearchInputQuery";

/**
 * Completion item whose insert text is always a plain string and whose
 * documentation, when present, is an object carrying a `value` string.
 */
interface CompletionItem extends languages.CompletionItem {
  insertText: string;
  documentation?: {
    value: string;
  };
}

/**
 * Creates the completion provider for the search query editor.
 *
 * While the cursor is not inside a `field:value` token, the provider offers
 * every schema field and the logical operators AND / OR / NOT whose name
 * contains the word being typed (case-insensitive substring match). Field
 * suggestions append a trailing ":" unless a colon already follows the cursor.
 * Value suggestions after a colon are currently disabled (see the TODO below),
 * so an empty list is returned in that position.
 *
 * @param monaco  Monaco namespace, used for the `CompletionItemKind` constants.
 * @param schemas Field definitions offered as field-name completions.
 * @returns A provider triggered by ":" and " " in addition to regular typing.
 */
export function createCompletionItemProvider(
  monaco: Monaco,
  schemas: FieldSchema[]
): languages.CompletionItemProvider {
  return {
    triggerCharacters: [":", " "],
    provideCompletionItems: (
      model: editor.ITextModel,
      position: Position
    ): languages.ProviderResult<languages.CompletionList> => {
      // Accepted suggestions replace the partial word that ends at the cursor.
      const wordUntilPosition = model.getWordUntilPosition(position);
      const range: IRange = {
        startLineNumber: position.lineNumber,
        startColumn: wordUntilPosition.startColumn,
        endLineNumber: position.lineNumber,
        endColumn: wordUntilPosition.endColumn,
      };

      const textUntilPosition = model.getValueInRange({
        startLineNumber: 1,
        startColumn: 1,
        endLineNumber: position.lineNumber,
        endColumn: position.column,
      });

      // Split the current line at the cursor. Monaco columns are 1-based, so
      // `column - 1` is the cursor's offset into the line string.
      const currentLineText = model.getLineContent(position.lineNumber);
      const cursorOffset = position.column - 1;
      const lineBeforeCursor = currentLineText.substring(0, cursorOffset);
      const lineAfterCursor = currentLineText.substring(cursorOffset);

      // Only the token that ends at the cursor decides whether we are in a
      // value position. Looking at the last token of the whole line (the
      // previous behavior) suppressed field suggestions whenever a later
      // `field:value` pair existed on the same line.
      const lastWord = lineBeforeCursor.split(/[\s]+/).pop();
      const isAfterColon = lastWord?.includes(":");

      // Check if there's already a colon after the current word
      const hasColonAfter = lineAfterCursor.trimStart().startsWith(":");

      const words = textUntilPosition.split(/[\s:]+/);
      const currentWord = words[words.length - 1].toLowerCase();
      const previousWord = words[words.length - 2]?.toLowerCase();

      let suggestions: CompletionItem[] = [];

      // Suggest fields when not after a colon
      if (!isAfterColon) {
        // Filter schemas based on current input
        const fieldSuggestions: CompletionItem[] = schemas
          .filter((schema) => schema.name.toLowerCase().includes(currentWord))
          .map((schema) => ({
            label: schema.name,
            kind: monaco.languages.CompletionItemKind.Field,
            insertText: schema.name + (hasColonAfter ? "" : ":"),
            detail: `Field (${schema.type})`,
            documentation: {
              value: `Search by ${schema.name}\nType: ${schema.type}`,
            },
            range,
          }));

        // Add logical operators
        const operators: CompletionItem[] = ["AND", "OR", "NOT"]
          .filter((op) => op.toLowerCase().includes(currentWord))
          .map((op) => ({
            label: op,
            kind: monaco.languages.CompletionItemKind.Operator,
            insertText: op,
            documentation: {
              value: `Logical operator: ${op}`,
            },
            range,
          }));

        suggestions = [...fieldSuggestions, ...operators];
      }
      // Suggest values after a colon based on field type
      else if (previousWord) {
        // TODO: fix issue with completion items not disappearing after typing
        // const schema = schemas.find(
        //   (s) => s.name.toLowerCase() === previousWord.toLowerCase()
        // );
        // if (schema) {
        //   switch (schema.type) {
        //     case "boolean":
        //       suggestions = ["true", "false"].map((value) => ({
        //         label: value,
        //         kind: monaco.languages.CompletionItemKind.Value,
        //         insertText: value,
        //         range,
        //       }));
        //       break;

        //     case "string":
        //       suggestions = [
        //         {
        //           label: "IN",
        //           kind: monaco.languages.CompletionItemKind.Operator,
        //           insertText: "IN",
        //           documentation: {
        //             value: "IN operator for multiple values (e.g., IN(value1,value2))",
        //           },
        //           range,
        //         },
        //       ];
        //       break;

        //     case "number":
        //       suggestions = [
        //         {
        //           label: ">",
        //           kind: monaco.languages.CompletionItemKind.Operator,
        //           insertText: ">",
        //           range,
        //         },
        //         {
        //           label: ">=",
        //           kind: monaco.languages.CompletionItemKind.Operator,
        //           insertText: ">=",
        //           range,
        //         },
        //         {
        //           label: "<",
        //           kind: monaco.languages.CompletionItemKind.Operator,
        //           insertText: "<",
        //           range,
        //         },
        //         {
        //           label: "<=",
        //           kind: monaco.languages.CompletionItemKind.Operator,
        //           insertText: "<=",
        //           range,
        //         },
        //         {
        //           label: "..",
        //           kind: monaco.languages.CompletionItemKind.Operator,
        //           insertText: "..",
        //           documentation: {
        //             value: "Range operator (e.g. 10..20)",
        //           },
        //           range,
        //         },
        //         {
        //           label: "IN",
        //           kind: monaco.languages.CompletionItemKind.Operator,
        //           insertText: "IN",
        //           documentation: {
        //             value: "IN operator for multiple values (e.g., IN(value1,value2))",
        //           },
        //           range,
        //         }
        //       ];
        //       break;

        //     case "date":
        //       suggestions = [
        //         {
        //           label: "YYYY-MM-DD",
        //           kind: monaco.languages.CompletionItemKind.Value,
        //           insertText: new Date().toISOString().split("T")[0],
        //           documentation: {
        //             value: "Date in YYYY-MM-DD format",
        //           },
        //           range,
        //         },
        //       ];
        //       break;
        //   }
        // }
      }

      return {
        suggestions,
      };
    },
  };
}
Logical operators (must come before field detection) 22 | [/\b(AND|OR|NOT|IN)\b/, "keyword"], 23 | [/(?<=:)\s*IN*/, "value"], 24 | 25 | // Invalid field patterns (must come before valid field patterns) 26 | [/[^a-zA-Z0-9_-]+(?=:)/, "field"], // Invalid field characters 27 | [/[a-zA-Z0-9_-]*\*+[a-zA-Z0-9_-]*(?=:)/, "field"], // Fields with wildcards 28 | 29 | // Field:value pairs 30 | [/[a-zA-Z][a-zA-Z0-9_-]*(?=\s*:)/, "field"], // Field before colon 31 | [/:/, "operator"], 32 | [/(?<=:)\s*"/, { token: "string.quote", next: "@string" }], // Quoted string after colon 33 | [/(?<=:)\s*-?\d+(\.\d+)?/, "number"], // Numbers after colon 34 | [/(?<=:)\s*\d{4}-\d{2}-\d{2}/, "date"], // Dates after colon 35 | [/(?<=:)\s*[a-zA-Z][a-zA-Z0-9_-]*/, "value"], // Plain values after colon 36 | // Plain values after colon 37 | 38 | // Range operators 39 | [/\.\./, "operator"], 40 | [/>=|<=|>|