├── pnpm-workspace.yaml ├── .terserrc.js ├── .npmrc ├── .clang-format ├── .eslintrc.json ├── CMakeLists.txt ├── .gitattributes ├── .gitignore ├── CONTRIBUTING.md ├── src ├── zadeh.h ├── binding │ ├── windows_detect_arch.h │ ├── tsconfig.json │ ├── node.cc │ ├── binding.ts │ ├── node.h │ ├── node_data_interface.h │ └── index.ts ├── options.h ├── query.h ├── common.h ├── data_interface.h ├── StringArrayFilterer.h ├── filter.h ├── path_scorer.h ├── matcher.h └── TreeFilterer.h ├── test ├── score-spec.coffee ├── wrap-spec.js ├── object-array-filterer-spec.js ├── string-array-filterer-spec.js ├── debugger.js ├── fixtures │ ├── small-tree.json │ ├── tree-filterIndices-disp.json │ ├── tree-filterIndices-dips.json │ ├── tree-filterIndices-text.json │ ├── tree-filter-disp.json │ └── tree-filter-dips.json ├── match-spec.coffee ├── filter-options-spec.coffee └── tree-filterer-spec.js ├── .prettierignore ├── .clang-tidy ├── examples └── example1.cpp ├── .github ├── renovate.json └── workflows │ └── CI.yml ├── benchmark ├── benchmark-small.js ├── benchmark.js ├── testutils.js ├── benchmark-tree.js ├── benchmark-large.js ├── data-small.txt └── result.txt ├── package.json ├── binding.gyp ├── LICENSE └── README.md /pnpm-workspace.yaml: -------------------------------------------------------------------------------- 1 | packages: 2 | - "." 3 | -------------------------------------------------------------------------------- /.terserrc.js: -------------------------------------------------------------------------------- 1 | module.exports = require("terser-config-atomic") 2 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | public-hoist-pattern[]=* 2 | package-lock=false 3 | lockfile=true 4 | prefer-frozen-lockfile=false 5 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | ColumnLimit: 110 2 | IndentWidth: 2 3 | TabWidth: 2 4 | SortIncludes: false 5 | SortUsingDeclarations: false 6 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "eslint-config-atomic", 3 | "ignorePatterns": ["index.js", "index.d.ts", "binding.d.ts", "build/", "node_modules/", "dist-test/"] 4 | } 5 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.17) 2 | 3 | project(zadeh VERSION "2.0.2" LANGUAGES CXX) 4 | 5 | add_executable(example1 ./examples/example1.cpp) 6 | target_compile_features(example1 PRIVATE cxx_std_17) 7 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.cc text diff=cpp 2 | *.h text diff=cpp 3 | 4 | # Exclude from the language detection of the repository 5 | test/filter-options-spec.coffee linguist-vendored 6 | test/filter-spec.coffee linguist-vendored 7 | test/match-spec.coffee linguist-vendored 8 | test/score-spec.coffee linguist-vendored 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Node 
2 | node_modules/
3 | package-lock.json
4 | 
5 | # Caches
6 | .parcel-cache
7 | .rollup.cache
8 | .ropeproject
9 | .mypy_cache
10 | cmake-build-*
11 | .scannerwork
12 | 
13 | 
14 | # Built Files
15 | build/
16 | dist/
17 | dist-test/
18 | prebuilds/
19 | out/
20 | tsconfig.tsbuildinfo
21 | 
22 | index.js
23 | index.js.map
24 | *.d.ts
25 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## How to release the package to npm?
2 | 
3 | - Bump the version in package.json.
4 | - Create a new release tag on GitHub for the bumped version. This triggers the builds in GitHub Actions, which upload the prebuilt binaries to the action's page.
5 | - Manually download the prebuilt binaries from GitHub and publish the package:
6 | 
7 | ```
8 | npm publish
9 | ```
10 | 
--------------------------------------------------------------------------------
/src/zadeh.h:
--------------------------------------------------------------------------------
1 | #ifndef Zadeh_H
2 | #define Zadeh_H
3 | 
4 | #include "./common.h"
5 | #include "./data_interface.h"
6 | #include "./options.h"
7 | #include "./scorer.h"
8 | #include "./matcher.h"
9 | #include "./path_scorer.h"
10 | #include "./query.h"
11 | #include "./filter.h"
12 | #include "./StringArrayFilterer.h"
13 | #include "./TreeFilterer.h"
14 | 
15 | #endif
16 | 
--------------------------------------------------------------------------------
/src/binding/windows_detect_arch.h:
--------------------------------------------------------------------------------
1 | #ifdef Zadeh_NODE_BINDING // only defined for building the Node-js binding
2 | 
3 | // Check windows
4 | #if _WIN32 || _WIN64
5 | #if _WIN64
6 | #define ENV64BIT
7 | #else
8 | #define ENV32BIT
9 | #endif
10 | #endif
11 | 
12 | // Check GCC
13 | #if __GNUC__
14 | #if __x86_64__ || __ppc64__
15 | #define ENV64BIT
16 | #else
17 | #define ENV32BIT
18 | #endif
19 | #endif
20 | 
21 | #endif
22 | 
--------------------------------------------------------------------------------
/test/score-spec.coffee:
--------------------------------------------------------------------------------
1 | {score} = require '../index'
2 | 
3 | describe "score(string, query)", ->
4 |   it "returns a score", ->
5 |     expect(score('Hello World', 'he')).toBeLessThan(score('Hello World', 'Hello'))
6 |     expect(score('Hello World', '')).toBe 0
7 |     expect(score('Hello World', null)).toBe 0
8 |     expect(score('Hello World')).toBe 0
9 |     expect(score()).toBe 0
10 |     expect(score(null, 'he')).toBe 0
11 |     expect(score('', '')).toBe 0
12 |     expect(score('', 'abc')).toBe 0
13 | 
--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | # Node
2 | node_modules/
3 | package-lock.json
4 | pnpm-lock.yaml
5 | 
6 | # Caches
7 | .parcel-cache
8 | .rollup.cache
9 | .ropeproject
10 | .mypy_cache
11 | cmake-build-*
12 | .scannerwork
13 | 
14 | 
15 | # Built Files
16 | build/
17 | dist/
18 | dist-test/
19 | prebuilds/
20 | out/
21 | tsconfig.tsbuildinfo
22 | 
23 | index.js
24 | index.js.map
25 | *.d.ts
26 | 
27 | 
28 | # Extra files
29 | test/*.coffee
30 | benchmark/result.md
31 | 
32 | test/fixtures/*.json
33 | benchmark/tree.json
34 | 
--------------------------------------------------------------------------------
/.clang-tidy:
--------------------------------------------------------------------------------
1 | ---
2 | Checks: "*, -clang-diagnostic-*-compat, -cppcoreguidelines-init-variables, -modernize-return-braced-init-list, -misc-unused-parameters, -misc-non-private-member-variables-in-classes, -llvmlibc-*, -llvm-header-guard, -llvm-include-order, -modernize-use-trailing-return-type, -readability-avoid-const-params-in-decls, -readability-convert-member-functions-to-static, -fuchsia-default-arguments-declarations, -fuchsia-default-arguments-calls, -*-uppercase-literal-suffix, -fuchsia-overloaded-operator, -google-build-using-namespace, -google-global-names-in-headers, -google-readability-todo"
3 | HeaderFilterRegex: ".*"
4 | FormatStyle: none
5 | 
--------------------------------------------------------------------------------
/examples/example1.cpp:
--------------------------------------------------------------------------------
1 | #include "../src/zadeh.h" // include zadeh.h
2 | #include <vector>
3 | #include <iostream>
4 | 
5 | using namespace std;
6 | 
7 | int main() {
8 |     // the data to fuzzy search on
9 |     auto data = vector<string>{"eye", "why", "bi"};
10 | 
11 |     // setup StringArrayFilterer
12 |     auto arrayFilterer = zadeh::StringArrayFilterer<vector<string>, string>{};
13 |     arrayFilterer.set_candidates(data);
14 | 
15 |     // filter the indices that match the query
16 |     auto filtered_indices = arrayFilterer.filter_indices("ye");
17 | 
18 |     // print the filtered data
19 |     for (auto ind: filtered_indices) {
20 |         cout << data[ind] << '\n';
21 |     }
22 | }
23 | 
--------------------------------------------------------------------------------
/test/wrap-spec.js:
--------------------------------------------------------------------------------
1 | const path = require("path")
2 | const { wrap } = require("../index")
3 | const legacy = require("fuzzaldrin-plus")
4 | 
5 | describe("wrap(string, query)", () => {
6 |   const candidates = [
7 |     "helloworld",
8 |     "Helloworld",
9 |     "HelloWorld",
10 |     "hello world",
11 |     "Hello world",
12 |     "Hello World",
13 |     path.join("hello", "world"),
14 |   ]
15 |   const queries = ["he", "hl", "hw", "el", "eo", "ll", "wo", "ld", "", "helloworld"]
16 |   for (const c of candidates) {
17 |     for (const q of queries) {
18 |       it(`returns same for ${c}`, () => {
19 |         expect(wrap(c, q)).toEqual(legacy.wrap(c, q))
20 |       })
21 |     }
22 |   }
23 | })
24 | 
--------------------------------------------------------------------------------
/test/object-array-filterer-spec.js:
--------------------------------------------------------------------------------
1 | const { ObjectArrayFilterer } = require("../index")
2 | 
3 | describe("ObjectArrayFilterer", function () {
4 |   it("filters object arrays", function () {
5 |     const candidates = [
6 |       { name: "Call", id: 1 },
7 |       { name: "Me", id: 2 },
8 |       { name: "Maybe", id: 3 },
9 |     ]
10 |     const objArrFilterer = new ObjectArrayFilterer(candidates, "name") // filter based on their name
11 |     // call filter multiple times
12 |     expect(objArrFilterer.filter("me")).toEqual([
13 |       { name: "Me", id: 2 },
14 |       { name: "Maybe", id: 3 },
15 |     ])
16 |     expect(objArrFilterer.filter("all")).toEqual([{ name: "Call", id: 1 }])
17 |   })
18 | })
19 | 
--------------------------------------------------------------------------------
/.github/renovate.json:
--------------------------------------------------------------------------------
1 | {
2 |   "schedule": ["every weekend"],
3 |   "labels": ["dependencies"],
4 |   "separateMajorMinor": "false",
5 |   "packageRules": [
6 |     {
7 |       "matchDepTypes": ["devDependencies"],
8 |       "matchUpdateTypes": ["major", "minor", "patch", "pin", "digest", "lockFileMaintenance", "rollback", "bump"],
9 |       "groupName": 
"devDependencies", 10 | "semanticCommitType": "chore", 11 | "automerge": true 12 | }, 13 | { 14 | "matchDepTypes": ["dependencies"], 15 | "matchUpdateTypes": ["major", "minor", "patch", "pin", "digest", "lockFileMaintenance", "rollback", "bump"], 16 | "groupName": "dependencies", 17 | "semanticCommitType": "fix" 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /test/string-array-filterer-spec.js: -------------------------------------------------------------------------------- 1 | const { StringArrayFilterer } = require("../index") 2 | 3 | describe("StringArrayFilterer", function () { 4 | it("filters string arrays", function () { 5 | const arrayFilterer = new StringArrayFilterer(["Call", "Me", "Maybe"]) 6 | expect(arrayFilterer.filter("me")).toEqual(["Me", "Maybe"]) 7 | expect(arrayFilterer.filter("all")).toEqual(["Call"]) 8 | }) 9 | it("can set candidates later", function () { 10 | const arrayFilterer = new StringArrayFilterer() 11 | arrayFilterer.setCandidates(["Call", "Me", "Maybe"]) // set candidates only once 12 | // call filter multiple times 13 | expect(arrayFilterer.filter("me")).toEqual(["Me", "Maybe"]) 14 | expect(arrayFilterer.filter("all")).toEqual(["Call"]) 15 | }) 16 | }) 17 | -------------------------------------------------------------------------------- /src/binding/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "strict": true, 4 | "strictNullChecks": true, 5 | "noUnusedLocals": true, 6 | "noUnusedParameters": true, 7 | "noImplicitReturns": true, 8 | "noImplicitAny": true, 9 | "noImplicitThis": true, 10 | "noFallthroughCasesInSwitch": true, 11 | "declaration": true, 12 | "emitDecoratorMetadata": true, 13 | "experimentalDecorators": true, 14 | "incremental": false, 15 | "inlineSourceMap": true, 16 | "inlineSources": true, 17 | "preserveSymlinks": true, 18 | "jsx": "react", 19 | "jsxFactory": "etch.dom", 20 | "lib": ["ES2018", "dom"], 21 | "target": "ES2018", 22 | "allowJs": true, 23 | "esModuleInterop": true, 24 | "module": "commonjs", 25 | "moduleResolution": "node", 26 | "importHelpers": false 27 | }, 28 | "compileOnSave": false 29 | } 30 | -------------------------------------------------------------------------------- /src/options.h: -------------------------------------------------------------------------------- 1 | #ifndef Zadeh_options_h_ 2 | #define Zadeh_options_h_ 3 | 4 | #include "common.h" 5 | 6 | namespace zadeh { 7 | 8 | struct Options { 9 | size_t max_results = 0; 10 | bool allowErrors = false; 11 | bool usePathScoring = true; 12 | bool useExtensionBonus = false; 13 | #ifdef _WIN32 14 | char pathSeparator = '\\'; 15 | #else 16 | char pathSeparator = '/'; 17 | #endif 18 | const PreparedQuery preparedQuery; 19 | 20 | explicit Options(const Element &_query, size_t _maxResults, bool _usePathScoring, bool _useExtensionBonus) 21 | : max_results(_maxResults), usePathScoring(_usePathScoring), useExtensionBonus(_useExtensionBonus), 22 | preparedQuery(_query, pathSeparator) {} 23 | explicit Options(const Element &_query, char _pathSeparator) 24 | : pathSeparator(_pathSeparator), preparedQuery(_query, _pathSeparator) {} 25 | }; 26 | 27 | } // namespace zadeh 28 | #endif 29 | -------------------------------------------------------------------------------- /test/debugger.js: -------------------------------------------------------------------------------- 1 | const { TreeFilterer } = require("../index") 2 | const fs = require("fs") 3 
| const path = require("path")
4 | 
5 | async function main() {
6 |   // await sleep()
7 |   console.log("start")
8 | 
9 |   const treeFilterer = new TreeFilterer()
10 | 
11 |   const candidates = JSON.parse(fs.readFileSync(path.join(__dirname, "fixtures", "small-tree.json"), "utf8"))
12 | 
13 |   treeFilterer.setCandidates(candidates, "plainText", "children") // set candidates only once
14 |   const filteredIndices = treeFilterer.filterIndices("pl")
15 |   const filtered = treeFilterer.filter("pl")
16 | 
17 |   console.log("candidates", candidates)
18 |   console.log("filteredIndices ", filteredIndices)
19 |   console.log(JSON.stringify(filtered, undefined, " "))
20 | }
21 | 
22 | main().catch((e) => {
23 |   throw e
24 | })
25 | 
26 | async function sleep() {
27 |   await new Promise((resolve) => {
28 |     setTimeout(() => {
29 |       resolve(undefined)
30 |     }, 10000)
31 |   })
32 | }
33 | 
--------------------------------------------------------------------------------
/benchmark/benchmark-small.js:
--------------------------------------------------------------------------------
1 | require("coffeescript/register")
2 | const fs = require("fs")
3 | const path = require("path")
4 | const testutils = require("./testutils")
5 | 
6 | const Zadeh = require("../index")
7 | const legacy = require("fuzzaldrin-plus")
8 | 
9 | const lines = fs.readFileSync(path.join(__dirname, "data-small.txt"), "utf8").trim().split("\n")
10 | const forceAllMatch = {
11 |   maxInners: -1,
12 | }
13 | const mitigation = {
14 |   maxInners: Math.floor(0.2 * lines.length),
15 | }
16 | 
17 | // warmup + compile
18 | Zadeh.filter(lines, "index", forceAllMatch)
19 | legacy.filter(lines, "index")
20 | 
21 | testutils.doFilterTest(null, lines, "nm")
22 | testutils.doFilterTest(null, lines, "npm")
23 | testutils.doFilterTest(null, lines, "node")
24 | testutils.doFilterTest(null, lines, "grunt")
25 | testutils.doFilterTest(null, lines, "html")
26 | testutils.doFilterTest(null, lines, "doc")
27 | testutils.doFilterTest(null, lines, "cli")
28 | testutils.doFilterTest(null, lines, "js")
29 | testutils.doFilterTest(null, lines, "jas")
30 | testutils.doFilterTest(null, lines, "mine")
31 | testutils.doFilterTest(null, lines, "stream")
32 | 
--------------------------------------------------------------------------------
/src/query.h:
--------------------------------------------------------------------------------
1 | #ifndef Zadeh_query_h_
2 | #define Zadeh_query_h_
3 | 
4 | #include "common.h"
5 | #include "path_scorer.h"
6 | 
7 | namespace zadeh {
8 | 
9 | //
10 | // Optional chars
11 | // These chars improve the score when present, but their absence will not block the match (i.e. force score = 0). 
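// For example, coreChars below maps "auto-complete plus" to "autocompleteplus": ' ' and '-' are optional chars that get stripped from the query.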
12 | 13 | Element coreChars(Element query) { 14 | for (const auto ch : " _-:/\\") { 15 | query.erase(std::remove(query.begin(), query.end(), ch), query.end()); 16 | } 17 | return query; 18 | } 19 | 20 | std::set getCharCodes(const Element &str) { 21 | std::set charCodes; 22 | const auto len = str.size(); 23 | auto i = 0u; 24 | 25 | // create map 26 | while (i < len) { 27 | assert(0 <= i && i < str.size()); // fuzz: if len==0, does not enter while and i==0 28 | charCodes.insert(str[i]); // inbounds 29 | ++i; 30 | } 31 | assert(0 <= i && i <= str.size()); 32 | return charCodes; 33 | } 34 | 35 | PreparedQuery::PreparedQuery(const Element &q, const char pathSeparator) 36 | : query(q), query_lw(ToLower(q)), core(coreChars(q)), core_lw(ToLower(core)), core_up(ToUpper(core)) { 37 | depth = countDir(query, query.size(), pathSeparator); 38 | ext = getExtension(query_lw); 39 | charCodes = getCharCodes(query_lw); 40 | } 41 | 42 | } // namespace zadeh 43 | #endif 44 | -------------------------------------------------------------------------------- /test/fixtures/small-tree.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "kind": "module", 4 | "plainText": "\"rollup.config\"", 5 | "startPosition": { "row": 0, "column": 0 }, 6 | "endPosition": { "row": 18, "column": 3 }, 7 | "children": [ 8 | { 9 | "kind": "variable", 10 | "plainText": "createPlugins", 11 | "startPosition": { "row": 0, "column": 9 }, 12 | "endPosition": { "row": 0, "column": 22 }, 13 | "landingPosition": { "row": 0, "column": 9 }, 14 | "children": [] 15 | }, 16 | { 17 | "kind": "constant", 18 | "plainText": "plugins", 19 | "startPosition": { "row": 2, "column": 6 }, 20 | "endPosition": { "row": 2, "column": 96 }, 21 | "landingPosition": { "row": 2, "column": 6 }, 22 | "children": [ 23 | { 24 | "kind": "property", 25 | "plainText": "tsconfig", 26 | "startPosition": { "row": 2, "column": 40 }, 27 | "endPosition": { "row": 2, "column": 71 }, 28 | "landingPosition": { "row": 2, "column": 40 }, 29 | "children": [] 30 | } 31 | ] 32 | }, 33 | { 34 | "kind": "constant", 35 | "plainText": "default", 36 | "startPosition": { "row": 4, "column": 0 }, 37 | "endPosition": { "row": 18, "column": 1 }, 38 | "children": [] 39 | } 40 | ] 41 | } 42 | ] 43 | -------------------------------------------------------------------------------- /src/binding/node.cc: -------------------------------------------------------------------------------- 1 | #ifdef Zadeh_NODE_BINDING // only defined for building the Node-js binding 2 | 3 | #include "./node.h" 4 | 5 | namespace zadeh { 6 | 7 | Napi::Object ZadehNode::Init(Napi::Env env, Napi::Object exports) { 8 | Napi::HandleScope scope(env); 9 | 10 | // define ZadehNode class in JS 11 | const auto func = 12 | DefineClass(env, "Zadeh", 13 | {// member functions in JS 14 | InstanceMethod("filter", &ZadehNode::filter), 15 | InstanceMethod("filterIndices", &ZadehNode::filterIndices), 16 | InstanceMethod("filterTree", &ZadehNode::filterTree), 17 | InstanceMethod("filterIndicesTree", &ZadehNode::filterIndicesTree), 18 | InstanceMethod("setArrayFiltererCandidates", &ZadehNode::setArrayFiltererCandidates), 19 | InstanceMethod("setTreeFiltererCandidates", &ZadehNode::setTreeFiltererCandidates)}); 20 | // export ZadehNode class to JS 21 | exports.Set("Zadeh", func); 22 | 23 | exports.Set("score", Napi::Function::New(env, score)); 24 | exports.Set("match", Napi::Function::New(env, match)); 25 | exports.Set("wrap", Napi::Function::New(env, wrap)); 26 | return exports; 27 | } 28 | 
29 | } // namespace zadeh 30 | 31 | Napi::Object InitAll(Napi::Env env, Napi::Object exports) { return zadeh::ZadehNode::Init(env, exports); } 32 | 33 | NODE_API_MODULE(NODE_GYP_MODULE_NAME, InitAll); 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /test/fixtures/tree-filterIndices-disp.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "data": "disposable", "index": 3, "parent_indices": [0, 30, 0, 5] }, 3 | { "data": "disposable", "index": 3, "parent_indices": [0, 31, 5] }, 4 | { "data": "displayLayer", "index": 0, "parent_indices": [0, 30, 0, 87] }, 5 | { "data": "displayLayer", "index": 0, "parent_indices": [0, 31, 87] }, 6 | { "data": "displayBuffer", "index": 9, "parent_indices": [0, 30, 0] }, 7 | { "data": "displayBuffer", "index": 9, "parent_indices": [0, 31] }, 8 | { "data": "displayLayerParams", "index": 5, "parent_indices": [0, 30, 0, 6] }, 9 | { "data": "displayLayerParams", "index": 0, "parent_indices": [0, 30, 0, 15] }, 10 | { "data": "displayLayerParams", "index": 5, "parent_indices": [0, 31, 6] }, 11 | { "data": "displayLayerParams", "index": 0, "parent_indices": [0, 31, 15] }, 12 | { "data": "Disposable", "index": 6, "parent_indices": [0] }, 13 | { "data": "CompositeDisposable", "index": 5, "parent_indices": [0] }, 14 | { "data": "subscribeToDisplayLayer", "index": 50, "parent_indices": [0, 30, 0] }, 15 | { "data": "subscribeToDisplayLayer", "index": 50, "parent_indices": [0, 31] }, 16 | { "data": "onDidStopChanging", "index": 57, "parent_indices": [0, 30, 0] }, 17 | { "data": "onDidStopChanging", "index": 57, "parent_indices": [0, 31] }, 18 | { "data": "onDidChangeScrollTop", "index": 81, "parent_indices": [0, 30, 0] }, 19 | { "data": "onDidChangeScrollTop", "index": 81, "parent_indices": [0, 31] }, 20 | { "data": "onDidChangeSoftWrapped", "index": 60, "parent_indices": [0, 30, 0] }, 21 | { "data": "onDidChangeSoftWrapped", "index": 60, "parent_indices": [0, 31] }, 22 | { "data": "onDidChangeCursorPosition", "index": 58, "parent_indices": [0, 30, 0] }, 23 | { "data": "onDidChangeCursorPosition", "index": 58, "parent_indices": [0, 31] } 24 | ] 25 | -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | #ifndef Zadeh_common_h_ 2 | #define Zadeh_common_h_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace zadeh { 14 | 15 | using namespace std; 16 | 17 | static const auto kMaxThreads = std::thread::hardware_concurrency(); 18 | 19 | #ifdef ENABLE_DEBUG 20 | // TODO does not work anymore because we added explicit to constructors 21 | // Safe string class that logs error when index is accessed outside the string. 
22 | class SafeString : public std::string { 23 | public: 24 | explicit SafeString() = default; 25 | explicit SafeString(const std::string &s) : std::string(s) {} 26 | const char &operator[](size_t i) const { 27 | if (i >= size()) { 28 | printf("ERROR string index access index=%zu str=%s\n", i, c_str()); 29 | } 30 | return at(i); 31 | } 32 | }; 33 | 34 | using Element = SafeString; 35 | using CandidateString = SafeString; 36 | #else 37 | using Element = string; 38 | using CandidateString = string; 39 | #endif 40 | 41 | using CandidateIndex = size_t; 42 | 43 | using Score = float; 44 | 45 | struct PreparedQuery { 46 | Element query; 47 | Element query_lw; 48 | Element core; 49 | Element core_lw; 50 | Element core_up; 51 | int depth = 0; 52 | Element ext; 53 | std::set charCodes{}; 54 | 55 | explicit PreparedQuery(const Element &q, const char pathSeparator); 56 | }; 57 | 58 | Element ToLower(const Element &s) { 59 | auto snew = string(s.size(), ' '); // new string 60 | std::transform(s.begin(), s.end(), snew.begin(), ::tolower); 61 | return snew; 62 | } 63 | 64 | Element ToUpper(const Element &s) { 65 | auto snew = string(s.size(), ' '); // new string 66 | std::transform(s.begin(), s.end(), snew.begin(), ::toupper); 67 | return snew; 68 | } 69 | 70 | auto get_num_chunks(const size_t N) { return N < 1000u * kMaxThreads ? N / 1000u + 1u : kMaxThreads; } 71 | 72 | } // namespace zadeh 73 | #endif // Zadeh_common_h_ 74 | -------------------------------------------------------------------------------- /benchmark/benchmark.js: -------------------------------------------------------------------------------- 1 | require("coffeescript/register") 2 | const fs = require("fs") 3 | const path = require("path") 4 | const { start_timer, elapsed_time, doFilterTest } = require("./testutils") 5 | 6 | const Zadeh = require("../index") 7 | const legacy = require("fuzzaldrin-plus") 8 | 9 | const lines = fs.readFileSync(path.join(__dirname, "data.txt"), "utf8").trim().split("\n") 10 | const forceAllMatch = { 11 | maxInners: -1, 12 | } 13 | const mitigation = { 14 | maxInners: Math.floor(0.2 * lines.length), 15 | } 16 | 17 | // warmup + compile 18 | Zadeh.filter(lines, "index", forceAllMatch) 19 | legacy.filter(lines, "index") 20 | 21 | doFilterTest("~10% of results are positive, mix exact & fuzzy", lines, "index") 22 | doFilterTest("~10% of results are positive, Fuzzy match", lines, "indx") 23 | doFilterTest("~1% of results are positive, fuzzy", lines, "walkdr") 24 | doFilterTest("~98% of results are positive, mostly Exact match", lines, "node", forceAllMatch) 25 | doFilterTest("~98% of results are positive, Acronym match", lines, "nm") 26 | doFilterTest("~98% of results + Fuzzy match, [Worst case scenario]", lines, "nm", forceAllMatch) 27 | doFilterTest("~98% of results + Fuzzy match, [Mitigation]", lines, "nm", mitigation) 28 | doFilterTest("~98% of results + Fuzzy match, [Worst case but shorter string]", lines, "ndem", forceAllMatch) 29 | 30 | const query = "index" 31 | const t1 = start_timer() 32 | const prepared = Zadeh.prepareQuery(query) 33 | for (const line of lines) { 34 | Zadeh.match(line, query, { 35 | preparedQuery: prepared, 36 | }) 37 | } 38 | elapsed_time(t1, `Matching ${lines.length} results for 'index' (Prepare in advance)`) 39 | 40 | const t2 = start_timer() 41 | for (const line of lines) { 42 | Zadeh.match(line, query) 43 | } 44 | elapsed_time(t2, `Matching ${lines.length} results for 'index' (cache)`) 45 | // replace by 'prepQuery ?= scorer.prepQuery(query)' to test without cache. 
46 | 47 | const t3 = start_timer() 48 | for (const line of lines) { 49 | legacy.match(line, query) 50 | } 51 | elapsed_time(t3, `Matching ${lines.length} results for 'index' (_legacy_)`) 52 | // replace by `prepQuery ? = scorer.prepQuery(query) to test without cache. 53 | -------------------------------------------------------------------------------- /src/data_interface.h: -------------------------------------------------------------------------------- 1 | #ifndef Zadeh_data_interface_h_ 2 | #define Zadeh_data_interface_h_ 3 | 4 | #include "common.h" 5 | 6 | namespace zadeh { 7 | 8 | /** Data Interface */ 9 | 10 | /** Initialize array */ 11 | template 12 | ArrayType init(const SizeType len, const AllocatorType &alloc); 13 | 14 | template ObjectType init(const AllocatorType &alloc); 15 | 16 | template 17 | ObjectType copy(const ObjectType &obj, const AllocatorType &alloc); 18 | 19 | template ReferenceType get_ref(const ValueType &value); 20 | 21 | // TODO do we need manual releasing? 22 | // template 23 | // void release_ref(ReferenceType reference); 24 | 25 | /** Index array */ 26 | template 27 | ElementType get_at(const ArrayType &candidates, const IndexType iCandidate); 28 | 29 | /** Get size of array */ 30 | template SizeType get_size(const ArrayType &candidates); 31 | 32 | /** Set element of array */ 33 | template 34 | void set_at(ArrayType &candidates, ElementType &&value, const IndexType iCandidate); 35 | 36 | /** Get children of a tree */ 37 | template 38 | ChildType get_children(const ParentType &tree_object, const string &children_key, const AllocatorType &env); 39 | 40 | template 41 | optional may_get_children(const ParentType &tree_object, const string &children_key); 42 | 43 | /** Implementation for vector */ 44 | 45 | template <> vector init(const size_t len, const std::allocator &alloc) { 46 | auto out = vector(alloc); 47 | out.reserve(len); 48 | return out; 49 | } 50 | 51 | template <> CandidateString get_at(const vector &candidates, const unsigned int iCandidate) { 52 | return candidates[iCandidate]; 53 | } 54 | 55 | template <> size_t get_size(const vector &candidates) { return candidates.size(); } 56 | 57 | template <> 58 | void set_at(vector &candidates, CandidateString &&value, const size_t iCandidate) { 59 | candidates.push_back(move(value)); 60 | } 61 | 62 | template <> vector get_ref(const vector &vect) { return vect; } 63 | 64 | // template<> 65 | // void release_ref(vector vect) { 66 | // /* do nothing */ 67 | // } 68 | 69 | } // namespace zadeh 70 | #endif 71 | -------------------------------------------------------------------------------- /test/fixtures/tree-filterIndices-dips.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "data": "didUpdateStyles", "index": 2, "parent_indices": [0, 30, 0] }, 3 | { "data": "didUpdateStyles", "index": 2, "parent_indices": [0, 31] }, 4 | { "data": "displayLayerParams", "index": 5, "parent_indices": [0, 30, 0, 6] }, 5 | { "data": "displayLayerParams", "index": 0, "parent_indices": [0, 30, 0, 15] }, 6 | { "data": "displayLayerParams", "index": 5, "parent_indices": [0, 31, 6] }, 7 | { "data": "displayLayerParams", "index": 0, "parent_indices": [0, 31, 15] }, 8 | { "data": "disposable", "index": 3, "parent_indices": [0, 30, 0, 5] }, 9 | { "data": "disposable", "index": 3, "parent_indices": [0, 31, 5] }, 10 | { "data": "didUpdateScrollbarStyles", "index": 3, "parent_indices": [0, 30, 0] }, 11 | { "data": "didUpdateScrollbarStyles", "index": 3, "parent_indices": [0, 31] }, 12 | { 
"data": "onDidTerminatePendingState", "index": 49, "parent_indices": [0, 30, 0] }, 13 | { "data": "onDidTerminatePendingState", "index": 49, "parent_indices": [0, 31] }, 14 | { "data": "Disposable", "index": 6, "parent_indices": [0] }, 15 | { "data": "onDidChangeCursorPosition", "index": 58, "parent_indices": [0, 30, 0] }, 16 | { "data": "onDidChangeCursorPosition", "index": 58, "parent_indices": [0, 31] }, 17 | { "data": "onDidUpdateDecorations", "index": 85, "parent_indices": [0, 30, 0] }, 18 | { "data": "onDidUpdateDecorations", "index": 85, "parent_indices": [0, 31] }, 19 | { "data": "CompositeDisposable", "index": 5, "parent_indices": [0] }, 20 | { "data": "getSaveDialogOptions", "index": 118, "parent_indices": [0, 30, 0] }, 21 | { "data": "getSaveDialogOptions", "index": 118, "parent_indices": [0, 31] }, 22 | { "data": "openEditorPathSegmentsWithSameFilename", "index": 2, "parent_indices": [0, 30, 0, 107] }, 23 | { "data": "openEditorPathSegmentsWithSameFilename", "index": 2, "parent_indices": [0, 31, 107] }, 24 | { "data": "scopeDescriptorForBufferPosition", "index": 342, "parent_indices": [0, 30, 0] }, 25 | { "data": "scopeDescriptorForBufferPosition", "index": 342, "parent_indices": [0, 31] }, 26 | { "data": "syntaxTreeScopeDescriptorForBufferPosition", "index": 343, "parent_indices": [0, 30, 0] }, 27 | { "data": "syntaxTreeScopeDescriptorForBufferPosition", "index": 343, "parent_indices": [0, 31] }, 28 | { "data": "updateAutoIndentOnPaste", "index": 18, "parent_indices": [0, 30, 0] }, 29 | { "data": "updateAutoIndentOnPaste", "index": 18, "parent_indices": [0, 31] }, 30 | { "data": "shouldAutoIndentOnPaste", "index": 392, "parent_indices": [0, 30, 0] }, 31 | { "data": "shouldAutoIndentOnPaste", "index": 392, "parent_indices": [0, 31] }, 32 | { "data": "destroyFoldsContainingBufferPositions", "index": 373, "parent_indices": [0, 30, 0] }, 33 | { "data": "destroyFoldsContainingBufferPositions", "index": 373, "parent_indices": [0, 31] } 34 | ] 35 | -------------------------------------------------------------------------------- /benchmark/testutils.js: -------------------------------------------------------------------------------- 1 | const { StringArrayFilterer } = require("../index") 2 | const legacy = require("fuzzaldrin-plus") 3 | 4 | let performance = null 5 | 6 | try { 7 | performance = require("perf_hooks").performance 8 | } catch { 9 | performance = window.performance 10 | } 11 | 12 | /*function areArraysEqual2(a, b) { 13 | if (a.length != b.length) 14 | return false 15 | for (let i = 0; i < a.length; ++i) { 16 | if (a[i] !== b[i]) 17 | return false 18 | } 19 | return true 20 | }*/ 21 | 22 | function areArraysEqual(a, b) { 23 | a = new Set(a) 24 | b = new Set(b) 25 | if (a.size !== b.size) { 26 | return false 27 | } 28 | for (const a_item of a) { 29 | if (!b.has(a_item)) { 30 | return false 31 | } 32 | } 33 | return true 34 | } 35 | 36 | function start_timer() { 37 | return performance.now() 38 | } 39 | exports.start_timer = start_timer 40 | 41 | function elapsed_time(timer_start_time, testName, decimals = 2) { 42 | const time_final = performance.now() 43 | const elapsed = (time_final - timer_start_time).toFixed(decimals) 44 | if (testName) { 45 | console.log(`${testName} took ${" ".repeat(80 - testName.length)} ${elapsed} ms`) 46 | } 47 | return parseFloat(elapsed) 48 | } 49 | exports.elapsed_time = elapsed_time 50 | 51 | function doFilterTest(test_name, lines, query, params) { 52 | console.log(`====== Running test - query:${query} ======`) 53 | const strArrFilterer = new 
StringArrayFilterer() // We exclude the class construction time 54 | const timer_start_time = start_timer() 55 | strArrFilterer.setCandidates(lines) 56 | const res_actual = strArrFilterer.filter(query, params) 57 | const elapsed = elapsed_time(timer_start_time) 58 | 59 | const timer_start_time_legacy = start_timer() 60 | const res_expected = legacy.filter(lines, query) 61 | const elapsed_legacy = elapsed_time(timer_start_time_legacy) 62 | 63 | if (res_actual.length !== res_expected.length) { 64 | console.error(`Results count changed! ${res_actual.length} instead of ${res_expected.length}`) 65 | process.exit(1) 66 | } 67 | if (!areArraysEqual(res_actual, res_expected)) { 68 | console.error(`Results different`) 69 | console.error(` counts: ${res_actual.length}`) 70 | process.exit(1) 71 | } 72 | 73 | if (test_name) { 74 | console.log(test_name) 75 | } 76 | console.log(`zadeh vs. legacy: ${" ".repeat(50)} ${elapsed} ms | ${elapsed_legacy} ms`) 77 | console.log(`length of the result: ${res_actual.length}, length of the lines: ${lines.length}`) 78 | 79 | if (elapsed > elapsed_legacy) { 80 | console.error(`${" ".repeat(75)} zadeh is SLOWER`) 81 | } 82 | console.log("") 83 | } 84 | 85 | function averageArray(nums) { 86 | return nums.reduce((a, b) => a + b, 0) / nums.length || 0 87 | } 88 | 89 | module.exports = { 90 | start_timer, 91 | elapsed_time, 92 | 93 | doFilterTest, 94 | averageArray, 95 | } 96 | -------------------------------------------------------------------------------- /benchmark/benchmark-tree.js: -------------------------------------------------------------------------------- 1 | const fs = require("fs") 2 | const path = require("path") 3 | const { averageArray, start_timer, elapsed_time } = require("./testutils") 4 | 5 | const { TreeFilterer, filterTree } = require("../index") 6 | 7 | const outlineData = JSON.parse(fs.readFileSync(path.join(__dirname, "tree.json"), "utf8")) 8 | 9 | // Loading using TreeFilterer 10 | { 11 | const t1 = start_timer() 12 | const treeFilterer = new TreeFilterer() 13 | treeFilterer.setCandidates(outlineData, "plainText", "children") 14 | elapsed_time(t1, `TreeFilterer.setCandidates:`) 15 | 16 | const t2 = start_timer() 17 | const out_text = treeFilterer.filterIndices("text") 18 | const t2_delta = elapsed_time(t2, `TreeFilterer.filterIndices text`) 19 | 20 | // 21 | const t3 = start_timer() 22 | const out_dips = treeFilterer.filterIndices("dips") 23 | const t3_delta = elapsed_time(t3, `TreeFilterer.filterIndices dips`) 24 | 25 | // 26 | const t4 = start_timer() 27 | const out_disp = treeFilterer.filterIndices("disp") 28 | const t4_delta = elapsed_time(t4, `TreeFilterer.filterIndices disp`) 29 | 30 | const t5 = start_timer() 31 | const out_txt = treeFilterer.filterIndices("txt") 32 | const t5_delta = elapsed_time(t5, `TreeFilterer.filterIndices txt`) 33 | 34 | const t6 = start_timer() 35 | const out_getBuffer = treeFilterer.filterIndices("getBuffer") 36 | const t6_delta = elapsed_time(t6, `TreeFilterer.filterIndices getBuffer`) 37 | 38 | console.log( 39 | `\nTreeFilterer.filterIndices average: ${" ".repeat(50)} ${averageArray([ 40 | t2_delta, 41 | t3_delta, 42 | t4_delta, 43 | t5_delta, 44 | t6_delta, 45 | ]).toFixed(3)} ms\n` 46 | ) 47 | } 48 | 49 | // Loading using TreeFilterer 50 | { 51 | const t1 = start_timer() 52 | const treeFilterer = new TreeFilterer() 53 | treeFilterer.setCandidates(outlineData, "plainText", "children") 54 | elapsed_time(t1, `TreeFilterer.setCandidates:`) 55 | 56 | const t2 = start_timer() 57 | const out_text = 
treeFilterer.filter("text") 58 | const t2_delta = elapsed_time(t2, `TreeFilterer.filter text`) 59 | 60 | // 61 | const t3 = start_timer() 62 | const out_dips = treeFilterer.filter("dips") 63 | const t3_delta = elapsed_time(t3, `TreeFilterer.filter dips`) 64 | 65 | // 66 | const t4 = start_timer() 67 | const out_disp = treeFilterer.filter("disp") 68 | const t4_delta = elapsed_time(t4, `TreeFilterer.filter disp`) 69 | 70 | const t5 = start_timer() 71 | const out_txt = treeFilterer.filter("txt") 72 | const t5_delta = elapsed_time(t5, `TreeFilterer.filter txt`) 73 | 74 | const t6 = start_timer() 75 | const out_getBuffer = treeFilterer.filter("getBuffer") 76 | const t6_delta = elapsed_time(t6, `TreeFilterer.filter getBuffer`) 77 | 78 | console.log( 79 | `\nTreeFilterer.filter average: ${" ".repeat(57)} ${averageArray([ 80 | t2_delta, 81 | t3_delta, 82 | t4_delta, 83 | t5_delta, 84 | t6_delta, 85 | ]).toFixed(3)} ms\n` 86 | ) 87 | } 88 | -------------------------------------------------------------------------------- /src/StringArrayFilterer.h: -------------------------------------------------------------------------------- 1 | #ifndef Zadeh_TreeFilterer_H 2 | #define Zadeh_TreeFilterer_H 3 | 4 | #include "common.h" 5 | #include "data_interface.h" 6 | #include "options.h" 7 | #include "filter.h" 8 | 9 | namespace zadeh { 10 | 11 | template > 13 | class StringArrayFilterer { 14 | private: 15 | vector> partitioned_candidates{}; 16 | /** Should we keep a reference to the candidates. Set to `true` if you want to call `::filter` method */ 17 | bool keepReference; 18 | /** Reference to the candidates used in `::filter` method */ 19 | ReferenceType candidates_view; 20 | 21 | public: 22 | StringArrayFilterer() = default; 23 | 24 | StringArrayFilterer(const ArrayType &candidates, const bool keepReference_ = true) { 25 | keepReference = keepReference_; 26 | 27 | set_candidates(candidates); 28 | } 29 | 30 | auto set_candidates(const ArrayType &candidates, const bool keepReference_ = true) { 31 | keepReference = keepReference_; 32 | 33 | const auto N = get_size(candidates); 34 | const auto num_chunks = get_num_chunks(N); 35 | 36 | partitioned_candidates.clear(); 37 | partitioned_candidates.resize(num_chunks); 38 | 39 | auto cur_start = 0u; 40 | for (auto iChunk = 0u; iChunk < num_chunks; iChunk++) { 41 | 42 | auto chunk_size = N / num_chunks; 43 | // Distribute remainder among the chunks. 
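            // e.g. with N = 10 candidates and num_chunks = 3, the chunk sizes work out to 4, 3, and 3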
44 | if (iChunk < N % num_chunks) { 45 | chunk_size++; 46 | } 47 | for (size_t iCandidate = cur_start; iCandidate < cur_start + chunk_size; iCandidate++) { 48 | partitioned_candidates[iChunk].emplace_back(get_at(candidates, iCandidate)); 49 | } 50 | cur_start += chunk_size; 51 | } 52 | 53 | if (keepReference) { 54 | // store a view of candidates in case filter was called 55 | candidates_view = get_ref(candidates); 56 | } 57 | } 58 | 59 | auto filter_indices(const std::string &query, const size_t maxResults = 0, const bool usePathScoring = true, 60 | const bool useExtensionBonus = false) { 61 | // optimization for no candidates 62 | if (partitioned_candidates.empty()) { 63 | return vector(); 64 | } 65 | 66 | const Options options(query, maxResults, usePathScoring, useExtensionBonus); 67 | return zadeh::filter(partitioned_candidates, query, options); 68 | } 69 | 70 | auto filter(const std::string &query, const AllocatorType &env, const size_t maxResults = 0, 71 | const bool usePathScoring = true, const bool useExtensionBonus = false) { 72 | if (!keepReference || candidates_view == nullptr) { 73 | return init(static_cast(0), 74 | env); // return an empty vector (should we throw?) 75 | } 76 | const auto filtered_indices = filter_indices(query, maxResults, usePathScoring, useExtensionBonus); 77 | const auto filter_indices_length = filtered_indices.size(); 78 | auto res = init(filter_indices_length, env); 79 | auto candidates = candidates_view.Value(); 80 | for (size_t i = 0; i < filter_indices_length; i++) { 81 | set_at(res, get_at(candidates, filtered_indices[i]), i); 82 | } 83 | return res; 84 | } 85 | }; 86 | 87 | } // namespace zadeh 88 | #endif 89 | -------------------------------------------------------------------------------- /test/match-spec.coffee: -------------------------------------------------------------------------------- 1 | {match} = require '../index' 2 | path = require 'path' 3 | 4 | describe "match(string, query)", -> 5 | 6 | it "returns an array of matched and unmatched strings", -> 7 | expect(match('Hello World', 'he')).toEqual [0, 1] 8 | expect(match()).toEqual [] 9 | expect(match('Hello World', 'wor')).toEqual [6..8] 10 | 11 | expect(match('Hello World', 'd')).toEqual [10] 12 | expect(match('Hello World', 'elwor')).toEqual [1, 2, 6, 7, 8] 13 | expect(match('Hello World', 'er')).toEqual [1, 8] 14 | expect(match('Hello World', '')).toEqual [] 15 | expect(match(null, 'he')).toEqual [] 16 | expect(match('', '')).toEqual [] 17 | expect(match('', 'abc')).toEqual [] 18 | 19 | it "matches paths with slashes", -> 20 | expect(match(path.join('X', 'Y'), path.join('X', 'Y'))).toEqual [0..2] 21 | expect(match(path.join('X', 'X-x'), 'X')).toEqual [0, 2] 22 | expect(match(path.join('X', 'Y'), 'XY')).toEqual [0, 2] 23 | expect(match(path.join('-', 'X'), 'X')).toEqual [2] 24 | expect(match(path.join('X-', '-'), "X#{path.sep}")).toEqual [0, 2] 25 | 26 | it "double matches characters in the path and the base", -> 27 | expect(match(path.join('XY', 'XY'), 'XY')).toEqual [0, 1, 3, 4] 28 | expect(match(path.join('--X-Y-', '-X--Y'), 'XY')).toEqual [2, 4, 8, 11] 29 | 30 | it "prefer whole word to scattered letters", -> 31 | expect(match('fiddle gruntfile filler', 'file')).toEqual [ 12, 13, 14,15] 32 | expect(match('fiddle file', 'file')).toEqual [ 7, 8, 9, 10] 33 | expect(match('find le file', 'file')).toEqual [ 8, 9, 10, 11] 34 | 35 | it "prefer whole word to scattered letters, even without exact matches", -> 36 | expect(match('fiddle gruntfile xfiller', 'filex')).toEqual [ 12, 13, 14,15, 17] 
37 | expect(match('fiddle file xfiller', 'filex')).toEqual [ 7, 8, 9, 10, 12] 38 | expect(match('find le file xfiller', 'filex')).toEqual [ 8, 9, 10, 11, 13] 39 | 40 | it "prefer exact match", -> 41 | expect(match('filter gruntfile filler', 'file')).toEqual [ 12, 13, 14, 15] 42 | 43 | it "prefer case sensitive exact match", -> 44 | expect(match('ccc CCC cCc CcC CCc', 'ccc')).toEqual [ 0, 1, 2] 45 | expect(match('ccc CCC cCc CcC CCc', 'CCC')).toEqual [ 4, 5, 6] 46 | expect(match('ccc CCC cCc CcC CCc', 'cCc')).toEqual [ 8, 9, 10] 47 | expect(match('ccc CCC cCc CcC CCc', 'CcC')).toEqual [ 12, 13, 14] 48 | expect(match('ccc CCC cCc CcC CCc', 'CCc')).toEqual [ 16, 17, 18] 49 | 50 | it "prefer camelCase to scattered letters", -> 51 | expect(match('ImportanceTableCtrl', 'itc')).toEqual [0,10,15] 52 | 53 | it "prefer acronym to scattered letters", -> 54 | expect(match('action_config', 'acon')).toEqual [ 0, 7, 8, 9] 55 | expect(match('application_control', 'acon')).toEqual [ 0, 12, 13, 14] 56 | 57 | it "account for case in selecting camelCase vs consecutive", -> 58 | expect(match('0xACACAC: CamelControlClass.ccc', 'CCC')).toEqual [ 10, 15, 22] 59 | expect(match('0xACACAC: CamelControlClass.ccc', 'ccc')).toEqual [ 28, 29, 30] 60 | 61 | it "limit consecutive inside word boundary", -> 62 | 63 | #expect(match('Interns And Roles - Patterns Roles', 'interns roles')).toEqual [ 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16] 64 | # 65 | # the longest substring is "terns roles" 66 | # it's also not very intuitive to split the word interns like that. 67 | # limit consecutive at word boundary will help to prevent spiting words. 68 | # 69 | # Aside from doing more computation while scanning consecutive. 70 | # The main problem is that we don't reset the consecutive count unless we encounter a negative match. 
71 | # 72 | -------------------------------------------------------------------------------- /src/binding/binding.ts: -------------------------------------------------------------------------------- 1 | import type { Tree, TreeFilterIndicesResult } from "./index" 2 | 3 | export declare class Zadeh { 4 | constructor() 5 | 6 | filter(query: string, maxResult: number, usePathScoring: boolean, useExtensionBonus: boolean): Array 7 | filterIndices(query: string, maxResult: number, usePathScoring: boolean, useExtensionBonus: boolean): Array 8 | 9 | setArrayFiltererCandidates(candidateStrings: Array): boolean 10 | 11 | filterTree(query: string, maxResult: number, usePathScoring: boolean, useExtensionBonus: boolean): Array 12 | 13 | filterIndicesTree( 14 | query: string, 15 | maxResult: number, 16 | usePathScoring: boolean, 17 | useExtensionBonus: boolean 18 | ): Array 19 | 20 | // TODO use generic Tree type 21 | setTreeFiltererCandidates(candidateTrees: Array, dataKey: string, childrenKey: string): boolean 22 | } 23 | 24 | export declare function score( 25 | candidate: string, 26 | query: string, 27 | usePathScoring: boolean, 28 | useExtensionBonus: boolean 29 | ): number 30 | 31 | export declare function match(str: string, query: string, pathSeparator: string): Array 32 | 33 | export declare function wrap(str: string, query: string, pathSeparator: string): string 34 | 35 | // Argument validators 36 | 37 | export function validate_filter(...args: Parameters) { 38 | if ( 39 | !( 40 | typeof args[0] === "string" && 41 | typeof args[1] === "number" && 42 | typeof args[2] === "boolean" && 43 | typeof args[3] === "boolean" 44 | ) 45 | ) { 46 | throw new Error(`Invalid arguments for filter: ${args}`) 47 | } 48 | } 49 | 50 | export function validate_setArrayFiltererCandidates(...args: Parameters) { 51 | if (!Array.isArray(args[0])) { 52 | throw new Error(`Invalid arguments for setArrayFiltererCandidates: ${args}`) 53 | } 54 | } 55 | 56 | export function validate_filterTree(...args: Parameters) { 57 | if ( 58 | !( 59 | typeof args[0] === "string" && 60 | typeof args[1] === "number" && 61 | typeof args[2] === "boolean" && 62 | typeof args[3] === "boolean" 63 | ) 64 | ) { 65 | throw new Error(`Invalid arguments for filterTree: ${args}`) 66 | } 67 | } 68 | 69 | export function validate_setTreeFiltererCandidates(...args: Parameters) { 70 | if (!(Array.isArray(args[0]) && typeof args[1] === "string" && typeof args[2] === "string")) { 71 | throw new Error(`Invalid arguments for setTreeFiltererCandidates: ${args}`) 72 | } 73 | } 74 | 75 | export function validate_score(...args: Parameters) { 76 | if ( 77 | !( 78 | typeof args[0] === "string" && 79 | typeof args[1] === "string" && 80 | typeof args[2] === "boolean" && 81 | typeof args[3] === "boolean" 82 | ) 83 | ) { 84 | throw new Error(`Invalid arguments for score: ${args}`) 85 | } 86 | } 87 | 88 | export function validate_match(...args: Parameters) { 89 | if ( 90 | !( 91 | typeof args[0] === "string" && 92 | typeof args[1] === "string" && 93 | typeof args[2] === "string" && 94 | /** PathSeparator */ args[2].length === 1 95 | ) 96 | ) { 97 | throw new Error(`Invalid arguments for match: ${args}`) 98 | } 99 | } 100 | 101 | export function validate_wrap(...args: Parameters) { 102 | if ( 103 | !( 104 | typeof args[0] === "string" && 105 | typeof args[1] === "string" && 106 | typeof args[2] === "string" && 107 | /** PathSeparator */ args[2].length === 1 108 | ) 109 | ) { 110 | throw new Error(`Invalid arguments for wrap: ${args}`) 111 | } 112 | } 113 | 
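// Illustrative usage sketch (not part of the original file; the exact call sequence is an
// assumption for illustration only): a caller would run a validator before handing the same
// arguments to the corresponding native method declared above, e.g.
//
//   const zadeh = new Zadeh()
//   validate_setArrayFiltererCandidates(["Call", "Me", "Maybe"])
//   zadeh.setArrayFiltererCandidates(["Call", "Me", "Maybe"])
//   validate_filter("me", 10, false, false)
//   const indices = zadeh.filterIndices("me", 10, false, false)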
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zadeh", 3 | "version": "3.0.0-beta.4", 4 | "description": "Blazing fast library for fuzzy filtering, matching, and other fuzzy things!", 5 | "homepage": "https://github.com/atom-ide-community/zadeh", 6 | "bugs": { 7 | "url": "https://github.com/atom-ide-community/zadeh/issues" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "https://github.com/atom-ide-community/zadeh.git" 12 | }, 13 | "license": "MIT", 14 | "author": "Amin Yahyaabadi, Rajendran T", 15 | "main": "./index.js", 16 | "source": "./src/binding/index.ts", 17 | "files": [ 18 | "src", 19 | "prebuilds", 20 | "index.*", 21 | "*.d.ts", 22 | "binding.gyp" 23 | ], 24 | "scripts": { 25 | "benchmark": "npm run benchmark.small && npm run benchmark.regular && npm run benchmark.large && npm run benchmark.tree", 26 | "benchmark.large": "node benchmark/benchmark-large.js", 27 | "benchmark.regular": "node benchmark/benchmark.js", 28 | "benchmark.small": "node benchmark/benchmark-small.js", 29 | "benchmark.tree": "node benchmark/benchmark-tree.js", 30 | "build": "npm run build.native && npm run build.js", 31 | "build.js": "npm run types.js && cross-env NODE_ENV=production parcel build --target main ./src/binding/index.ts", 32 | "prebuild.native": "prebuildify --napi -t 12.0.0 -t electron@6.0.0 -t electron@9.3.5 --strip --tag-libc", 33 | "build.native": "node-gyp configure --release && node-gyp build --release", 34 | "build.native.debug": "node-gyp configure --debug && node-gyp build --debug", 35 | "bump": "ncu -u && ncu -u /parcel/ --greatest", 36 | "clean": "npm run clean.native && npm run clean.js", 37 | "clean.js": "shx rm -rf dist dist-test .rollup.cache .parcel-cache **/tsconfig.tsbuildinfo ./index.* ./*.d.ts", 38 | "clean.native": "shx rm -rf build prebuilds", 39 | "dev.js": "npm run types.js && cross-env NODE_ENV=development parcel watch --target main ./src/binding/index.ts", 40 | "examples.cpp": "cmake -S . -B ./build && cmake --build ./build --config Debug", 41 | "format": "prettier --write . && clang-format -i src/*.h src/binding/*.cc src/binding/*.h", 42 | "install": "node-gyp-build", 43 | "lint": "eslint . --fix", 44 | "prebuild.native.ia32": "prebuildify --napi --arch=ia32 -t 12.0.0 -t electron@6.0.0 -t electron@9.3.5 --strip --tag-libc", 45 | "prepare": "npm run clean.js && npm run build", 46 | "test": "shx rm -rf dist-test && shx cp -r test dist-test && coffee --compile dist-test && shx rm -rf dist-test/*.coffee && jasmine dist-test/*-spec.js", 47 | "test.format": "prettier . --check", 48 | "test.lint": "eslint .", 49 | "tidy": "clang-tidy src/*.cc src/*.h", 50 | "tidy.fix": "clang-tidy src/*.cc src/*.h --fix --fix-errors", 51 | "types.js": "tsc -p ./src/binding/tsconfig.json --emitDeclarationOnly && shx mv ./src/binding/*.d.ts ." 
52 | }, 53 | "prettier": "prettier-config-atomic", 54 | "dependencies": { 55 | "node-addon-api": "~4.2.0", 56 | "node-gyp-build": "^4.3.0" 57 | }, 58 | "devDependencies": { 59 | "coffeescript": "^2.6.1", 60 | "cross-env": "^7.0.3", 61 | "eslint-config-atomic": "^1.16.4", 62 | "fast-equals": "^2.0.3", 63 | "fuzzaldrin-plus": "^0.6.0", 64 | "jasmine": "^3.10.0", 65 | "parcel": "2.0.0", 66 | "prebuildify": "^4.2.1", 67 | "prettier-config-atomic": "^3.0.1", 68 | "shx": "^0.3.3", 69 | "terser-config-atomic": "^0.1.1", 70 | "typescript": "^4.4.4" 71 | }, 72 | "engines": { 73 | "atom": ">=1.52.0 <2.0.0", 74 | "electron": ">=6.0.0", 75 | "node": ">=12.0.0" 76 | }, 77 | "gypfile": true, 78 | "node": "./index.js", 79 | "targets": { 80 | "main": { 81 | "context": "node", 82 | "includeNodeModules": { 83 | "node-gyp-build": false 84 | }, 85 | "outputFormat": "commonjs", 86 | "isLibrary": true 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/filter.h: -------------------------------------------------------------------------------- 1 | #ifndef Zadeh_filter_h_ 2 | #define Zadeh_filter_h_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "common.h" 10 | #include "options.h" 11 | 12 | namespace zadeh { 13 | 14 | struct CandidateScore { 15 | // TODO non const 16 | Score score; 17 | CandidateIndex index; 18 | CandidateScore(const Score score_, const size_t index_) noexcept : score(score_), index(index_) {} 19 | 20 | bool operator<(const CandidateScore &other) const noexcept { return score > other.score; } 21 | }; 22 | 23 | using CandidateScoreVector = std::vector; 24 | 25 | void filter_internal(const std::vector &candidates, size_t start_index, const Element &query, 26 | const Options &options, size_t max_results, CandidateScoreVector &results) { 27 | const auto scoreProvider = options.usePathScoring ? path_scorer_score : scorer_score; 28 | auto results_size = results.size(); 29 | for (size_t i = 0, len = candidates.size(); i < len; i++) { 30 | const auto &candidate = candidates[i]; 31 | if (candidate.empty()) { 32 | continue; 33 | } 34 | const auto score = scoreProvider(candidate, query, options); 35 | if (score > 0) { 36 | results.emplace_back(score, start_index + i); 37 | ++results_size; // maintain size manually rather than calling results.size() every time 38 | if (results_size > max_results) { 39 | results.pop_back(); 40 | --results_size; 41 | } 42 | } 43 | } 44 | } 45 | 46 | std::vector sort_priority_queue(CandidateScoreVector &&candidates, size_t max_results) { 47 | // sort all the results 48 | std::sort(candidates.begin(), candidates.end()); 49 | 50 | // find the end based on max_results or the length of the results 51 | const auto end = min(max_results, candidates.size()); 52 | 53 | // make the return from the indices of the results 54 | std::vector ret; 55 | ret.reserve(end); 56 | for (auto i = 0u; i < end; i++) { 57 | ret.emplace_back(candidates[i].index); 58 | } 59 | 60 | return ret; 61 | } 62 | 63 | std::vector filter(const vector> &candidates, 64 | const Element &query, const Options &options) { 65 | const auto candidates_size = candidates.size(); 66 | assert(1 <= candidates_size); // TODO handled outside 67 | 68 | auto max_results = options.max_results; 69 | if (max_results == 0u) { 70 | max_results = std::numeric_limits::max(); 71 | } 72 | 73 | // Split the dataset and pass down to multiple threads. 
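    // (the loop below spawns one std::thread per remaining chunk, while the first chunk is scored on the calling thread)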
74 | vector threads; 75 | threads.reserve(candidates_size - 1); // 1 less thread 76 | 77 | auto results = vector(candidates_size); 78 | 79 | size_t start_index = 0; 80 | for (size_t i = 1; i < candidates_size; i++) { 81 | assert(1 <= i && i < candidates_size && i < results.size()); 82 | start_index += candidates[i - 1].size(); // inbounds 83 | threads.emplace_back(filter_internal, ref(candidates[i]), start_index, ref(query), ref(options), 84 | max_results, ref(results[i])); // inbounds 85 | } 86 | assert(threads.size() == candidates_size - 1 && results.size() == candidates_size); 87 | 88 | CandidateScoreVector top_k; 89 | // Do the work for first thread. 90 | filter_internal(candidates[0], 0, query, options, max_results, top_k); // inbounds (candidate_size >= 1) 91 | // Wait for threads to complete and merge the results. 92 | 93 | for (size_t i = 1; i < candidates_size; i++) { 94 | threads[i - 1].join(); // inbounds 95 | 96 | const auto new_results = results[i]; 97 | std::move(new_results.begin(), new_results.end(), std::back_inserter(top_k)); 98 | } 99 | 100 | return sort_priority_queue(move(top_k), max_results); 101 | } 102 | 103 | } // namespace zadeh 104 | #endif 105 | -------------------------------------------------------------------------------- /benchmark/benchmark-large.js: -------------------------------------------------------------------------------- 1 | require("coffeescript/register") 2 | const fs = require("fs") 3 | const path = require("path") 4 | const { elapsed_time, start_timer } = require("./testutils") 5 | 6 | const { StringArrayFilterer, ObjectArrayFilterer, filter } = require("../index") 7 | const legacy = require("fuzzaldrin-plus") 8 | 9 | const lines = fs.readFileSync(path.join(__dirname, "data-large.txt"), "utf8").trim().split("\n") 10 | const dict = lines.map((item) => { 11 | return { 12 | key: item, 13 | val: item, 14 | } 15 | }) 16 | 17 | const arrFilterer = new StringArrayFilterer(lines) 18 | 19 | const two_letter_tests = [ 20 | "dp", 21 | "la", 22 | "ow", 23 | "rb", 24 | "dg", 25 | "by", 26 | "pf", 27 | "fk", 28 | "qk", 29 | "pu", 30 | "cl", 31 | "cu", 32 | "cj", 33 | "dz", 34 | "mh", 35 | "sm", 36 | "qk", 37 | "cz", 38 | "nf", 39 | "rb", 40 | ] 41 | const three_letter_tests = [ 42 | "mxl", 43 | "ipe", 44 | "dvi", 45 | "sxg", 46 | "qiu", 47 | "mvw", 48 | "efa", 49 | "utz", 50 | "pxr", 51 | "dsr", 52 | "inw", 53 | "xck", 54 | "bqk", 55 | "ibv", 56 | "zbh", 57 | "ozj", 58 | "wht", 59 | "kny", 60 | "ccj", 61 | "dtv", 62 | ] 63 | 64 | const t1 = start_timer() 65 | for (const query of two_letter_tests) { 66 | legacy.filter(lines, query, { 67 | maxResults: 10, 68 | }) 69 | } 70 | elapsed_time(t1, "TwoLetter _legacy_") 71 | 72 | const t2 = start_timer() 73 | for (const query of two_letter_tests) { 74 | filter(lines, query, { 75 | maxResults: 10, 76 | }) 77 | } 78 | elapsed_time(t2, "TwoLetter deprecated filter") 79 | 80 | const t3 = start_timer() 81 | for (const query of two_letter_tests) { 82 | arrFilterer.filter(query, { 83 | maxResults: 10, 84 | }) 85 | } 86 | elapsed_time(t3, "TwoLetter StringArrayFilterer.filter") 87 | 88 | console.log("======") 89 | 90 | const t4 = start_timer() 91 | for (const query of three_letter_tests) { 92 | legacy.filter(lines, query, { 93 | maxResults: 10, 94 | }) 95 | } 96 | elapsed_time(t4, "ThreeLetter _legacy_") 97 | 98 | const t5 = start_timer() 99 | for (const query of three_letter_tests) { 100 | filter(lines, query, { 101 | maxResults: 10, 102 | }) 103 | } 104 | elapsed_time(t5, "ThreeLetter deprecated filter") 105 | 106 | const 
t6 = start_timer() 107 | for (const query of three_letter_tests) { 108 | arrFilterer.filter(query, { 109 | maxResults: 10, 110 | }) 111 | } 112 | elapsed_time(t6, "ThreeLetter StringArrayFilterer.filter") 113 | 114 | console.log("======") 115 | const obj = new ObjectArrayFilterer(dict, "key") 116 | 117 | const t7 = start_timer() 118 | for (const query of two_letter_tests) { 119 | obj.filter(query, { 120 | maxResults: 10, 121 | }) 122 | } 123 | elapsed_time(t7, "TwoLetter object filter") 124 | 125 | const t8 = start_timer() 126 | for (const query of three_letter_tests) { 127 | obj.filter(query, { 128 | maxResults: 10, 129 | }) 130 | } 131 | elapsed_time(t8, "ThreeLetter object filter") 132 | 133 | const t71 = start_timer() 134 | for (const query of two_letter_tests) { 135 | filter(dict, query, { 136 | maxResults: 10, 137 | key: "key", 138 | }) 139 | } 140 | elapsed_time(t71, "TwoLetter object deprecated filter") 141 | 142 | const t81 = start_timer() 143 | for (const query of three_letter_tests) { 144 | filter(dict, query, { 145 | maxResults: 10, 146 | key: "key", 147 | }) 148 | } 149 | elapsed_time(t81, "ThreeLetter object deprecated filter") 150 | 151 | console.log("======") 152 | 153 | const t9 = start_timer() 154 | const obj_1 = new StringArrayFilterer(lines) 155 | elapsed_time(t9, "StringArrayFilterer constructor") 156 | 157 | const t10 = start_timer() 158 | for (const query of two_letter_tests) { 159 | obj_1.filter(query, { 160 | maxResults: 10, 161 | }) 162 | } 163 | elapsed_time(t10, "TwoLetter StringArrayFilterer.filter") 164 | 165 | const t11 = start_timer() 166 | for (const query of three_letter_tests) { 167 | obj_1.filter(query, { 168 | maxResults: 10, 169 | }) 170 | } 171 | elapsed_time(t11, "ThreeLetter StringArrayFilterer.filter") 172 | 173 | console.log("======") 174 | 175 | const t12 = start_timer() 176 | const obj_2 = new ObjectArrayFilterer(dict, "key") 177 | elapsed_time(t12, "ObjectArrayFilterer constructor") 178 | 179 | const t13 = start_timer() 180 | for (const query of two_letter_tests) { 181 | obj_2.filter(query, { 182 | maxResults: 10, 183 | }) 184 | } 185 | elapsed_time(t13, "TwoLetter ObjectArrayFilterer.filter") 186 | 187 | const t14 = start_timer() 188 | for (const query of three_letter_tests) { 189 | obj_2.filter(query, { 190 | maxResults: 10, 191 | }) 192 | } 193 | elapsed_time(t14, "ThreeLetter ObjectArrayFilterer.filter") 194 | -------------------------------------------------------------------------------- /src/binding/node.h: -------------------------------------------------------------------------------- 1 | #ifdef Zadeh_NODE_BINDING // only defined for building the Node-js binding 2 | #ifndef Zadeh_Node_H 3 | #define Zadeh_Node_H 4 | 5 | #include 6 | 7 | #include "../zadeh.h" 8 | #include "./node_data_interface.h" 9 | 10 | namespace zadeh { 11 | 12 | class ZadehNode : public Napi::ObjectWrap { 13 | public: 14 | Napi::Value filter(const Napi::CallbackInfo &info) { 15 | // NOTE: not used, as it seems slower than using `ZadehNode::filterIndices` and then filter based on the 16 | // indices on the JavaScript side. Currently, it is disabled and so `set_candidates` doens't store a 17 | // reference. 
If you want to use this function, you should call `strArrFilterer.set_candidates` with the 18 | // second argument set to `true` 19 | return strArrFilterer.filter(info[0].As(), info.Env(), 20 | info[1].As().Uint32Value(), info[2].As(), 21 | info[3].As()); 22 | } 23 | 24 | Napi::Value filterIndices(const Napi::CallbackInfo &info) { 25 | const auto env = info.Env(); 26 | const auto filter_indices = 27 | strArrFilterer.filter_indices(info[0].As(), info[1].As().Uint32Value(), 28 | info[2].As(), info[3].As()); 29 | 30 | const auto indices_num = filter_indices.size(); 31 | auto res = Napi::Array::New(env, indices_num); 32 | for (uint32_t i = 0; i < indices_num; i++) { 33 | res[i] = Napi::Number::New(env, filter_indices[i]); 34 | } 35 | return res; 36 | } 37 | 38 | Napi::Value setArrayFiltererCandidates(const Napi::CallbackInfo &info) { 39 | strArrFilterer.set_candidates(info[0].As(), false); 40 | return Napi::Boolean(); 41 | } 42 | 43 | Napi::Value setTreeFiltererCandidates(const Napi::CallbackInfo &info) { 44 | // create Tree and set candidates 45 | treeFilterer.set_candidates(info[0].As(), info[1].As(), 46 | info[2].As()); 47 | 48 | return Napi::Boolean(); 49 | } 50 | 51 | Napi::Value filterTree(const Napi::CallbackInfo &info) { 52 | return treeFilterer.filter(info[0].As(), info.Env(), 53 | info[1].As().Uint32Value(), info[2].As(), 54 | info[3].As()); 55 | } 56 | 57 | Napi::Value filterIndicesTree(const Napi::CallbackInfo &info) { 58 | return treeFilterer.filter_indices(info[0].As(), info.Env(), 59 | info[1].As().Uint32Value(), info[2].As(), 60 | info[3].As()); 61 | } 62 | 63 | // NAPI entry functions: 64 | static Napi::Object Init(Napi::Env env, Napi::Object exports); 65 | explicit ZadehNode(const Napi::CallbackInfo &info) : Napi::ObjectWrap(info) {} 66 | 67 | private: 68 | StringArrayFilterer, CandidateString, Napi::Env> strArrFilterer{}; 69 | TreeFilterer, Napi::Env> 70 | treeFilterer{}; 71 | }; 72 | 73 | Napi::Number score(const Napi::CallbackInfo &info) { 74 | const std::string candidate = info[0].As(); 75 | const std::string query = info[1].As(); 76 | const bool usePathScoring = info[2].As(); 77 | const bool useExtensionBonus = info[3].As(); 78 | const Options options(query, 1, usePathScoring, useExtensionBonus); 79 | const auto scoreProvider = options.usePathScoring ? 
path_scorer_score : scorer_score; 80 | const auto score = scoreProvider(candidate, query, options); 81 | return Napi::Number::New(info.Env(), score); 82 | } 83 | 84 | Napi::Array match(const Napi::CallbackInfo &info) { 85 | auto res = Napi::Array::New(info.Env()); 86 | std::string candidate = info[0].As(); 87 | std::string query = info[1].As(); 88 | std::string pathSeparator = info[2].As(); 89 | assert(pathSeparator.size() == 1); 90 | 91 | Options options(query, pathSeparator[0]); 92 | auto matches = matcher_match(candidate, query, options); 93 | for (uint32_t i = 0, len = matches.size(); i < len; i++) { 94 | res[i] = Napi::Number::New(info.Env(), matches[i]); 95 | } 96 | return res; 97 | } 98 | 99 | Napi::String wrap(const Napi::CallbackInfo &info) { 100 | std::string candidate = info[0].As(); 101 | std::string query = info[1].As(); 102 | std::string pathSeparator = info[2].As(); 103 | assert(pathSeparator.size() == 1); 104 | 105 | Options options(query, pathSeparator[0]); 106 | std::string res; 107 | get_wrap(candidate, query, options, &res); 108 | return Napi::String::New(info.Env(), res); 109 | } 110 | 111 | } // namespace zadeh 112 | #endif // Zadeh_Node_H 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /test/filter-options-spec.coffee: -------------------------------------------------------------------------------- 1 | zadeh = require('../index'); 2 | fuzzaldrinExpected = require 'fuzzaldrin-plus' 3 | 4 | score_test = (candidate, query, options={}) -> 5 | expected = fuzzaldrinExpected.score(candidate, query, options) 6 | actual = zadeh.score(candidate, query, options) 7 | # expect(actual).toEqual(expected) # Tests are disabled for now. 8 | 9 | filter_test = (candidates, query, options={}) -> 10 | expected = fuzzaldrinExpected.filter(candidates, query, options) 11 | actual = zadeh.filter(candidates, query, options) 12 | expect(actual).toEqual(expected) 13 | 14 | score_test_with_options = (options) -> 15 | it "when query is at the start, end or in between ", -> 16 | score_test('0gruntfile0', 'file') 17 | score_test('0gruntfile0', 'grunt') 18 | score_test('0gruntfile', 'file') 19 | score_test('0gruntfile', 'grunt') 20 | score_test('gruntfile0', 'file') 21 | score_test('gruntfile0', 'grunt') 22 | 23 | describe "when file path is involved", -> 24 | it -> 25 | score_test(path.join('app', 'components', 'admin', 'member', 'modals', 'edit-payment.html'), 'member edit htm') 26 | 27 | it "with windows style path", -> 28 | score_test('0\\Diagnostic', 'diag') 29 | score_test('0\\Diagnostic', 'diag0') 30 | score_test('0\\0\\0\\diagnostics00', 'diag') 31 | score_test('0\\0\\0\\diagnostics00', 'diag0') 32 | 33 | it "with linux style path", -> 34 | score_test('0/Diagnostic', 'diag') 35 | score_test('0/Diagnostic', 'diag0') 36 | score_test('0/0/0/diagnostics00', 'diag') 37 | score_test('0/0/0/diagnostics00', 'diag0') 38 | 39 | filter_test_with_options = (options) -> 40 | 41 | describe "scoring", -> 42 | 43 | describe "returns the same score as returned by fuzzaldrin-plus", -> 44 | 45 | describe "with default options", -> 46 | score_test_with_options usePathScoring: false 47 | 48 | describe "with path scoring turned off", -> 49 | score_test_with_options usePathScoring: false 50 | 51 | describe "with path scoring turned on", -> 52 | score_test_with_options usePathScoring: true 53 | 54 | it "when useExtensionBonus option is used", -> 55 | score_test('matchOptimisticB.htaccess', 'mob.h', useExtensionBonus: true) 56 | 
score_test('matchOptimisticB_main.html', 'mob.h', useExtensionBonus: true) 57 | 58 | 59 | describe "filtering", -> 60 | 61 | describe "returns the same candidates as filtered by fuzzaldrin-plus", -> 62 | 63 | it "with default options", -> 64 | expect(zadeh.filter(['ab', 'abc', 'cd', 'de'], 'a')).toEqual(['ab', 'abc']) 65 | expect(zadeh.filter(['ab', 'abc', 'cd', 'de'], 'b')).toEqual(['ab', 'abc']) 66 | expect(zadeh.filter(['ab', 'abc', 'cd', 'de'], 'c')).toEqual(['cd', 'abc',]) 67 | 68 | it "honors maxResults in options", -> 69 | expect(zadeh.filter(['ab', 'abc', 'abcd', 'abcdde'], 'a', maxResults: 1)).toEqual(['ab']) 70 | expect(zadeh.filter(['ab', 'abc', 'abcd', 'abcdde'], 'a', maxResults: 2)).toEqual(['ab', 'abc']) 71 | expect(zadeh.filter(['ab', 'abc', 'abcd', 'abcdde'], 'c', maxResults: 2)).toEqual(['abc', 'abcd']) 72 | 73 | it "candidates are able to be indexed by a given key", -> 74 | candidates = [ 75 | {uri: '/usr/bin/ls', fname: 'ls'}, 76 | {uri: '/usr/bin/mkdir', fname: 'mkdir'}, 77 | {uri: '/usr/sbin/find', fname: 'find'}, 78 | {uri: '/usr/local/bin/git', fname: 'git'}, 79 | ] 80 | expect(zadeh.filter(candidates, 'i', key: 'fname')).toEqual([candidates[3], candidates[2], candidates[1]]) 81 | 82 | it "candidates with duplicate values when indexed by key are returned properly", -> 83 | candidates = [ 84 | {uri: '/usr/bin/ls', fname: 'ls'}, 85 | {uri: '/usr/sbin/ls', fname: 'ls'} 86 | ] 87 | expect(zadeh.filter(candidates, 'l', key: 'fname')).toEqual([candidates[0], candidates[1]]) 88 | 89 | describe "filtering by creating an object", -> 90 | it "with default options", -> 91 | obj = new zadeh.StringArrayFilterer() 92 | obj.setCandidates ['ab', 'abc', 'cd', 'de'] 93 | expect(obj.filter('a')).toEqual(['ab', 'abc']) 94 | expect(obj.filter('b')).toEqual(['ab', 'abc']) 95 | expect(obj.filter('c')).toEqual(['cd', 'abc',]) 96 | 97 | it "candidates are able to be indexed by a given key", -> 98 | candidates = [ 99 | {uri: '/usr/bin/ls', fname: 'ls'}, 100 | {uri: '/usr/bin/mkdir', fname: 'mkdir'}, 101 | {uri: '/usr/sbin/find', fname: 'find'}, 102 | {uri: '/usr/local/bin/git', fname: 'git'}, 103 | ] 104 | obj = new zadeh.ObjectArrayFilterer() 105 | obj.setCandidates candidates, 'fname' 106 | expect(obj.filter('i')).toEqual([candidates[3], candidates[2], candidates[1]]) 107 | 108 | it "candidates with duplicate values when indexed by key are returned properly", -> 109 | candidates = [ 110 | {uri: '/usr/bin/ls', fname: 'ls'}, 111 | {uri: '/usr/sbin/ls', fname: 'ls'} 112 | ] 113 | obj = new zadeh.ObjectArrayFilterer() 114 | obj.setCandidates candidates, 'fname' 115 | expect(obj.filter('l')).toEqual([candidates[0], candidates[1]]) 116 | -------------------------------------------------------------------------------- /src/binding/node_data_interface.h: -------------------------------------------------------------------------------- 1 | // TODO remove duplicate implementations 2 | 3 | #ifdef Zadeh_NODE_BINDING // only defined for building the Node-js binding 4 | 5 | #ifndef Zadeh_Node_DATA_INTERFACE_H 6 | #define Zadeh_Node_DATA_INTERFACE_H 7 | 8 | #include 9 | 10 | #include "../data_interface.h" 11 | #include "./windows_detect_arch.h" 12 | 13 | namespace zadeh { 14 | 15 | template <> Napi::Number init(const size_t value, const Napi::Env &env) { 16 | return Napi::Number::New(env, value); 17 | } 18 | 19 | /** Napi::Array Data Interface */ 20 | template <> Napi::Array init(const size_t len, const Napi::Env &env) { return Napi::Array::New(env, len); } 21 | 22 | template <> string get_at(const 
Napi::Array &candidates, const size_t ind) { 23 | return candidates.Get(ind).ToString().Utf8Value(); 24 | } 25 | 26 | #ifndef ENV32BIT // only enable if size_t is not unint32_t 27 | template <> Napi::Object get_at(const Napi::Array &candidates, const uint32_t ind) { 28 | return candidates.Get(ind).As(); 29 | } 30 | #endif 31 | 32 | template <> Napi::Object get_at(const Napi::Array &candidates, const size_t ind) { 33 | return candidates.Get(ind).As(); 34 | } 35 | 36 | template <> size_t get_size(const Napi::Array &candidates) { return candidates.Length(); } 37 | 38 | template <> void set_at(Napi::Array &candidates, CandidateString &&value, const size_t iCandidate) { 39 | candidates.Set(iCandidate, move(value)); 40 | } 41 | 42 | template <> void set_at(Napi::Array &candidates, Napi::Number &&value, const uint32_t iCandidate) { 43 | candidates.Set(iCandidate, move(value)); 44 | } 45 | 46 | template <> void set_at(Napi::Array &candidates, Napi::Object &&value, const size_t iCandidate) { 47 | candidates.Set(iCandidate, move(value)); 48 | } 49 | 50 | template <> void set_at(Napi::Array &candidates, Napi::Object &value, const size_t iCandidate) { 51 | candidates.Set(iCandidate, value); 52 | } 53 | 54 | template <> void set_at(Napi::Array &candidates, Napi::Object &&value, const string ind) { 55 | candidates.Set(ind, move(value)); 56 | } 57 | 58 | template <> Napi::Reference get_ref(const Napi::Array &arr) { return Napi::Persistent(arr); } 59 | 60 | // template<> 61 | // void release_ref(Napi::Reference &arr) { 62 | // arr.Unref(); 63 | // } 64 | 65 | /** Napi::Object Data Interface */ 66 | 67 | template <> Napi::Object init(const Napi::Env &env) { return Napi::Object::New(env); } 68 | 69 | template <> Napi::Object copy(const Napi::Object &obj, const Napi::Env &env) { 70 | auto obj_copy = Napi::Object::New(env); 71 | env.Global().Get("Object").As().Get("assign").As().Call({obj_copy, obj}); 72 | return obj_copy; 73 | } 74 | 75 | template <> Napi::Array copy(const Napi::Array &arr, const Napi::Env &env) { 76 | auto arr_copy = Napi::Array::New(env); 77 | env.Global().Get("Object").As().Get("assign").As().Call({arr_copy, arr}); 78 | return arr_copy; 79 | } 80 | 81 | template <> CandidateString get_at(const Napi::Object &candidates, const string ind) { 82 | return candidates.Get(ind).ToString().Utf8Value(); 83 | } 84 | 85 | template <> Napi::Array get_at(const Napi::Object &candidates, const string ind) { 86 | return candidates.Get(ind).As(); 87 | } 88 | 89 | template <> void set_at(Napi::Object &candidates, string &&value, const string index) { 90 | candidates.Set(index, move(value)); 91 | } 92 | 93 | template <> void set_at(Napi::Object &candidates, size_t &&value, const string index) { 94 | candidates.Set(index, move(value)); 95 | } 96 | 97 | template <> void set_at(Napi::Object &candidates, const string &value, const string index) { 98 | candidates.Set(index, value); 99 | } 100 | 101 | template <> void set_at(Napi::Object &candidates, const size_t &value, const string index) { 102 | candidates.Set(index, value); 103 | } 104 | 105 | template <> void set_at(Napi::Object &candidates, Napi::Array &&value, const string ind) { 106 | candidates.Set(ind, move(value)); 107 | } 108 | 109 | template <> void set_at(Napi::Object &candidates, Napi::Array &value, const string ind) { 110 | candidates.Set(ind, value); 111 | } 112 | 113 | template <> Napi::Reference get_ref(const Napi::Object &obj) { return Napi::Persistent(obj); } 114 | 115 | /** Get the children of a tree_object (Napi::Object) */ 116 | template <> 
117 | optional may_get_children(const Napi::Object &tree_object, const string &children_key) { 118 | // determine if it has children 119 | if (tree_object.HasOwnProperty(children_key)) { 120 | const auto childrenRaw = tree_object.Get(children_key); 121 | if (childrenRaw.IsArray()) { 122 | const auto childrenArray = childrenRaw.As(); 123 | if (childrenArray.Length() != 0) { 124 | return childrenArray; 125 | } 126 | } 127 | } 128 | return {}; 129 | } 130 | 131 | template <> 132 | Napi::Array get_children(const Napi::Object &tree_object, const string &children_key, const Napi::Env &env) { 133 | auto may_children = may_get_children(tree_object, children_key); 134 | if (may_children.has_value()) { 135 | return may_children.value(); 136 | } else { 137 | // empty array 138 | return init(static_cast(0u), env); 139 | } 140 | } 141 | 142 | /** console.log function to use for debugging */ 143 | auto print(const Napi::Env &env, const std::initializer_list &args) { 144 | env.Global().Get("console").As().Get("log").As().Call(args); 145 | } 146 | 147 | /** JSON.stringify function to use for debugging */ 148 | auto stringify(const Napi::Env &env, const std::initializer_list &args) { 149 | return env.Global().Get("JSON").As().Get("stringify").As().Call(args); 150 | } 151 | 152 | /** printLn function to use for debugging */ 153 | auto println(std::string name, const Napi::Env &env, const std::initializer_list &args, 154 | bool strinfigy = true) { 155 | cout << name << '\n'; 156 | if (strinfigy) { 157 | return print(env, {stringify(env, args)}); 158 | } else { 159 | return print(env, args); 160 | } 161 | } 162 | 163 | } // namespace zadeh 164 | #endif 165 | #endif 166 | -------------------------------------------------------------------------------- /.github/workflows/CI.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | pull_request: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | JS_BUILD: 10 | if: "!contains(github.event.head_commit.message, '[skip ci]')" 11 | runs-on: ubuntu-latest 12 | env: 13 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 14 | steps: 15 | - uses: actions/checkout@v2 16 | with: 17 | fetch-depth: 0 18 | 19 | - name: Install dependencies and build JS 20 | run: | 21 | npm install 22 | npm run build.js 23 | 24 | - name: Upload artifacts 25 | uses: actions/upload-artifact@v2 26 | with: 27 | path: | 28 | ./*.js 29 | ./*.d.ts 30 | 31 | NATIVE_Build: 32 | if: "!contains(github.event.head_commit.message, '[skip ci]')" 33 | runs-on: ${{ matrix.os }} 34 | strategy: 35 | fail-fast: false 36 | matrix: 37 | os: 38 | - ubuntu-20.04 39 | - macos-11.0 40 | - windows-latest 41 | node: 42 | - 12 43 | arch: 44 | - x64 45 | prebuild_cmd: 46 | - npm run prebuild.native 47 | docker: 48 | - "" 49 | include: 50 | - os: windows-2016 51 | node: 12 52 | arch: x86 53 | prebuild_cmd: npm run prebuild.native.ia32 54 | - os: ubuntu-20.04 55 | docker: node:12-alpine 56 | arch: x64 57 | apk: python3 make gcc g++ musl-dev 58 | node: 12 59 | prebuild_cmd: npm run prebuild.native 60 | # - os: macos-11.0 61 | # node: 15 62 | # arch: arm64 63 | # prebuild_cmd: npm run prebuild.native-arm64 64 | 65 | name: ${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.docker }}-${{ matrix.node }} 66 | steps: 67 | - uses: actions/checkout@v2 68 | 69 | - name: Cache node_modules 70 | uses: actions/cache@v2 71 | env: 72 | cache-name: node_modules 73 | with: 74 | path: node_modules 75 | key: ${{ runner.os }}-${{ matrix.node }}-${{ matrix.arch }}-${{ hashFiles('package.json') }} 76 | 
77 | - name: Install Docker 78 | if: ${{ matrix.docker }} 79 | run: | 80 | docker login -u ${{ github.actor }} -p ${{ secrets.GITHUB_TOKEN }} ghcr.io 81 | docker pull ${{ matrix.docker }} 82 | docker tag ${{ matrix.docker }} builder 83 | 84 | # - name: Install Compiler for Ubuntu 16.04 85 | # if: ${{ contains(matrix.os, 'ubuntu-16.04') }} 86 | # run: | 87 | # sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" 88 | # sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-9 10 89 | # sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-9 10 90 | # sudo update-alternatives --config clang 91 | # sudo update-alternatives --config clang++ 92 | 93 | # - name: Set Compiler for Ubuntu 94 | # run: | 95 | # echo "CC=clang" >> $GITHUB_ENV 96 | # echo "CXX=clang++" >> $GITHUB_ENV 97 | 98 | - name: Install Node 99 | if: ${{ !matrix.docker }} 100 | uses: actions/setup-node@v2 101 | with: 102 | node-version: ${{ matrix.node }} 103 | architecture: ${{ matrix.arch }} 104 | 105 | - name: Install dependencies 106 | run: npm install --ignore-scripts 107 | 108 | - name: Build native 109 | if: ${{ !matrix.docker }} 110 | run: npm run build.native 111 | 112 | - name: Prebuildify 113 | if: ${{ !matrix.docker }} 114 | run: ${{ matrix.prebuild_cmd }} 115 | 116 | - name: Prebuildify Docker 117 | if: ${{ matrix.docker }} 118 | run: docker run --volume ${{ github.workspace }}:/projectdir --workdir /projectdir --privileged builder sh -c "apk add --no-cache ${{ matrix.apk }} && ${{ matrix.prebuild_cmd }}" 119 | 120 | - name: Upload artifacts 121 | uses: actions/upload-artifact@v2 122 | with: 123 | path: ./prebuilds 124 | 125 | Test: 126 | needs: [NATIVE_Build, JS_BUILD] 127 | if: "!contains(github.event.head_commit.message, '[skip ci]')" 128 | runs-on: ${{ matrix.os }} 129 | strategy: 130 | fail-fast: false 131 | matrix: 132 | os: 133 | - ubuntu-20.04 134 | - macos-11.0 135 | - windows-latest 136 | node: 137 | - 10 138 | - 12 139 | - 14 140 | arch: 141 | - x64 142 | include: 143 | - os: windows-2016 144 | node: 12 145 | arch: x86 146 | - os: ubuntu-16.04 147 | node: 12 148 | - os: macos-10.15 149 | node: 12 150 | # Node arm64 doesn't exist yet 151 | # - os: macos-11.0 152 | # node: 15 153 | # arch: arm64 154 | steps: 155 | - uses: actions/checkout@v2 156 | 157 | - name: Cache node_modules 158 | uses: actions/cache@v2 159 | env: 160 | cache-name: node_modules 161 | with: 162 | path: node_modules 163 | key: ${{ runner.os }}-${{ matrix.node }}-${{ matrix.arch }}-${{ hashFiles('package.json') }} 164 | 165 | - name: Install Node 166 | uses: actions/setup-node@v2 167 | with: 168 | node-version: ${{ matrix.node }} 169 | architecture: ${{ matrix.arch }} 170 | 171 | - name: Install dependencies only 172 | run: | 173 | npm install --ignore-scripts 174 | 175 | - name: Download articats 176 | uses: actions/download-artifact@v2 177 | 178 | - name: Install prebuilds 179 | shell: bash 180 | run: | 181 | rm -rf build 182 | mkdir prebuilds 183 | mv ./artifact/*.js . 184 | mv ./artifact/*.d.ts . 
185 | mv artifact/* prebuilds/ 186 | 187 | - name: Run tests 188 | run: npm run test 189 | 190 | - name: Run benchmarks 191 | if: ${{ !contains(matrix.arch, 'x86') }} # https://github.com/npm/npm-lifecycle/issues/54 192 | run: npm run benchmark 193 | 194 | Lint: 195 | if: "!contains(github.event.head_commit.message, '[skip ci]')" 196 | runs-on: ubuntu-latest 197 | env: 198 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 199 | steps: 200 | - uses: actions/checkout@v2 201 | with: 202 | fetch-depth: 0 203 | 204 | - name: Commit lint ✨ 205 | uses: wagoid/commitlint-github-action@v4 206 | 207 | - name: Install dependencies 208 | run: npm install 209 | 210 | - name: Format ✨ 211 | run: npm run test.format 212 | 213 | - name: Lint ✨ 214 | run: npm run test.lint 215 | -------------------------------------------------------------------------------- /src/path_scorer.h: -------------------------------------------------------------------------------- 1 | #ifndef Zadeh_path_scorer_h_ 2 | #define Zadeh_path_scorer_h_ 3 | 4 | #include "common.h" 5 | #include "options.h" 6 | #include "scorer.h" 7 | 8 | namespace zadeh { 9 | 10 | // Directory depth at which the full path influence is halved. 11 | constexpr size_t tau_depth = 20; 12 | 13 | // Full path is also penalized for length of basename. This adjust a scale factor for that penalty. 14 | constexpr Score file_coeff = 2.5; 15 | 16 | // 17 | // Find fraction of extension that is matched by query. 18 | // For example mf.h prefers myFile.h to myfile.html 19 | // This need special handling because it give point for not having characters (the `tml` in above example) 20 | // 21 | CandidateString getExtension(const CandidateString &str) { 22 | const auto pos = str.rfind('.'); 23 | return pos == string::npos ? "" : str.substr(pos + 1); 24 | } 25 | 26 | Score getExtensionScore(const CandidateString &candidate, const CandidateString &ext, const int startPos, 27 | const int endPos, const int maxDepth) { 28 | // TODO make startPos and endPos size_t and m, n, pos auto 29 | // startPos is the position of last slash of candidate, -1 if absent. 30 | 31 | if (ext.empty()) { 32 | return 0; 33 | } 34 | 35 | // Check that (a) extension exist, (b) it is after the start of the basename 36 | int pos = candidate.rfind('.', endPos); 37 | assert(pos >= 0u); 38 | if (pos <= startPos) { 39 | return 0; // (note that startPos >= -1) 40 | } 41 | 42 | int ext_size = ext.size(); 43 | auto m = endPos - pos; 44 | 45 | // n contain the smallest of both extension length, m the largest. 46 | if (m < ext_size) { 47 | ext_size = m; 48 | m = ext.size(); 49 | } 50 | 51 | // place cursor after dot & count number of matching characters in extension 52 | pos++; 53 | assert(pos >= 1u); 54 | auto matched = 0; 55 | while (matched < ext_size) { 56 | assert(matched >= 0); // fuzz: if n==0, does not enter while and matched==0 57 | if (candidate[pos + matched] != ext[matched]) { // TODO candidate upper bound 58 | break; 59 | } 60 | ++matched; 61 | } 62 | assert(matched >= 0); 63 | 64 | // if nothing found, try deeper for multiple extensions, with some penalty for depth 65 | if (matched == 0u && maxDepth > 0) { 66 | return 0.9f * getExtensionScore(candidate, ext, startPos, pos - 2, maxDepth - 1); 67 | } 68 | 69 | // cannot divide by zero because m is the largest extension length and we return if either is 0 70 | return static_cast(matched) / static_cast(m); 71 | } 72 | 73 | // 74 | // Count number of folder in a path. 
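// Illustrative traces of the function below (hypothetical inputs, not taken from the test suite):
// with '/' as pathSeparator, countDir("foo/bar/baz.txt", 15, '/') and
// countDir("/foo//bar/baz.txt", 17, '/') both return 2 (the directories "foo" and "bar"),
// since leading and repeated separators are skipped before counting.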
75 | // (consecutive slashes count as a single directory) 76 | // 77 | int countDir(const CandidateString &path, const size_t end, const char pathSeparator) { 78 | // TODO bounds 79 | 80 | if (end < 1u) { 81 | return 0; 82 | } 83 | 84 | auto count = 0u; 85 | auto i = 0u; 86 | 87 | // skip slash at the start so `foo/bar` and `/foo/bar` have the same depth. 88 | // the following skips all the pathSeparator from the beginning of the given path 89 | while ((i < end) && (path[i] == pathSeparator)) { 90 | assert(0 <= i); // fuzz: if end==0, it does not enter while and i==0 91 | ++i; 92 | } 93 | assert(0 <= i); // if there were no pathSeparator i==0 94 | 95 | ++i; // the current char is certainly a non-pathSeparator, so we should check the other following ones 96 | while (i < end) { 97 | assert(0 <= i && i < path.size()); // fuzz: if end==0, it does not enter while and i==0 98 | if (path[i] == pathSeparator) { 99 | count++; // record first slash, but then skip consecutive ones 100 | 101 | // skip consecutive slashes: 102 | ++i; // current one is already pathSeparator, so we should check the other following ones 103 | while ((i < end) && (path[i] == pathSeparator)) { 104 | ++i; 105 | } 106 | // once a non-pathSeparator is found the above while exits 107 | // after this, i is certainly a non-pathSeparator, so we should check the other following ones, so i is 108 | // incremented before the next loop; 109 | } 110 | ++i; 111 | } 112 | // assert(0 <= i && i <= path.size()); // TODO 113 | // cout << "i" << i << endl << "path.size()" << path.size() << endl; 114 | 115 | return count; 116 | } 117 | 118 | // 119 | // Score adjustment for path 120 | // 121 | Score scorePath(const CandidateString &subject, const CandidateString &subject_lw, Score fullPathScore, 122 | const Options &options) { 123 | if (fullPathScore == 0) { 124 | return 0; 125 | } 126 | 127 | // {preparedQuery, useExtensionBonus, pathSeparator} = options 128 | 129 | // Skip trailing slashes 130 | int end = subject.size() - 1; 131 | while (subject[end] == options.pathSeparator) { 132 | end--; 133 | } 134 | 135 | // Get position of basePath of subject. 136 | int basePos = subject.rfind(options.pathSeparator, end); 137 | const auto fileLength = end - basePos; 138 | 139 | // Get a bonus for matching extension 140 | Score extAdjust = 1.0; 141 | 142 | if (options.useExtensionBonus) { 143 | extAdjust += getExtensionScore(subject_lw, options.preparedQuery.ext, basePos, end, 2); 144 | fullPathScore *= extAdjust; 145 | } 146 | 147 | // no basePath, nothing else to compute. 148 | if (basePos == -1) { 149 | return fullPathScore; 150 | } 151 | 152 | // Get the number of folder in query 153 | auto depth = options.preparedQuery.depth; 154 | 155 | // Get that many folder from subject 156 | while (basePos > -1 && depth-- > 0) { 157 | basePos = subject.rfind(options.pathSeparator, basePos - 1); 158 | } 159 | 160 | // Get basePath score, if BaseName is the whole string, no need to recompute 161 | // We still need to apply the folder depth and filename penalty. 162 | const auto basePathScore = basePos == -1 ? fullPathScore 163 | : extAdjust * computeScore(subject.substr(basePos + 1, end + 1), 164 | subject_lw.substr(basePos + 1, end + 1), 165 | options.preparedQuery); 166 | 167 | // Final score is linear interpolation between base score and full path score. 
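// A quick numeric sketch using tau_depth == 20 from the top of this file: at directory
// depth 0 the base-path score gets weight alpha = (0.5 * 20) / (20 + 0) = 0.5, an even blend,
// while at depth 20 it gets alpha = 10 / 40 = 0.25, so the full-path score carries more
// weight the deeper the candidate sits in the directory hierarchy.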
168 | // For low directory depth, interpolation favor base Path then include more of full path as depth increase 169 | // 170 | // A penalty based on the size of the basePath is applied to fullPathScore 171 | // That way, more focused basePath match can overcome longer directory path. 172 | 173 | const Score alpha = (0.5 * tau_depth) / (tau_depth + countDir(subject, end + 1, options.pathSeparator)); 174 | return alpha * basePathScore + (1 - alpha) * fullPathScore * scoreSize(0, file_coeff * fileLength); 175 | } 176 | 177 | // 178 | // Main export 179 | // 180 | // Manage the logic of testing if there's a match and calling the main scoring function 181 | // Also manage scoring a path and optional character. 182 | 183 | Score path_scorer_score(const CandidateString &string, const Element &query, const Options &options) { 184 | if (!options.allowErrors && 185 | !isMatch(string, options.preparedQuery.core_lw, options.preparedQuery.core_up)) { 186 | return 0; 187 | } 188 | const auto string_lw = ToLower(string); 189 | auto sc = computeScore(string, string_lw, options.preparedQuery); 190 | sc = scorePath(string, string_lw, sc, options); 191 | return ceil(sc); 192 | } 193 | 194 | } // namespace zadeh 195 | #endif 196 | -------------------------------------------------------------------------------- /binding.gyp: -------------------------------------------------------------------------------- 1 | # Note: C++ standard is repeated in configurations multiple times for different configurations 2 | { 3 | "targets": [{ 4 | "target_name": "zadeh", 5 | "sources": [ 6 | "src/zadeh.h", "src/common.h", "src/data_interface.h", 7 | "src/options.h", "src/scorer.h", "src/path_scorer.h", "src/filter.h", "src/query.h", "src/matcher.h", 8 | "src/StringArrayFilterer.h", "src/TreeFilterer.h", 9 | "src/binding/node.cc", "src/binding/node.h", "src/binding/node_data_interface.h", 10 | ], 11 | "include_dirs": [ 12 | " zadeh@2.1.0 benchmark.small C:\Users\aminy\Documents\GitHub\JavaScript\@atom-ide-community\zadeh 2 | > node benchmark/benchmark-small.js 3 | 4 | Zadeh: deprecated function. Use 'StringArrayFilterer' instead 5 | ====== Running test - query:nm ====== 6 | zadeh vs. legacy: 0.18 ms | 0.47 ms 7 | length of the result: 100, length of the lines: 100 8 | 9 | ====== Running test - query:npm ====== 10 | zadeh vs. legacy: 0.14 ms | 3.4 ms 11 | length of the result: 55, length of the lines: 100 12 | 13 | ====== Running test - query:node ====== 14 | zadeh vs. legacy: 0.16 ms | 1.33 ms 15 | length of the result: 100, length of the lines: 100 16 | 17 | ====== Running test - query:grunt ====== 18 | zadeh vs. legacy: 0.31 ms | 0.37 ms 19 | length of the result: 33, length of the lines: 100 20 | 21 | ====== Running test - query:html ====== 22 | zadeh vs. legacy: 0.1 ms | 0.46 ms 23 | length of the result: 10, length of the lines: 100 24 | 25 | ====== Running test - query:doc ====== 26 | zadeh vs. legacy: 0.24 ms | 3.57 ms 27 | length of the result: 87, length of the lines: 100 28 | 29 | ====== Running test - query:cli ====== 30 | zadeh vs. legacy: 0.37 ms | 1.89 ms 31 | length of the result: 57, length of the lines: 100 32 | 33 | ====== Running test - query:js ====== 34 | zadeh vs. legacy: 0.18 ms | 0.27 ms 35 | length of the result: 60, length of the lines: 100 36 | 37 | ====== Running test - query:jas ====== 38 | zadeh vs. legacy: 0.12 ms | 0.47 ms 39 | length of the result: 19, length of the lines: 100 40 | 41 | ====== Running test - query:mine ====== 42 | zadeh vs. 
legacy: 0.18 ms | 2.36 ms 43 | length of the result: 65, length of the lines: 100 44 | 45 | ====== Running test - query:stream ====== 46 | zadeh vs. legacy: 0.13 ms | 1.1 ms 47 | length of the result: 19, length of the lines: 100 48 | > zadeh@2.1.0 benchmark.regular 49 | > node benchmark/benchmark.js 50 | 51 | Zadeh: deprecated function. Use 'StringArrayFilterer' instead 52 | ====== Running test - query:index ====== 53 | ~10% of results are positive, mix exact & fuzzy 54 | zadeh vs. legacy: 28.82 ms | 43.34 ms 55 | length of the result: 6168, length of the lines: 66672 56 | 57 | ====== Running test - query:indx ====== 58 | ~10% of results are positive, Fuzzy match 59 | zadeh vs. legacy: 28.43 ms | 49.1 ms 60 | length of the result: 6192, length of the lines: 66672 61 | 62 | ====== Running test - query:walkdr ====== 63 | ~1% of results are positive, fuzzy 64 | zadeh vs. legacy: 25.66 ms | 15.74 ms 65 | length of the result: 504, length of the lines: 66672 66 | zadeh is SLOWER 67 | 68 | ====== Running test - query:node ====== 69 | ~98% of results are positive, mostly Exact match 70 | zadeh vs. legacy: 45.2 ms | 67.57 ms 71 | length of the result: 65136, length of the lines: 66672 72 | 73 | ====== Running test - query:nm ====== 74 | ~98% of results are positive, Acronym match 75 | zadeh vs. legacy: 38.9 ms | 69.47 ms 76 | length of the result: 65208, length of the lines: 66672 77 | 78 | ====== Running test - query:nm ====== 79 | ~98% of results + Fuzzy match, [Worst case scenario] 80 | zadeh vs. legacy: 39.56 ms | 61.79 ms 81 | length of the result: 65208, length of the lines: 66672 82 | 83 | ====== Running test - query:nm ====== 84 | ~98% of results + Fuzzy match, [Mitigation] 85 | zadeh vs. legacy: 39.76 ms | 55.73 ms 86 | length of the result: 65208, length of the lines: 66672 87 | 88 | ====== Running test - query:ndem ====== 89 | ~98% of results + Fuzzy match, [Worst case but shorter string] 90 | zadeh vs. legacy: 45.66 ms | 206.9 ms 91 | length of the result: 65124, length of the lines: 66672 92 | 93 | Zadeh: prepareQuery is deprecated. There is no major benefit by precomputing something just for the query. 94 | Matching 66672 results for 'index' (Prepare in advance) took 293.66 ms 95 | Matching 66672 results for 'index' (cache) took 279.56 ms 96 | Matching 66672 results for 'index' (_legacy_) took 82.57 ms 97 | 98 | > zadeh@2.1.0 benchmark.large 99 | > node benchmark/benchmark-large.js 100 | 101 | TwoLetter _legacy_ took 10718.67 ms 102 | Zadeh: deprecated function. Use 'StringArrayFilterer' instead 103 | TwoLetter deprecated filter took 3425.46 ms 104 | TwoLetter StringArrayFilterer.filter took 419.65 ms 105 | ====== 106 | ThreeLetter _legacy_ took 8647.23 ms 107 | ThreeLetter deprecated filter took 3505.40 ms 108 | ThreeLetter StringArrayFilterer.filter took 412.46 ms 109 | ====== 110 | TwoLetter object filter took 426.80 ms 111 | ThreeLetter object filter took 418.18 ms 112 | Zadeh: deprecated function. 
Use 'ObjectArrayFilterer' instead 113 | TwoLetter object deprecated filter took 4077.01 ms 114 | ThreeLetter object deprecated filter took 4752.99 ms 115 | ====== 116 | StringArrayFilterer constructor took 235.95 ms 117 | TwoLetter StringArrayFilterer.filter took 418.53 ms 118 | ThreeLetter StringArrayFilterer.filter took 430.14 ms 119 | ====== 120 | ObjectArrayFilterer constructor took 234.18 ms 121 | TwoLetter ObjectArrayFilterer.filter took 414.06 ms 122 | ThreeLetter ObjectArrayFilterer.filter took 419.60 ms 123 | 124 | > zadeh@2.1.0 benchmark.tree C:\Users\aminy\Documents\GitHub\JavaScript\@atom-ide-community\zadeh 125 | > node benchmark/benchmark-tree.js 126 | 127 | TreeFilterer.setCandidates: took 2.63 ms 128 | TreeFilterer.filter text took 0.67 ms 129 | TreeFilterer.filter dips took 0.29 ms 130 | TreeFilterer.filter disp took 0.26 ms 131 | TreeFilterer.filter txt took 0.41 ms 132 | TreeFilterer.filter getBuffer took 0.31 ms 133 | 134 | TreeFilterer.filter average: 0.388 ms 135 | 136 | filterTree text took 2.60 ms 137 | filterTree dips took 2.43 ms 138 | filterTree disp took 2.26 ms 139 | filterTree txt: took 3.26 ms 140 | filterTree getBuffer: took 2.43 ms 141 | 142 | filterTree average: 2.596 ms 143 | -------------------------------------------------------------------------------- /test/fixtures/tree-filter-disp.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "plainText": "", 4 | "startPosition": { "row": 0, "column": 0 }, 5 | "endPosition": { "row": 5844, "column": 3 }, 6 | "children": [ 7 | { 8 | "kind": "constant", 9 | "plainText": "disposable", 10 | "startPosition": { "row": 130, "column": 12 }, 11 | "endPosition": { "row": 130, "column": 64 }, 12 | "children": [] 13 | } 14 | ] 15 | }, 16 | { 17 | "plainText": "", 18 | "startPosition": { "row": 0, "column": 0 }, 19 | "endPosition": { "row": 5844, "column": 3 }, 20 | "children": [ 21 | { 22 | "kind": "constant", 23 | "plainText": "disposable", 24 | "startPosition": { "row": 130, "column": 12 }, 25 | "endPosition": { "row": 130, "column": 64 }, 26 | "children": [] 27 | } 28 | ] 29 | }, 30 | { 31 | "plainText": "", 32 | "startPosition": { "row": 0, "column": 0 }, 33 | "endPosition": { "row": 5844, "column": 3 }, 34 | "children": [ 35 | { 36 | "kind": "constant", 37 | "plainText": "displayLayer", 38 | "startPosition": { "row": 1232, "column": 10 }, 39 | "endPosition": { "row": 1232, "column": 49 }, 40 | "children": [] 41 | } 42 | ] 43 | }, 44 | { 45 | "plainText": "", 46 | "startPosition": { "row": 0, "column": 0 }, 47 | "endPosition": { "row": 5844, "column": 3 }, 48 | "children": [ 49 | { 50 | "kind": "constant", 51 | "plainText": "displayLayer", 52 | "startPosition": { "row": 1232, "column": 10 }, 53 | "endPosition": { "row": 1232, "column": 49 }, 54 | "children": [] 55 | } 56 | ] 57 | }, 58 | { 59 | "plainText": "", 60 | "startPosition": { "row": 0, "column": 0 }, 61 | "endPosition": { "row": 5844, "column": 3 }, 62 | "children": [ 63 | { 64 | "kind": "property", 65 | "plainText": "displayBuffer", 66 | "startPosition": { "row": 333, "column": 2 }, 67 | "endPosition": { "row": 341, "column": 3 }, 68 | "children": [] 69 | } 70 | ] 71 | }, 72 | { 73 | "plainText": "", 74 | "startPosition": { "row": 0, "column": 0 }, 75 | "endPosition": { "row": 5844, "column": 3 }, 76 | "children": [ 77 | { 78 | "kind": "property", 79 | "plainText": "displayBuffer", 80 | "startPosition": { "row": 333, "column": 2 }, 81 | "endPosition": { "row": 341, "column": 3 }, 82 | "children": [] 83 | } 
84 | ] 85 | }, 86 | { 87 | "plainText": "", 88 | "startPosition": { "row": 0, "column": 0 }, 89 | "endPosition": { "row": 5844, "column": 3 }, 90 | "children": [ 91 | { 92 | "kind": "constant", 93 | "plainText": "displayLayerParams", 94 | "startPosition": { "row": 243, "column": 12 }, 95 | "endPosition": { "row": 257, "column": 7 }, 96 | "children": [] 97 | } 98 | ] 99 | }, 100 | { 101 | "plainText": "", 102 | "startPosition": { "row": 0, "column": 0 }, 103 | "endPosition": { "row": 5844, "column": 3 }, 104 | "children": [ 105 | { 106 | "kind": "constant", 107 | "plainText": "displayLayerParams", 108 | "startPosition": { "row": 392, "column": 10 }, 109 | "endPosition": { "row": 392, "column": 33 }, 110 | "children": [] 111 | } 112 | ] 113 | }, 114 | { 115 | "plainText": "", 116 | "startPosition": { "row": 0, "column": 0 }, 117 | "endPosition": { "row": 5844, "column": 3 }, 118 | "children": [ 119 | { 120 | "kind": "constant", 121 | "plainText": "displayLayerParams", 122 | "startPosition": { "row": 243, "column": 12 }, 123 | "endPosition": { "row": 257, "column": 7 }, 124 | "children": [] 125 | } 126 | ] 127 | }, 128 | { 129 | "plainText": "", 130 | "startPosition": { "row": 0, "column": 0 }, 131 | "endPosition": { "row": 5844, "column": 3 }, 132 | "children": [ 133 | { 134 | "kind": "constant", 135 | "plainText": "displayLayerParams", 136 | "startPosition": { "row": 392, "column": 10 }, 137 | "endPosition": { "row": 392, "column": 33 }, 138 | "children": [] 139 | } 140 | ] 141 | }, 142 | { 143 | "plainText": "", 144 | "startPosition": { "row": 0, "column": 0 }, 145 | "endPosition": { "row": 5844, "column": 3 }, 146 | "children": [ 147 | { 148 | "kind": "constant", 149 | "plainText": "Disposable", 150 | "startPosition": { "row": 5, "column": 29 }, 151 | "endPosition": { "row": 5, "column": 39 }, 152 | "children": [] 153 | } 154 | ] 155 | }, 156 | { 157 | "plainText": "", 158 | "startPosition": { "row": 0, "column": 0 }, 159 | "endPosition": { "row": 5844, "column": 3 }, 160 | "children": [ 161 | { 162 | "kind": "constant", 163 | "plainText": "CompositeDisposable", 164 | "startPosition": { "row": 5, "column": 8 }, 165 | "endPosition": { "row": 5, "column": 27 }, 166 | "children": [] 167 | } 168 | ] 169 | }, 170 | { 171 | "plainText": "", 172 | "startPosition": { "row": 0, "column": 0 }, 173 | "endPosition": { "row": 5844, "column": 3 }, 174 | "children": [ 175 | { 176 | "kind": "method", 177 | "plainText": "subscribeToDisplayLayer", 178 | "startPosition": { "row": 846, "column": 2 }, 179 | "endPosition": { "row": 874, "column": 3 }, 180 | "children": [] 181 | } 182 | ] 183 | }, 184 | { 185 | "plainText": "", 186 | "startPosition": { "row": 0, "column": 0 }, 187 | "endPosition": { "row": 5844, "column": 3 }, 188 | "children": [ 189 | { 190 | "kind": "method", 191 | "plainText": "subscribeToDisplayLayer", 192 | "startPosition": { "row": 846, "column": 2 }, 193 | "endPosition": { "row": 874, "column": 3 }, 194 | "children": [] 195 | } 196 | ] 197 | }, 198 | { 199 | "plainText": "", 200 | "startPosition": { "row": 0, "column": 0 }, 201 | "endPosition": { "row": 5844, "column": 3 }, 202 | "children": [ 203 | { 204 | "kind": "method", 205 | "plainText": "onDidStopChanging", 206 | "startPosition": { "row": 944, "column": 2 }, 207 | "endPosition": { "row": 946, "column": 3 }, 208 | "children": [] 209 | } 210 | ] 211 | }, 212 | { 213 | "plainText": "", 214 | "startPosition": { "row": 0, "column": 0 }, 215 | "endPosition": { "row": 5844, "column": 3 }, 216 | "children": [ 217 | { 218 | "kind": 
"method", 219 | "plainText": "onDidStopChanging", 220 | "startPosition": { "row": 944, "column": 2 }, 221 | "endPosition": { "row": 946, "column": 3 }, 222 | "children": [] 223 | } 224 | ] 225 | }, 226 | { 227 | "plainText": "", 228 | "startPosition": { "row": 0, "column": 0 }, 229 | "endPosition": { "row": 5844, "column": 3 }, 230 | "children": [ 231 | { 232 | "kind": "method", 233 | "plainText": "onDidChangeScrollTop", 234 | "startPosition": { "row": 1198, "column": 2 }, 235 | "endPosition": { "row": 1203, "column": 3 }, 236 | "children": [] 237 | } 238 | ] 239 | }, 240 | { 241 | "plainText": "", 242 | "startPosition": { "row": 0, "column": 0 }, 243 | "endPosition": { "row": 5844, "column": 3 }, 244 | "children": [ 245 | { 246 | "kind": "method", 247 | "plainText": "onDidChangeScrollTop", 248 | "startPosition": { "row": 1198, "column": 2 }, 249 | "endPosition": { "row": 1203, "column": 3 }, 250 | "children": [] 251 | } 252 | ] 253 | }, 254 | { 255 | "plainText": "", 256 | "startPosition": { "row": 0, "column": 0 }, 257 | "endPosition": { "row": 5844, "column": 3 }, 258 | "children": [ 259 | { 260 | "kind": "method", 261 | "plainText": "onDidChangeSoftWrapped", 262 | "startPosition": { "row": 985, "column": 2 }, 263 | "endPosition": { "row": 987, "column": 3 }, 264 | "children": [] 265 | } 266 | ] 267 | }, 268 | { 269 | "plainText": "", 270 | "startPosition": { "row": 0, "column": 0 }, 271 | "endPosition": { "row": 5844, "column": 3 }, 272 | "children": [ 273 | { 274 | "kind": "method", 275 | "plainText": "onDidChangeSoftWrapped", 276 | "startPosition": { "row": 985, "column": 2 }, 277 | "endPosition": { "row": 987, "column": 3 }, 278 | "children": [] 279 | } 280 | ] 281 | }, 282 | { 283 | "plainText": "", 284 | "startPosition": { "row": 0, "column": 0 }, 285 | "endPosition": { "row": 5844, "column": 3 }, 286 | "children": [ 287 | { 288 | "kind": "method", 289 | "plainText": "onDidChangeCursorPosition", 290 | "startPosition": { "row": 961, "column": 2 }, 291 | "endPosition": { "row": 963, "column": 3 }, 292 | "children": [] 293 | } 294 | ] 295 | }, 296 | { 297 | "plainText": "", 298 | "startPosition": { "row": 0, "column": 0 }, 299 | "endPosition": { "row": 5844, "column": 3 }, 300 | "children": [ 301 | { 302 | "kind": "method", 303 | "plainText": "onDidChangeCursorPosition", 304 | "startPosition": { "row": 961, "column": 2 }, 305 | "endPosition": { "row": 963, "column": 3 }, 306 | "children": [] 307 | } 308 | ] 309 | } 310 | ] 311 | -------------------------------------------------------------------------------- /src/matcher.h: -------------------------------------------------------------------------------- 1 | #ifndef Zadeh_matcher_h_ 2 | #define Zadeh_matcher_h_ 3 | 4 | #include 5 | #include 6 | 7 | #include "common.h" 8 | #include "options.h" 9 | #include "query.h" 10 | 11 | namespace zadeh { 12 | 13 | //---------------------------------------------------------------------- 14 | // Align sequence (used for zadeh.match) 15 | // Return position of subject characters that match query. 16 | // 17 | // Follow closely scorer.computeScore. 18 | // Except at each step we record what triggered the best score. 19 | // Then we trace back to output matched characters. 20 | // 21 | // Differences are: 22 | // - we record the best move at each position in a matrix, and finish by a traceback. 23 | // - we reset consecutive sequence if we do not take the match. 
24 | // - no hit miss limit 25 | std::vector computeMatch(const CandidateString &subject, const CandidateString &subject_lw, 26 | const PreparedQuery &preparedQuery, size_t offset = 0u) { 27 | const auto &query = preparedQuery.query; 28 | const auto &query_lw = preparedQuery.query_lw; 29 | 30 | const auto subject_size = subject.size(); 31 | const auto query_size = query.size(); 32 | 33 | // TODO optimization for the case that the sizes are 1 34 | 35 | // this is like the consecutive bonus, but for camelCase / snake_case initials 36 | const auto acro = scoreAcronyms(subject, subject_lw, query, query_lw); 37 | const auto acro_score = acro.score; 38 | 39 | // Init 40 | auto score_row = vector(query_size, 0); 41 | auto csc_row = vector(query_size, 0); 42 | 43 | // Directions constants 44 | enum class Direction { STOP, UP, LEFT, DIAGONAL }; 45 | 46 | // Traceback matrix 47 | auto trace = std::vector(subject_size * query_size, Direction::STOP); 48 | auto pos = 0u; 49 | 50 | auto i = 0u; 51 | while (i < subject_size) { // foreach char is of subject 52 | assert(0 <= i && i < subject_lw.size()); 53 | Score score = 0; 54 | Score score_up = 0; 55 | Score csc_diag = 0; 56 | const auto si_lw = subject_lw[i]; 57 | 58 | auto j = 0u; // 0..n-1 59 | while (j < query_size) { // foreach char qj of query 60 | assert(0u <= j && 61 | j < min({static_cast(query_size), query_lw.size(), score_row.size(), csc_row.size()})); 62 | 63 | // reset score 64 | Score csc_score = 0; 65 | Score align = 0; 66 | const auto score_diag = score_up; 67 | 68 | // Compute a tentative match 69 | if (query_lw[j] == si_lw) { 70 | const auto start = isWordStart(i, subject, subject_lw); 71 | 72 | // Forward search for a sequence of consecutive char 73 | csc_score = 74 | csc_diag > 0 ? csc_diag : scoreConsecutives(subject, subject_lw, query, query_lw, i, j, start); 75 | 76 | // Determine bonus for matching A[i] with B[j] 77 | align = score_diag + scoreCharacter(i, j, start, acro_score, csc_score); 78 | } 79 | // Prepare next sequence & match score. 80 | score_up = score_row[j]; // Current score_up is next run score diag 81 | csc_diag = csc_row[j]; 82 | 83 | auto move = [&score_up, &score]() { 84 | // In case of equality, moving UP get us closer to the start of the candidate string. 85 | if (score > score_up) { 86 | return Direction::LEFT; 87 | } 88 | score = score_up; 89 | return Direction::UP; 90 | }(); 91 | 92 | // Only take alignment if it's the absolute best option. 93 | if (align > score) { 94 | score = align; 95 | move = Direction::DIAGONAL; 96 | } else { 97 | // If we do not take this character, break consecutive sequence. 98 | // (when consecutive is 0, it'll be recomputed) 99 | csc_score = 0; 100 | } 101 | 102 | score_row[j] = score; 103 | csc_row[j] = csc_score; 104 | 105 | assert(0u <= pos && pos < trace.size()); 106 | trace[pos] = score > 0 ? 
move : Direction::STOP; 107 | 108 | ++pos; 109 | 110 | ++j; 111 | } 112 | assert(0u <= j && j <= query_size); 113 | 114 | ++i; 115 | } 116 | assert(0 <= i && i <= subject_lw.size()); 117 | 118 | // ------------------- 119 | // Go back in the trace matrix 120 | // and collect matches (diagonals) 121 | 122 | // TODO narrowing conversions 123 | 124 | const auto query_size_int = static_cast(query_size); 125 | 126 | auto ii = static_cast(subject_size) - 1; 127 | auto jj = query_size_int - 1; 128 | auto pos_ = static_cast(ii * query_size_int + jj); 129 | auto backtrack = true; 130 | std::vector matches; 131 | 132 | while (backtrack && ii >= 0 && jj >= 0) { 133 | switch (trace[pos_]) { 134 | case Direction::UP: 135 | ii--; 136 | pos_ -= query_size; 137 | break; 138 | case Direction::LEFT: 139 | jj--; 140 | pos_--; 141 | break; 142 | case Direction::DIAGONAL: 143 | matches.emplace_back(ii + offset); 144 | jj--; 145 | ii--; 146 | pos_ -= query_size + 1; 147 | break; 148 | default: 149 | backtrack = false; 150 | } 151 | } 152 | 153 | std::reverse(matches.begin(), matches.end()); 154 | return matches; 155 | } 156 | 157 | std::vector basenameMatch(const CandidateString &subject, const CandidateString &subject_lw, 158 | const PreparedQuery &preparedQuery, char pathSeparator) { 159 | // Skip trailing slashes 160 | auto end = subject.size() - 1; 161 | while (subject[end] == pathSeparator) { 162 | end--; 163 | } 164 | 165 | // Get position of basePath of subject. 166 | auto basePos = subject.rfind(pathSeparator, end); 167 | 168 | // If no PathSeparator, no base path exist. 169 | if (basePos == std::string::npos) { 170 | return std::vector(); 171 | } 172 | 173 | // Get the number of folder in query 174 | auto depth = preparedQuery.depth; 175 | 176 | // Get that many folder from subject 177 | while (depth-- > 0) { 178 | basePos = subject.rfind(pathSeparator, basePos - 1); 179 | if (basePos == std::string::npos) { // consumed whole subject ? 180 | return std::vector(); 181 | } 182 | } 183 | 184 | // Get basePath match 185 | basePos++; 186 | end++; 187 | return computeMatch(subject.substr(basePos, end - basePos), subject_lw.substr(basePos, end - basePos), 188 | preparedQuery, basePos); 189 | } 190 | 191 | // 192 | // Combine two matches result and remove duplicate 193 | // (Assume sequences are sorted, matches are sorted by construction.) 
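// For example (an illustrative trace, not an assertion taken from the tests): merging
// a = {1, 4, 7} with b = {2, 4, 9} yields {1, 2, 4, 7, 9}; the shared position 4 is
// emitted only once.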
194 | // 195 | std::vector mergeMatches(const std::vector &a, const std::vector &b) { 196 | const auto a_size = a.size(); 197 | const auto b_size = b.size(); 198 | 199 | if (b_size == 0u) { 200 | return a; 201 | } 202 | if (a_size == 0u) { 203 | return b; 204 | } 205 | 206 | auto i = 0u; 207 | auto j = 0u; 208 | auto bj = b[j]; 209 | std::vector out; 210 | 211 | while (i < a_size) { 212 | auto ai = a[i]; 213 | 214 | while (bj <= ai && ++j < b_size) { 215 | if (bj < ai) { 216 | out.emplace_back(bj); 217 | } 218 | bj = b[j]; 219 | } 220 | out.emplace_back(ai); 221 | ++i; 222 | } 223 | while (j < b_size) { 224 | out.emplace_back(b[j++]); 225 | } 226 | return out; 227 | } 228 | 229 | // Return position of character which matches 230 | std::vector matcher_match(const CandidateString &string, const Element &query, 231 | const Options &options) { 232 | const auto string_lw = ToLower(string); 233 | auto matches = computeMatch(string, string_lw, options.preparedQuery); 234 | 235 | if (string.find(options.pathSeparator) != std::string::npos) { 236 | const auto baseMatches = basenameMatch(string, string_lw, options.preparedQuery, options.pathSeparator); 237 | return mergeMatches(matches, baseMatches); 238 | } 239 | return matches; 240 | } 241 | 242 | void get_wrap(const CandidateString &string, const Element &query, const Options &options, std::string *out) { 243 | // const auto tagClass = "highlight"s; 244 | // const auto tagOpen = ""s; 245 | const auto tagOpen = R"()"s; 246 | const auto tagClose = ""s; 247 | 248 | if (string == query) { 249 | *out = tagOpen + string + tagClose; 250 | return; 251 | } 252 | 253 | // Run get position where a match is found 254 | auto matchPositions = matcher_match(string, query, options); 255 | 256 | // If no match return as is 257 | if (matchPositions.empty()) { 258 | *out = string; 259 | return; 260 | } 261 | 262 | // Loop over match positions 263 | std::string output; 264 | auto matchIndex = 0u; 265 | auto strPos = 0u; 266 | while (matchIndex < matchPositions.size()) { 267 | auto matchPos = matchPositions[matchIndex]; 268 | matchIndex++; 269 | 270 | // Get text before the current match position 271 | if (matchPos > strPos) { 272 | output += string.substr(strPos, matchPos - strPos); 273 | strPos = matchPos; 274 | } 275 | 276 | // Get consecutive matches to wrap under a single tag 277 | while (matchIndex < matchPositions.size()) { 278 | matchIndex++; 279 | if (matchPositions[matchIndex - 1] == matchPos + 1) { 280 | matchPos++; 281 | } else { 282 | matchIndex--; 283 | break; 284 | } 285 | } 286 | 287 | // Get text inside the match, including current character 288 | matchPos++; 289 | if (matchPos > strPos) { 290 | output += tagOpen; 291 | output += string.substr(strPos, matchPos - strPos); 292 | output += tagClose; 293 | strPos = matchPos; 294 | } 295 | } 296 | 297 | // Get string after last match 298 | if (strPos <= string.size() - 1) { 299 | output += string.substr(strPos); 300 | } 301 | 302 | // return wrapped text 303 | *out = output; 304 | } 305 | 306 | } // namespace zadeh 307 | #endif 308 | -------------------------------------------------------------------------------- /test/tree-filterer-spec.js: -------------------------------------------------------------------------------- 1 | const { TreeFilterer } = require("../index") 2 | const { deepEqual } = require("fast-equals") 3 | const fs = require("fs") 4 | const path = require("path") 5 | 6 | describe("TreeFilterer", function () { 7 | const outlineData = 
JSON.parse(fs.readFileSync(path.join(path.dirname(__dirname), "benchmark", "tree.json"), "utf8")) 8 | 9 | describe("TreeFilterer.filterIndices", () => { 10 | it("can fuzzy search in an array tree objects", () => { 11 | const treeFilterer = new TreeFilterer() 12 | 13 | const candidates = [ 14 | { data: "bye1", children: [{ data: "hello" }] }, 15 | { data: "Bye2", children: [{ data: "_bye4" }, { data: "hel" }] }, 16 | { data: "eye" }, 17 | ] 18 | 19 | treeFilterer.setCandidates(candidates, "data", "children") // set candidates only once 20 | 21 | // console.log(treeFilterer.filterIndices("hello")) 22 | expect(deepEqual(treeFilterer.filterIndices("hello"), [{ data: "hello", index: 0, parent_indices: [0] }])).toBe( 23 | true 24 | ) 25 | 26 | // console.log(treeFilterer.filterIndices("hel")) 27 | expect( 28 | deepEqual(treeFilterer.filterIndices("hel"), [ 29 | { data: "hel", index: 1, parent_indices: [1] }, 30 | { data: "hello", index: 0, parent_indices: [0] }, 31 | ]) 32 | ).toBe(true) 33 | 34 | // console.log(treeFilterer.filterIndices("he")) 35 | expect( 36 | deepEqual(treeFilterer.filterIndices("he"), [ 37 | { data: "hel", index: 1, parent_indices: [1] }, 38 | { data: "hello", index: 0, parent_indices: [0] }, 39 | ]) 40 | ).toBe(true) 41 | 42 | // console.log(treeFilterer.filterIndices("bye")) 43 | expect( 44 | deepEqual(treeFilterer.filterIndices("bye"), [ 45 | { data: "bye1", index: 0, parent_indices: [] }, 46 | { data: "_bye4", index: 0, parent_indices: [1] }, 47 | { data: "Bye2", index: 1, parent_indices: [] }, 48 | ]) 49 | ).toBe(true) 50 | 51 | // console.log(treeFilterer.filterIndices("ye")) 52 | expect( 53 | deepEqual(treeFilterer.filterIndices("ye"), [ 54 | { data: "eye", index: 2, parent_indices: [] }, 55 | { data: "bye1", index: 0, parent_indices: [] }, 56 | { data: "Bye2", index: 1, parent_indices: [] }, 57 | { data: "_bye4", index: 0, parent_indices: [1] }, 58 | ]) 59 | ).toBe(true) 60 | 61 | // test maxResults 62 | // console.log(treeFilterer.filterIndices("bye", { maxResults: 2 })) 63 | expect( 64 | deepEqual(treeFilterer.filterIndices("bye", { maxResults: 2 }), [ 65 | { data: "bye1", index: 0, parent_indices: [] }, 66 | { data: "Bye2", index: 1, parent_indices: [] }, 67 | ]) 68 | ).toBe(true) 69 | 70 | // console.log(treeFilterer.filterIndices("ye", { maxResults: 3 })) 71 | expect( 72 | deepEqual(treeFilterer.filterIndices("ye", { maxResults: 3 }), [ 73 | { data: "bye1", index: 0, parent_indices: [] }, 74 | { data: "Bye2", index: 1, parent_indices: [] }, 75 | { data: "_bye4", index: 0, parent_indices: [1] }, 76 | ]) 77 | ).toBe(true) 78 | }) 79 | 80 | it("can search in an array of children-less objects", () => { 81 | const treeFilterer = new TreeFilterer() 82 | const candidates = [{ data: "helloworld" }, { data: "bye" }, { data: "hello" }] 83 | treeFilterer.setCandidates(candidates, "data", "children") // set candidates only once 84 | 85 | // console.log(treeFilterer.filterIndices("hello")) 86 | expect( 87 | deepEqual(treeFilterer.filterIndices("hello"), [ 88 | { data: "hello", index: 2, parent_indices: [] }, 89 | { data: "helloworld", index: 0, parent_indices: [] }, 90 | ]) 91 | ).toBe(true) 92 | }) 93 | 94 | // answers are os dependant because of slight differences 95 | it("can search in outline data", () => { 96 | const treeFilterer = new TreeFilterer() 97 | treeFilterer.setCandidates(outlineData, "plainText", "children") 98 | 99 | // fs.writeFileSync( 100 | // path.join(__dirname, "fixtures", "tree-filterIndices-text.json"), 101 | // 
JSON.stringify(treeFilterer.filterIndices("text")) 102 | // ) 103 | if (process.platform === "win32") { 104 | const treeFilterIndicesText = JSON.parse( 105 | fs.readFileSync(path.join(__dirname, "fixtures", "tree-filterIndices-text.json"), "utf8") 106 | ) 107 | expect(deepEqual(treeFilterer.filterIndices("text"), treeFilterIndicesText)).toBe(true) 108 | } 109 | 110 | // fs.writeFileSync( 111 | // path.join(__dirname, "fixtures", "tree-filterIndices-disp.json"), 112 | // JSON.stringify(treeFilterer.filterIndices("disp")) 113 | // ) 114 | if (process.platform !== "linux") { 115 | const treeFilterIndicesDisp = JSON.parse( 116 | fs.readFileSync(path.join(__dirname, "fixtures", "tree-filterIndices-disp.json"), "utf8") 117 | ) 118 | expect(deepEqual(treeFilterer.filterIndices("disp"), treeFilterIndicesDisp)).toBe(true) 119 | } 120 | 121 | // fs.writeFileSync( 122 | // path.join(__dirname, "fixtures", "tree-filterIndices-dips.json"), 123 | // JSON.stringify(treeFilterer.filterIndices("dips")) 124 | // ) 125 | if (process.platform !== "linux") { 126 | const treeFilterIndicesDips = JSON.parse( 127 | fs.readFileSync(path.join(__dirname, "fixtures", "tree-filterIndices-dips.json"), "utf8") 128 | ) 129 | expect(deepEqual(treeFilterer.filterIndices("dips"), treeFilterIndicesDips)).toBe(true) 130 | } 131 | }) 132 | }) 133 | 134 | describe("TreeFilterer.filter", () => { 135 | it("can fuzzy search in an array tree objects", () => { 136 | const treeFilterer = new TreeFilterer() 137 | 138 | const candidates = [ 139 | { data: "bye1", children: [{ data: "hello" }] }, 140 | { data: "Bye2", children: [{ data: "_bye4" }, { data: "hel" }] }, 141 | { data: "eye" }, 142 | ] 143 | 144 | treeFilterer.setCandidates(candidates, "data", "children") // set candidates only once 145 | 146 | // console.log(JSON.stringify(treeFilterer.filter("hello"))) 147 | expect( 148 | deepEqual(treeFilterer.filter("hello"), [{ data: "bye1", children: [{ data: "hello", children: [] }] }]) 149 | ).toBe(true) 150 | 151 | // console.log(JSON.stringify(treeFilterer.filter("hel"))) 152 | expect( 153 | deepEqual(treeFilterer.filter("hel"), [ 154 | { data: "Bye2", children: [{ data: "hel", children: [] }] }, 155 | { data: "bye1", children: [{ data: "hello", children: [] }] }, 156 | ]) 157 | ).toBe(true) 158 | 159 | // console.log(JSON.stringify(treeFilterer.filter("he"))) 160 | expect( 161 | deepEqual(treeFilterer.filter("he"), [ 162 | { data: "Bye2", children: [{ data: "hel", children: [] }] }, 163 | { data: "bye1", children: [{ data: "hello", children: [] }] }, 164 | ]) 165 | ).toBe(true) 166 | 167 | // console.log(JSON.stringify(treeFilterer.filter("bye"))) 168 | expect( 169 | deepEqual(treeFilterer.filter("bye"), [ 170 | { data: "bye1", children: [] }, 171 | { data: "Bye2", children: [{ data: "_bye4", children: [] }] }, 172 | { data: "Bye2", children: [] }, 173 | ]) 174 | ).toBe(true) 175 | 176 | // console.log(JSON.stringify(treeFilterer.filter("ye"))) 177 | expect( 178 | deepEqual(treeFilterer.filter("ye"), [ 179 | { data: "eye", children: [] }, 180 | { data: "bye1", children: [] }, 181 | { data: "Bye2", children: [] }, 182 | { data: "Bye2", children: [{ data: "_bye4", children: [] }] }, 183 | ]) 184 | ).toBe(true) 185 | 186 | // test maxResults 187 | // console.log(JSON.stringify(treeFilterer.filter("bye", { maxResults: 2 }))) 188 | expect( 189 | deepEqual(treeFilterer.filter("bye", { maxResults: 2 }), [ 190 | { data: "bye1", children: [] }, 191 | { data: "Bye2", children: [] }, 192 | ]) 193 | ).toBe(true) 194 | 195 | // 
console.log(JSON.stringify(treeFilterer.filter("ye", { maxResults: 3 }))) 196 | expect( 197 | deepEqual(treeFilterer.filter("ye", { maxResults: 3 }), [ 198 | { data: "bye1", children: [] }, 199 | { data: "Bye2", children: [] }, 200 | { data: "Bye2", children: [{ data: "_bye4", children: [] }] }, 201 | ]) 202 | ).toBe(true) 203 | }) 204 | 205 | it("can search in an array of children-less objects", () => { 206 | const treeFilterer = new TreeFilterer() 207 | const candidates = [{ data: "helloworld" }, { data: "bye" }, { data: "hello" }] 208 | treeFilterer.setCandidates(candidates, "data", "children") // set candidates only once 209 | 210 | // console.log(JSON.stringify(treeFilterer.filter("hello"))) 211 | expect( 212 | deepEqual(treeFilterer.filter("hello"), [ 213 | { data: "hello", children: [] }, 214 | { data: "helloworld", children: [] }, 215 | ]) 216 | ).toBe(true) 217 | }) 218 | 219 | // answers are os dependant because of slight differences 220 | it("can search in outline data", () => { 221 | const treeFilterer = new TreeFilterer() 222 | treeFilterer.setCandidates(outlineData, "plainText", "children") 223 | 224 | // fs.writeFileSync(path.join(__dirname, "fixtures", "tree-filter-text.json"), JSON.stringify(treeFilterer.filter("text"))) 225 | const treeFilterText = JSON.parse( 226 | fs.readFileSync(path.join(__dirname, "fixtures", "tree-filter-text.json"), "utf8") 227 | ) 228 | if (process.platform === "win32") { 229 | expect(deepEqual(treeFilterer.filter("text"), treeFilterText)).toBe(true) 230 | } 231 | 232 | // fs.writeFileSync(path.join(__dirname, "fixtures", "tree-filter-disp.json"), JSON.stringify(treeFilterer.filter("disp"))) 233 | if (process.platform !== "linux") { 234 | const treeFilterDisp = JSON.parse( 235 | fs.readFileSync(path.join(__dirname, "fixtures", "tree-filter-disp.json"), "utf8") 236 | ) 237 | expect(deepEqual(treeFilterer.filter("disp"), treeFilterDisp)).toBe(true) 238 | } 239 | 240 | // fs.writeFileSync(path.join(__dirname, "fixtures", "tree-filter-dips.json"), JSON.stringify(treeFilterer.filter("dips"))) 241 | if (process.platform !== "linux") { 242 | const treeFilterDips = JSON.parse( 243 | fs.readFileSync(path.join(__dirname, "fixtures", "tree-filter-dips.json"), "utf8") 244 | ) 245 | expect(deepEqual(treeFilterer.filter("dips"), treeFilterDips)).toBe(true) 246 | } 247 | }) 248 | }) 249 | }) 250 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /src/TreeFilterer.h: -------------------------------------------------------------------------------- 1 | #ifndef Zadeh_ArrayFilterer_H 2 | #define Zadeh_ArrayFilterer_H 3 | 4 | #include "common.h" 5 | #include "data_interface.h" 6 | #include "options.h" 7 | #include "filter.h" 8 | 9 | namespace zadeh { 10 | 11 | struct TreeNode { 12 | const CandidateString data; 13 | const size_t index; 14 | vector parent_indices{}; // TODO use a pointer/reference 15 | 16 | explicit TreeNode(CandidateString &&data_, const size_t index_, vector parent_indices_) noexcept 17 | : data{move(data_)}, index{index_}, parent_indices{parent_indices_} {} 18 | }; 19 | 20 | template > 22 | class TreeFilterer { 23 | private: 24 | /* const */ string data_key = "data"s; 25 | /* const */ string children_key = "children"s; 26 | /** an array of the TreeNode which includes the data and its address (index, level) in the tree for each */ 27 | vector> partitioned_candidates{}; 28 | 29 | /** Should we keep a reference to the candidates. Set to `true` if you want to call `::filter` method */ 30 | bool keepReference; 31 | /** Reference to the candidates used in `::filter` method */ 32 | ReferenceType candidates_view; 33 | 34 | public: 35 | vector candidates_vector; 36 | 37 | // default constructor is needed for generation of all the move/copy methods 38 | TreeFilterer() = default; 39 | 40 | explicit TreeFilterer(const string &data_key_, const string &children_key_) 41 | : data_key{data_key_}, children_key{children_key_} {} 42 | 43 | /** create a Tree object and make an entries array */ 44 | explicit TreeFilterer(const ArrayType &candidates_, const string &data_key_, const string &children_key_, 45 | const bool keepReference_ = true) 46 | : data_key{data_key_}, children_key{children_key_} { 47 | set_candiates(candidates_, keepReference_); 48 | } 49 | 50 | auto set_candidates(const ArrayType &candidates_, const bool keepReference_ = true) { 51 | keepReference = keepReference_; 52 | make_candidates_vector(candidates_, vector{}); /* consider the given array of trees the childs of 53 | an imaginary parent that has no index*/ 54 | set_partitioned_candidates(); 55 | 56 | if (keepReference) { 57 | if (candidates_view != nullptr) { 58 | // release the already kept reference 59 | candidates_view.Unref(); 60 | } 61 | // store a view of candidates in case filter was called 62 | candidates_view = get_ref(candidates_); 63 | } 64 | } 65 | 66 | auto set_candidates(const ArrayType &candidates_, const string &data_key_, const string &children_key_, 67 | const bool keepReference_ = true) { 68 | keepReference = keepReference_; 69 | data_key = data_key_; 70 | children_key = children_key_; 71 | set_candidates(candidates_); 72 | } 73 | 74 | auto filter_indices(const std::string &query, const AllocatorType &env, const size_t maxResults = 0, 75 | const bool usePathScoring = true, const bool useExtensionBonus = false) { 76 | // optimization for no candidates 77 | if (partitioned_candidates.empty()) { 78 | return init(static_cast(0), 79 | env); // return an empty vector (should we throw?) 
80 | } 81 | 82 | const Options options(query, maxResults, usePathScoring, useExtensionBonus); 83 | const auto filtered_indices = zadeh::filter(partitioned_candidates, query, options); 84 | const auto filter_indices_length = filtered_indices.size(); 85 | 86 | auto res = init(filter_indices_length, env); // array of TreeNode 87 | for (size_t i_candidate = 0; i_candidate < filter_indices_length; i_candidate++) { 88 | auto entry = candidates_vector[filtered_indices[i_candidate]]; 89 | 90 | // create {data, index, level} 91 | auto node = init(env); 92 | set_at(node, entry.data, "data"s); 93 | set_at(node, entry.index, "index"s); 94 | 95 | const auto parent_indices = entry.parent_indices; 96 | auto parent_indices_len = parent_indices.size(); 97 | 98 | // copy vector to ArrayType // TODO is this needed? 99 | auto parent_indices_array = init(parent_indices_len, env); 100 | for (uint32_t i_parent_index = 0; i_parent_index < parent_indices_len; i_parent_index++) { 101 | set_at(parent_indices_array, init(parent_indices[i_parent_index], env), 102 | i_parent_index); 103 | } 104 | set_at(node, move(parent_indices_array), "parent_indices"s); 105 | set_at(res, move(node), i_candidate); 106 | } 107 | return res; 108 | } 109 | 110 | auto filter(const std::string &query, const AllocatorType &env, const size_t maxResults = 0, 111 | const bool usePathScoring = true, const bool useExtensionBonus = false) { 112 | // optimization for no candidates 113 | if (partitioned_candidates.empty()) { 114 | return init(static_cast(0), 115 | env); // return an empty vector (should we throw?) 116 | } 117 | 118 | const Options options(query, maxResults, usePathScoring, useExtensionBonus); 119 | const auto filtered_indices = zadeh::filter(partitioned_candidates, query, options); 120 | const auto filter_indices_length = filtered_indices.size(); 121 | 122 | auto res = init(filter_indices_length, env); // array of TreeNode 123 | auto candidates = candidates_view.Value(); 124 | for (size_t i_candidate = 0; i_candidate < filter_indices_length; i_candidate++) { 125 | auto entry = candidates_vector[filtered_indices[i_candidate]]; 126 | 127 | const auto index = entry.index; 128 | 129 | const auto parent_indices = entry.parent_indices; 130 | auto parent_indices_len = parent_indices.size(); 131 | 132 | // final filtered tree 133 | NodeType filtered_tree; 134 | 135 | // We create a tree with the filtered data being the last level (if it has children, they are not 136 | // included in the filered tree) we construct a filtered tree from top to bottom 137 | auto temp_children = candidates; 138 | for (uint32_t i_parent_index = 0; i_parent_index < parent_indices_len + 1; i_parent_index++) { 139 | #ifdef Zadeh_NODE_BINDING 140 | assert(temp_children.IsArray()); 141 | #endif 142 | NodeType temp_parent; // the temp parent that is processed in each iteration 143 | if (i_parent_index < parent_indices_len) { 144 | const auto parent_index = parent_indices[i_parent_index]; 145 | // for each parent index get the original object at that index 146 | // BUG this check shouldn't be required 147 | if (parent_index >= get_size(temp_children)) { 148 | continue; 149 | } 150 | assert(parent_index < get_size(temp_children)); 151 | temp_parent = get_at(temp_children, parent_index); 152 | // update the children for the next iteration 153 | temp_children = get_children(temp_parent, children_key, env); 154 | } else { 155 | // BUG this check shouldn't be required 156 | if (index >= get_size(temp_children)) { 157 | continue; 158 | } 159 | assert(i_parent_index == 
parent_indices_len); 160 | assert(index < get_size(temp_children)); 161 | // once parent indices finished, we get the index instead of the last parent 162 | temp_parent = get_at(temp_children, index); 163 | } 164 | #ifdef Zadeh_NODE_BINDING 165 | assert(temp_parent.IsObject()); 166 | #endif 167 | // TODO refactor! 168 | if (i_parent_index == 0) { 169 | if (parent_indices_len == 0) { 170 | // if the first level is chosen, set the children to an empty array 171 | auto filtered_parent = copy(temp_parent, env); 172 | set_at(filtered_parent, init(static_cast(0u), env), 173 | children_key); 174 | filtered_tree = filtered_parent; 175 | } else { 176 | // for the first round we just use temp_parent 177 | filtered_tree = copy(temp_parent, env); 178 | } 179 | } else { 180 | // get the previous chosen children (current temp_parent) and place it in filtered_children 181 | // so the previous children only has the chosen ones 182 | NodeType filtered_parent; 183 | if (i_parent_index != parent_indices_len) { 184 | filtered_parent = copy(temp_parent, env); 185 | } else { 186 | filtered_parent = copy(temp_parent, env); 187 | // unset children in the last step 188 | set_at(filtered_parent, init(static_cast(0u), env), 189 | children_key); 190 | } 191 | auto filtered_children = init(static_cast(1u), env); 192 | set_at(filtered_children, filtered_parent, static_cast(0u)); 193 | // finally store it in the global tree 194 | set_at(filtered_tree, filtered_children, children_key); 195 | } 196 | } 197 | set_at(res, move(filtered_tree), i_candidate); 198 | } 199 | 200 | return res; 201 | } 202 | 203 | private: 204 | /** 205 | Recursive function that fills the candidates_vector from the given children_nodes that have a common 206 | parent node 207 | @param children_nodes an array of trees 208 | @param parent_indices the indices of the parent node 209 | */ 210 | void make_candidates_vector(const ArrayType &children_nodes, const vector &parent_indices) { 211 | const auto children_num = get_size(children_nodes); 212 | for (auto i_child = 0u; i_child < children_num; i_child++) { 213 | make_candidates_vector(get_at(children_nodes, i_child), i_child, parent_indices); 214 | } 215 | } 216 | 217 | /** 218 | Recursive function that fills the candidates_vector from the given node 219 | @param node a tree node 220 | @param index the index of the child in the parent node 221 | @param parent_indices the indices of the parent node 222 | */ 223 | void make_candidates_vector(const NodeType &node, size_t index, const vector &parent_indices) { 224 | // make the TreeNode and push it back 225 | candidates_vector.emplace_back( 226 | get_at(node, data_key), // first, get the current data 227 | index, parent_indices); 228 | 229 | // add children if any 230 | auto may_children = may_get_children(node, children_key); 231 | if (may_children.has_value()) { 232 | // copy parent_indices 233 | auto new_parent_indices = vector(); 234 | new_parent_indices = parent_indices; 235 | // add the current index 236 | new_parent_indices.emplace_back(index); 237 | make_candidates_vector(may_children.value(), new_parent_indices); 238 | } 239 | } 240 | 241 | auto set_partitioned_candidates() { 242 | const auto N = candidates_vector.size(); 243 | const auto num_chunks = get_num_chunks(N); 244 | 245 | partitioned_candidates.clear(); 246 | partitioned_candidates.resize(num_chunks); 247 | 248 | auto cur_start = 0u; 249 | for (auto i = 0u; i < num_chunks; i++) { 250 | 251 | auto chunk_size = N / num_chunks; 252 | // Distribute remainder among the chunks. 
253 | if (i < N % num_chunks) { 254 | chunk_size++; 255 | } 256 | for (auto j = cur_start; j < cur_start + chunk_size; j++) { 257 | partitioned_candidates[i].emplace_back(candidates_vector[j].data); // different 258 | } 259 | cur_start += chunk_size; 260 | } 261 | } 262 | }; 263 | 264 | } // namespace zadeh 265 | #endif 266 | -------------------------------------------------------------------------------- /test/fixtures/tree-filter-dips.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "plainText": "", 4 | "startPosition": { "row": 0, "column": 0 }, 5 | "endPosition": { "row": 5844, "column": 3 }, 6 | "children": [ 7 | { 8 | "kind": "method", 9 | "plainText": "didUpdateStyles", 10 | "startPosition": { "row": 84, "column": 2 }, 11 | "endPosition": { "row": 89, "column": 3 }, 12 | "children": [] 13 | } 14 | ] 15 | }, 16 | { 17 | "plainText": "", 18 | "startPosition": { "row": 0, "column": 0 }, 19 | "endPosition": { "row": 5844, "column": 3 }, 20 | "children": [ 21 | { 22 | "kind": "method", 23 | "plainText": "didUpdateStyles", 24 | "startPosition": { "row": 84, "column": 2 }, 25 | "endPosition": { "row": 89, "column": 3 }, 26 | "children": [] 27 | } 28 | ] 29 | }, 30 | { 31 | "plainText": "", 32 | "startPosition": { "row": 0, "column": 0 }, 33 | "endPosition": { "row": 5844, "column": 3 }, 34 | "children": [ 35 | { 36 | "kind": "constant", 37 | "plainText": "displayLayerParams", 38 | "startPosition": { "row": 243, "column": 12 }, 39 | "endPosition": { "row": 257, "column": 7 }, 40 | "children": [] 41 | } 42 | ] 43 | }, 44 | { 45 | "plainText": "", 46 | "startPosition": { "row": 0, "column": 0 }, 47 | "endPosition": { "row": 5844, "column": 3 }, 48 | "children": [ 49 | { 50 | "kind": "constant", 51 | "plainText": "displayLayerParams", 52 | "startPosition": { "row": 392, "column": 10 }, 53 | "endPosition": { "row": 392, "column": 33 }, 54 | "children": [] 55 | } 56 | ] 57 | }, 58 | { 59 | "plainText": "", 60 | "startPosition": { "row": 0, "column": 0 }, 61 | "endPosition": { "row": 5844, "column": 3 }, 62 | "children": [ 63 | { 64 | "kind": "constant", 65 | "plainText": "displayLayerParams", 66 | "startPosition": { "row": 243, "column": 12 }, 67 | "endPosition": { "row": 257, "column": 7 }, 68 | "children": [] 69 | } 70 | ] 71 | }, 72 | { 73 | "plainText": "", 74 | "startPosition": { "row": 0, "column": 0 }, 75 | "endPosition": { "row": 5844, "column": 3 }, 76 | "children": [ 77 | { 78 | "kind": "constant", 79 | "plainText": "displayLayerParams", 80 | "startPosition": { "row": 392, "column": 10 }, 81 | "endPosition": { "row": 392, "column": 33 }, 82 | "children": [] 83 | } 84 | ] 85 | }, 86 | { 87 | "plainText": "", 88 | "startPosition": { "row": 0, "column": 0 }, 89 | "endPosition": { "row": 5844, "column": 3 }, 90 | "children": [ 91 | { 92 | "kind": "constant", 93 | "plainText": "disposable", 94 | "startPosition": { "row": 130, "column": 12 }, 95 | "endPosition": { "row": 130, "column": 64 }, 96 | "children": [] 97 | } 98 | ] 99 | }, 100 | { 101 | "plainText": "", 102 | "startPosition": { "row": 0, "column": 0 }, 103 | "endPosition": { "row": 5844, "column": 3 }, 104 | "children": [ 105 | { 106 | "kind": "constant", 107 | "plainText": "disposable", 108 | "startPosition": { "row": 130, "column": 12 }, 109 | "endPosition": { "row": 130, "column": 64 }, 110 | "children": [] 111 | } 112 | ] 113 | }, 114 | { 115 | "plainText": "", 116 | "startPosition": { "row": 0, "column": 0 }, 117 | "endPosition": { "row": 5844, "column": 3 }, 118 | 
"children": [ 119 | { 120 | "kind": "method", 121 | "plainText": "didUpdateScrollbarStyles", 122 | "startPosition": { "row": 91, "column": 2 }, 123 | "endPosition": { "row": 96, "column": 3 }, 124 | "children": [] 125 | } 126 | ] 127 | }, 128 | { 129 | "plainText": "", 130 | "startPosition": { "row": 0, "column": 0 }, 131 | "endPosition": { "row": 5844, "column": 3 }, 132 | "children": [ 133 | { 134 | "kind": "method", 135 | "plainText": "didUpdateScrollbarStyles", 136 | "startPosition": { "row": 91, "column": 2 }, 137 | "endPosition": { "row": 96, "column": 3 }, 138 | "children": [] 139 | } 140 | ] 141 | }, 142 | { 143 | "plainText": "", 144 | "startPosition": { "row": 0, "column": 0 }, 145 | "endPosition": { "row": 5844, "column": 3 }, 146 | "children": [ 147 | { 148 | "kind": "method", 149 | "plainText": "onDidTerminatePendingState", 150 | "startPosition": { "row": 842, "column": 2 }, 151 | "endPosition": { "row": 844, "column": 3 }, 152 | "children": [] 153 | } 154 | ] 155 | }, 156 | { 157 | "plainText": "", 158 | "startPosition": { "row": 0, "column": 0 }, 159 | "endPosition": { "row": 5844, "column": 3 }, 160 | "children": [ 161 | { 162 | "kind": "method", 163 | "plainText": "onDidTerminatePendingState", 164 | "startPosition": { "row": 842, "column": 2 }, 165 | "endPosition": { "row": 844, "column": 3 }, 166 | "children": [] 167 | } 168 | ] 169 | }, 170 | { 171 | "plainText": "", 172 | "startPosition": { "row": 0, "column": 0 }, 173 | "endPosition": { "row": 5844, "column": 3 }, 174 | "children": [ 175 | { 176 | "kind": "constant", 177 | "plainText": "Disposable", 178 | "startPosition": { "row": 5, "column": 29 }, 179 | "endPosition": { "row": 5, "column": 39 }, 180 | "children": [] 181 | } 182 | ] 183 | }, 184 | { 185 | "plainText": "", 186 | "startPosition": { "row": 0, "column": 0 }, 187 | "endPosition": { "row": 5844, "column": 3 }, 188 | "children": [ 189 | { 190 | "kind": "method", 191 | "plainText": "onDidChangeCursorPosition", 192 | "startPosition": { "row": 961, "column": 2 }, 193 | "endPosition": { "row": 963, "column": 3 }, 194 | "children": [] 195 | } 196 | ] 197 | }, 198 | { 199 | "plainText": "", 200 | "startPosition": { "row": 0, "column": 0 }, 201 | "endPosition": { "row": 5844, "column": 3 }, 202 | "children": [ 203 | { 204 | "kind": "method", 205 | "plainText": "onDidChangeCursorPosition", 206 | "startPosition": { "row": 961, "column": 2 }, 207 | "endPosition": { "row": 963, "column": 3 }, 208 | "children": [] 209 | } 210 | ] 211 | }, 212 | { 213 | "plainText": "", 214 | "startPosition": { "row": 0, "column": 0 }, 215 | "endPosition": { "row": 5844, "column": 3 }, 216 | "children": [ 217 | { 218 | "kind": "method", 219 | "plainText": "onDidUpdateDecorations", 220 | "startPosition": { "row": 1221, "column": 2 }, 221 | "endPosition": { "row": 1223, "column": 3 }, 222 | "children": [] 223 | } 224 | ] 225 | }, 226 | { 227 | "plainText": "", 228 | "startPosition": { "row": 0, "column": 0 }, 229 | "endPosition": { "row": 5844, "column": 3 }, 230 | "children": [ 231 | { 232 | "kind": "method", 233 | "plainText": "onDidUpdateDecorations", 234 | "startPosition": { "row": 1221, "column": 2 }, 235 | "endPosition": { "row": 1223, "column": 3 }, 236 | "children": [] 237 | } 238 | ] 239 | }, 240 | { 241 | "plainText": "", 242 | "startPosition": { "row": 0, "column": 0 }, 243 | "endPosition": { "row": 5844, "column": 3 }, 244 | "children": [ 245 | { 246 | "kind": "constant", 247 | "plainText": "CompositeDisposable", 248 | "startPosition": { "row": 5, "column": 8 }, 249 | 
"endPosition": { "row": 5, "column": 27 }, 250 | "children": [] 251 | } 252 | ] 253 | }, 254 | { 255 | "plainText": "", 256 | "startPosition": { "row": 0, "column": 0 }, 257 | "endPosition": { "row": 5844, "column": 3 }, 258 | "children": [ 259 | { 260 | "kind": "method", 261 | "plainText": "getSaveDialogOptions", 262 | "startPosition": { "row": 1510, "column": 2 }, 263 | "endPosition": { "row": 1512, "column": 3 }, 264 | "children": [] 265 | } 266 | ] 267 | }, 268 | { 269 | "plainText": "", 270 | "startPosition": { "row": 0, "column": 0 }, 271 | "endPosition": { "row": 5844, "column": 3 }, 272 | "children": [ 273 | { 274 | "kind": "method", 275 | "plainText": "getSaveDialogOptions", 276 | "startPosition": { "row": 1510, "column": 2 }, 277 | "endPosition": { "row": 1512, "column": 3 }, 278 | "children": [] 279 | } 280 | ] 281 | }, 282 | { 283 | "plainText": "", 284 | "startPosition": { "row": 0, "column": 0 }, 285 | "endPosition": { "row": 5844, "column": 3 }, 286 | "children": [ 287 | { 288 | "kind": "constant", 289 | "plainText": "openEditorPathSegmentsWithSameFilename", 290 | "startPosition": { "row": 1396, "column": 12 }, 291 | "endPosition": { "row": 1396, "column": 55 }, 292 | "children": [] 293 | } 294 | ] 295 | }, 296 | { 297 | "plainText": "", 298 | "startPosition": { "row": 0, "column": 0 }, 299 | "endPosition": { "row": 5844, "column": 3 }, 300 | "children": [ 301 | { 302 | "kind": "constant", 303 | "plainText": "openEditorPathSegmentsWithSameFilename", 304 | "startPosition": { "row": 1396, "column": 12 }, 305 | "endPosition": { "row": 1396, "column": 55 }, 306 | "children": [] 307 | } 308 | ] 309 | }, 310 | { 311 | "plainText": "", 312 | "startPosition": { "row": 0, "column": 0 }, 313 | "endPosition": { "row": 5844, "column": 3 }, 314 | "children": [ 315 | { 316 | "kind": "method", 317 | "plainText": "scopeDescriptorForBufferPosition", 318 | "startPosition": { "row": 4535, "column": 2 }, 319 | "endPosition": { "row": 4540, "column": 3 }, 320 | "children": [] 321 | } 322 | ] 323 | }, 324 | { 325 | "plainText": "", 326 | "startPosition": { "row": 0, "column": 0 }, 327 | "endPosition": { "row": 5844, "column": 3 }, 328 | "children": [ 329 | { 330 | "kind": "method", 331 | "plainText": "scopeDescriptorForBufferPosition", 332 | "startPosition": { "row": 4535, "column": 2 }, 333 | "endPosition": { "row": 4540, "column": 3 }, 334 | "children": [] 335 | } 336 | ] 337 | }, 338 | { 339 | "plainText": "", 340 | "startPosition": { "row": 0, "column": 0 }, 341 | "endPosition": { "row": 5844, "column": 3 }, 342 | "children": [ 343 | { 344 | "kind": "method", 345 | "plainText": "syntaxTreeScopeDescriptorForBufferPosition", 346 | "startPosition": { "row": 4557, "column": 2 }, 347 | "endPosition": { "row": 4562, "column": 3 }, 348 | "children": [] 349 | } 350 | ] 351 | }, 352 | { 353 | "plainText": "", 354 | "startPosition": { "row": 0, "column": 0 }, 355 | "endPosition": { "row": 5844, "column": 3 }, 356 | "children": [ 357 | { 358 | "kind": "method", 359 | "plainText": "syntaxTreeScopeDescriptorForBufferPosition", 360 | "startPosition": { "row": 4557, "column": 2 }, 361 | "endPosition": { "row": 4562, "column": 3 }, 362 | "children": [] 363 | } 364 | ] 365 | }, 366 | { 367 | "plainText": "", 368 | "startPosition": { "row": 0, "column": 0 }, 369 | "endPosition": { "row": 5844, "column": 3 }, 370 | "children": [ 371 | { 372 | "kind": "method", 373 | "plainText": "updateAutoIndentOnPaste", 374 | "startPosition": { "row": 543, "column": 2 }, 375 | "endPosition": { "row": 546, "column": 3 }, 376 | 
"children": [] 377 | } 378 | ] 379 | }, 380 | { 381 | "plainText": "", 382 | "startPosition": { "row": 0, "column": 0 }, 383 | "endPosition": { "row": 5844, "column": 3 }, 384 | "children": [ 385 | { 386 | "kind": "method", 387 | "plainText": "updateAutoIndentOnPaste", 388 | "startPosition": { "row": 543, "column": 2 }, 389 | "endPosition": { "row": 546, "column": 3 }, 390 | "children": [] 391 | } 392 | ] 393 | }, 394 | { 395 | "plainText": "", 396 | "startPosition": { "row": 0, "column": 0 }, 397 | "endPosition": { "row": 5844, "column": 3 }, 398 | "children": [ 399 | { 400 | "kind": "method", 401 | "plainText": "shouldAutoIndentOnPaste", 402 | "startPosition": { "row": 5134, "column": 2 }, 403 | "endPosition": { "row": 5136, "column": 3 }, 404 | "children": [] 405 | } 406 | ] 407 | }, 408 | { 409 | "plainText": "", 410 | "startPosition": { "row": 0, "column": 0 }, 411 | "endPosition": { "row": 5844, "column": 3 }, 412 | "children": [ 413 | { 414 | "kind": "method", 415 | "plainText": "shouldAutoIndentOnPaste", 416 | "startPosition": { "row": 5134, "column": 2 }, 417 | "endPosition": { "row": 5136, "column": 3 }, 418 | "children": [] 419 | } 420 | ] 421 | }, 422 | { 423 | "plainText": "", 424 | "startPosition": { "row": 0, "column": 0 }, 425 | "endPosition": { "row": 5844, "column": 3 }, 426 | "children": [ 427 | { 428 | "kind": "method", 429 | "plainText": "destroyFoldsContainingBufferPositions", 430 | "startPosition": { "row": 4950, "column": 2 }, 431 | "endPosition": { "row": 4955, "column": 3 }, 432 | "children": [] 433 | } 434 | ] 435 | }, 436 | { 437 | "plainText": "", 438 | "startPosition": { "row": 0, "column": 0 }, 439 | "endPosition": { "row": 5844, "column": 3 }, 440 | "children": [ 441 | { 442 | "kind": "method", 443 | "plainText": "destroyFoldsContainingBufferPositions", 444 | "startPosition": { "row": 4950, "column": 2 }, 445 | "endPosition": { "row": 4955, "column": 3 }, 446 | "children": [] 447 | } 448 | ] 449 | } 450 | ] 451 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Blazing fast library for fuzzy filtering, matching, and other fuzzy things! 2 | 3 | ![CI](https://github.com/atom-ide-community/zadeh/workflows/CI/badge.svg) 4 | 5 | # Zadeh 6 | 7 | Zadeh is a blazing fast library for fuzzy filtering, matching, and other fuzzy things. Zadeh is a multithreaded library written in C++ with the goal to search through a dataset with 1M entries in a few hundred milliseconds. 8 | 9 | The name "Zadeh" refers to [Lofti Zadeh](https://en.wikipedia.org/wiki/Lotfi_A._Zadeh), the creator of fuzzy logic and fuzzy systems. 10 | 11 | ### features 12 | 13 | - Fuzzy filter through an array of candidates (`StringArrayFilterer`) 14 | - Fuzzy filter through nested tree-like objects (`TreeFilterer`) 15 | - Special treatment for strings that have separators (space ` `, hyphen `-`, underline`_`) 16 | - Special treatment for path-like strings (string separated by `\` or `//`) 17 | - give an array of indices at which the query matches the given string (`match`) 18 | - score the given string against the given query (`score`) 19 | - give an HTML/Markdown string that highlights the range for which the match happens (`wrap`) 20 | - Allows setting the candidates only once using `StringArrayFilterer` and `TreeFilterer` classes, and then perform `filter` multiple times, which is much more efficient than calling the `filter` or `filterTree` functions directly every time. 
21 | - Bindings for Nodejs (more to come) 22 | 23 | # Usage 24 | 25 | ## Usage from C++ 26 | 27 | This is a header-only library. Include `./src/zadeh.h` and build it as part of your application. 28 | 29 | `examples/example1.cpp`: 30 | 31 | ```cpp 32 | #include "../src/zadeh.h" // include zadeh.h 33 | #include <iostream> 34 | #include <vector> 35 | 36 | using namespace std; 37 | 38 | int main() { 39 | // the data to fuzzy search on 40 | auto data = vector<string>{"eye", "why", "bi"}; 41 | 42 | // setup StringArrayFilterer 43 | auto strArrFilterer = zadeh::StringArrayFilterer<vector<string>, string>{}; 44 | strArrFilterer.set_candidates(data); 45 | 46 | // filter the indices that match the query 47 | auto filtered_indices = strArrFilterer.filter_indices("ye"); 48 | 49 | // print the filtered data 50 | for (auto ind: filtered_indices) { 51 | cout << data[ind] << '\n'; 52 | } 53 | } 54 | ``` 55 | 56 | CMake file: 57 | 58 | ```cmake 59 | cmake_minimum_required(VERSION 3.17) 60 | 61 | project(example1 LANGUAGES CXX) 62 | add_executable(example1 ./examples/example1.cpp) 63 | target_compile_features(example1 PRIVATE cxx_std_17) 64 | ``` 65 | 66 | Build: 67 | 68 | ```sh 69 | cmake -S . -B ./build && cmake --build ./build --config Debug 70 | ``` 71 | 72 | ## Usage from Nodejs 73 | 74 | Installation: 75 | 76 | ```sh 77 | npm install zadeh 78 | ``` 79 | 80 | To import all the functions: 81 | 82 | ```js 83 | import * as zadeh from "zadeh" 84 | ``` 85 | 86 | or 87 | 88 | ```js 89 | const zadeh = require("zadeh") 90 | ``` 91 | 92 | ### StringArrayFilterer 93 | 94 | `StringArrayFilterer` is a class that allows setting the `candidates` only once and performing the filtering on them multiple times. This is much more efficient than calling the `filter` function directly. 95 | 96 |
97 | `StringArrayFilterer` API 98 | 99 | ```ts 100 | export class StringArrayFilterer { 101 | /** 102 | * Make a `StringArrayFilterer` for the candidates that are going to be filtered. 103 | * 104 | * @param candidates An array of strings. 105 | */ 106 | constructor(candidates?: Array<string>) 107 | 108 | /** 109 | * Filter the already set array of strings 110 | * 111 | * @param query A string query to match each candidate against. 112 | * @param options Options 113 | * @returns Returns an array of candidates sorted by best match against the query. 114 | */ 115 | filter(query: string, options: StringArrayFilterOptions = {}): Array<string> 116 | 117 | /** 118 | * Filter the already set array of strings and get the indices of the chosen candidate 119 | * 120 | * @param query A string query to match each candidate against. 121 | * @param options Options 122 | * @returns Returns an array of numbers indicating the index of the chosen candidate sorted by the best match against the query. 123 | */ 124 | filterIndices(query: string, options: StringArrayFilterOptions = {}): Array<number> 125 | 126 | /** 127 | * Allows setting the candidates (if changed or not set in the constructor). 128 | * 129 | * @param candidates An array of strings. 130 | */ 131 | setCandidates(candidates: Array<string>) 132 | } 133 | ``` 134 |
136 |
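In addition to the basic example below, `filterIndices` can be combined with the `maxResults` option when you only need the positions of the best matches rather than the strings themselves. A minimal sketch, assuming the API above; the candidate list and query are made up for illustration:

```js
const { StringArrayFilterer } = require("zadeh")

// hypothetical candidates, for illustration only
const candidates = ["benchmark.js", "binding.ts", "README.md"]
const filterer = new StringArrayFilterer(candidates)

// indices into `candidates`, best match first; maxResults caps how many are returned
const indices = filterer.filterIndices("bind", { maxResults: 2 })
const bestMatches = indices.map((i) => candidates[i])
```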
137 | 138 | **Example**: 139 | 140 | ```js 141 | const { StringArrayFilterer } = require("zadeh") 142 | 143 | // create class 144 | const strArrFilterer = new StringArrayFilterer() 145 | 146 | // set the candidates 147 | strArrFilterer.setCandidates(["Call", "Me", "Maybe"]) 148 | 149 | // call filter multiple times 150 | strArrFilterer.filter("me") 151 | strArrFilterer.filter("all") 152 | ``` 153 | 154 | ### ObjectArrayFilterer 155 | 156 | ObjectArrayFilterer is a class that performs filtering on an array of objects based on a string stored in the given `dataKey` for each object 157 | 158 |
159 | `ObjectArrayFilterer` API 160 | 161 | ```ts 162 | export class ObjectArrayFilterer { 163 | /** 164 | * Make an `ObjectArrayFilterer` for the candidates that are going to be filtered. 165 | * 166 | * @param candidates An array of objects. 167 | * @param dataKey The key which is indexed for each object, and filtering is done based on the resulting string 168 | */ 169 | constructor(candidates?: Array<ObjectWithKey<DataKey>>, dataKey?: DataKey) 170 | 171 | /** 172 | * Filter the already set objects 173 | * 174 | * @param query A string query to match the dataKey of each candidate against. 175 | * @param options Options 176 | * @returns Returns an array of objects sorted by the best match against the query. 177 | */ 178 | filter(query: string, options: ObjectArrayFilterOptions = {}): Array<ObjectWithKey> 179 | 180 | /** 181 | * Filter the already set array of objects and get the indices of the chosen candidate 182 | * 183 | * @param query A string query to match the dataKey of each candidate against. 184 | * @param options Options 185 | * @returns Returns an array of numbers indicating the index of the chosen candidate sorted by the best match against the query. 186 | */ 187 | filterIndices(query: string, options: ObjectArrayFilterOptions = {}): Array<number> 188 | 189 | /** 190 | * Allows setting the candidates (if changed or not set in the constructor). 191 | * 192 | * @param candidates An array of objects. 193 | * @param dataKey The key which is indexed for each object, and filtering is done based on the resulting string 194 | */ 195 | setCandidates(candidates: Array<ObjectWithKey<DataKey>>, dataKey: DataKey) 196 | } 197 | ``` 198 |
200 |
200 | 201 | 202 | **Example**: 203 | 204 | ```js 205 | const { ObjectArrayFilterer } = require("zadeh") 206 | 207 | const candidates = [ 208 | { name: "Call", id: 1 }, 209 | { name: "Me", id: 2 }, 210 | { name: "Maybe", id: 3 }, 211 | ] 212 | 213 | // create a class and set the candidates 214 | const objArrFilterer = new ObjectArrayFilterer(candidates, "name") // filter based on their name 215 | 216 | // call filter multiple times 217 | objArrFilterer.filter("me") // [{ name: 'Me', id: 2 }, { name: 'Maybe', id: 3}] // finds two objects 218 | objArrFilterer.filter("all") // [{ name: 'Call', id: 1 }] 219 | ``` 220 | 221 | ### TreeFilterer 222 | 223 | TreeFilterer matches the given query against the nodes of the given array of trees and returns an array of filtered 224 | trees (or the indices of the matching candidates). A tree object is an object in which each entry stores its data in its `dataKey` and may have 225 | children (with the same structure) in its `childrenKey`. 226 |
228 | `TreeFilterer` API 229 | 230 | ```ts 231 | export class TreeFilterer { 232 | /** 233 | * The method to set an array of trees that are going to be filtered 234 | * 235 | * @param candidates An array of tree objects. 236 | * @param dataKey The key of the object (and its children) which holds the data (defaults to `"data"`) 237 | * @param childrenKey The key of the object (and its children) which hold the children (defaults to `"children"`) 238 | */ 239 | constructor( 240 | candidates?: Tree[], 241 | dataKey: DataKey = "data", 242 | childrenKey: ChildrenKey = "children" 243 | ) 244 | 245 | /** 246 | * The method to set an array of trees that are going to be filtered 247 | * 248 | * @param candidates An array of tree objects. 249 | * @param dataKey The key of the object (and its children) which holds the data (defaults to `"data"`) 250 | * @param childrenKey The key of the object (and its children) which hold the children (defaults to `"children"`) 251 | */ 252 | setCandidates( 253 | candidates: Tree[], 254 | dataKey: DataKey = "data", 255 | childrenKey: ChildrenKey = "children" 256 | ) 257 | 258 | /** 259 | * Filter the already set trees 260 | * 261 | * @param query A string query to match the dataKey of each candidate against. 262 | * @param options Options 263 | * @returns {Tree[]} An array of filtered trees. In a tree, the filtered data is at the last level (if it has 264 | * children, they are not included in the filtered tree) 265 | */ 266 | filter(query: string, options: TreeFilterOptions = {}): Tree[] 267 | 268 | /** 269 | * The method to perform the filtering on the already set candidates 270 | * 271 | * @param query A string query to match the dataKey of each candidate against. 272 | * @param options Options 273 | * @returns {TreeFilterIndicesResult[]} An array candidate objects in form of `{data, index, parentIndices}` sorted by 274 | * best match against the query. Each object has the address of the object in the tree using `index` and `parent_indices` 275 | */ 276 | filterIndices(query: string, options: TreeFilterOptions = {}): TreeFilterIndicesResult[] 277 | } 278 | ``` 279 | 280 |
281 |
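`TreeFilterer` accepts the same `maxResults` option as the other filterers. A minimal sketch, assuming the API above, with a made-up outline tree (the detailed examples below call `filter` and `filterIndices` without options):

```js
const { TreeFilterer } = require("zadeh")

// hypothetical outline data, for illustration only
const outline = [
  { data: "render", children: [{ data: "renderFrame" }, { data: "resize" }] },
  { data: "reload" },
]
const treeFilterer = new TreeFilterer(outline, "data", "children")

// keep only the two best matches; each result carries `index` and `parent_indices`
const top2 = treeFilterer.filterIndices("re", { maxResults: 2 })
```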
281 | 282 | 283 | **Example**: 284 | 285 | ```js 286 | const { TreeFilterer } = require("zadeh") 287 | 288 | const treeFilterer = new TreeFilterer() 289 | 290 | const candidates = [ 291 | { data: "bye1", children: [{ data: "hello" }] }, 292 | { data: "Bye2", children: [{ data: "_bye4" }, { data: "hel" }] }, 293 | { data: "eye" }, 294 | ] 295 | treeFilterer.setCandidates(candidates, "data", "children") 296 | ``` 297 | 298 | ```ts 299 | treeFilterer.filter("hel") 300 | ``` 301 | 302 | returns 303 | 304 | ```ts 305 | ;[ 306 | { data: "Bye2", children: [{ data: "hel" }] }, 307 | { data: "bye1", children: [{ data: "hello" }] }, 308 | ] 309 | ``` 310 | 311 | ```ts 312 | treeFilterer.filter("bye") 313 | ``` 314 | 315 | returns 316 | 317 | ```ts 318 | ;[ 319 | { data: "bye1", children: [] }, 320 | { data: "Bye2", children: [{ data: "_bye4" }] }, 321 | { data: "Bye2", children: [] }, 322 | ] 323 | ``` 324 | 325 | ```ts 326 | treeFilterer.filterIndices("bye") 327 | ``` 328 | 329 | returns 330 | 331 | ```ts 332 | ;[ 333 | { data: "bye1", index: 0, parent_indices: [] }, 334 | { data: "_bye4", index: 0, parent_indices: [1] }, 335 | { data: "Bye2", index: 1, parent_indices: [] }, 336 | ] 337 | ``` 338 | 339 | ### score 340 | 341 | score(string, query, options = {}) 342 | 343 | Score the given string against the given query. 344 | 345 | - `string` - The string to score. 346 | - `query` - The query to score the string against. 347 | 348 | ```js 349 | const { score } = require('zadeh') 350 | 351 | score('Me', 'me') // 0.17099999999999999 352 | score('Maybe', 'me') // 0.0693 353 | ``` 354 | 355 | ### match 356 | 357 | match(string, query, options = {}) 358 | 359 | Gives an array of indices at which the query matches the given string. 360 | 361 | ```js 362 | const { match } = require("zadeh") 363 | 364 | match("Hello World", "he") // [0, 1] 365 | match("Hello World", "wor") // [6, 7, 8] 366 | match("Hello World", "elwor") // [1, 2, 6, 7, 8] 367 | ``` 368 | 369 | ### wrap 370 | 371 | wrap(string, query, options = {}) 372 | 373 | Gives an HTML/Markdown string that highlights the range for which the match happens. 374 | 375 | ```js 376 | wrap("helloworld", "he") 377 | ``` 378 | 379 | <strong class="highlight">he</strong>lloworld 380 | 381 | ```js 382 | wrap("Hello world", "he") 383 | ``` 384 | 385 | <strong class="highlight">He</strong>llo world 386 | 387 | ### options 388 | 389 | In all the above functions, you can pass an optional object with the following keys: 390 | 391 | ```ts 392 | { 393 | /** only for `filter` function */ 394 | /** The key to use when candidates is an object */ 395 | key?: T extends string ? never : keyof T 396 | 397 | /** only for `filter` function */ 398 | maxResults?: number 399 | 400 | /** @default false */ 401 | allowErrors?: boolean 402 | 403 | /** @default true */ 404 | usePathScoring?: boolean 405 | 406 | /** @default false */ 407 | useExtensionBonus?: boolean 408 | 409 | pathSeparator?: '/' | '\\' | string 410 | } 411 | ``` 412 | 413 | ## Deprecated functions 414 | 415 | These deprecated functions are provided to support the API of `fuzzaldrin` and `fuzzaldrin-plus`. 416 | However, you should replace their usage with the `StringArrayFilterer` or `ObjectArrayFilterer` classes, which allow setting the candidates only once and performing the filtering on those candidates multiple times. This is much more efficient than the `filter` or `filterTree` functions. 417 |
419 | `filter` function 420 | 421 | ### filter 422 | 423 | filter(candidates, query, options = {}) 424 | 425 | Sort and filter the given candidates by matching them against the given query. 426 | 427 | - `candidates` - An array of strings or objects. 428 | - `query` - A string query to match each candidate against. 429 | - `options` - The options. You should provide a `key` in the options if an array of objects is passed. 430 | 431 | Returns an array of candidates sorted by best match against the query. 432 | 433 | ```js 434 | const { filter } = require("zadeh") 435 | 436 | // With an array of strings 437 | let candidates = ["Call", "Me", "Maybe"] 438 | let results = filter(candidates, "me") // ['Me', 'Maybe'] 439 | 440 | // With an array of objects 441 | candidates = [ 442 | { name: "Call", id: 1 }, 443 | { name: "Me", id: 2 }, 444 | { name: "Maybe", id: 3 }, 445 | ] 446 | 447 | results = filter(candidates, "me", { key: "name" }) // [{name: 'Me', id: 2}, {name: 'Maybe', id: 3}] 448 | ``` 449 | 450 | **Deprecation Note**: use the `StringArrayFilterer` or `ObjectArrayFilterer` class instead. `filter` internally uses these classes, and in each call, it sets the candidates from scratch, which can slow down the process. 451 |
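For instance, the object-array call above maps onto the class-based API roughly like this (a sketch, reusing the `candidates` array from the example above):

```js
const { ObjectArrayFilterer } = require("zadeh")

// set the candidates once...
const objArrFilterer = new ObjectArrayFilterer(candidates, "name")

// ...then filter as many times as needed, instead of calling
// filter(candidates, query, { key: "name" }) on every query
objArrFilterer.filter("me")
objArrFilterer.filter("all")
```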
453 | 454 | # Comparison with other libraries 455 | 456 | ### Zadeh vs Fuzzaldrin and Fuzzaldrin-plus 457 | 458 | API is backward compatible with Fuzzaldrin and Fuzzaldrin-plus. Additional functions are provided to achieve better performance that could suit your needs 459 | 460 | Zadeh achieves 10x-20x performance improvement over Fuzzaldrin plus for chromium project with 300K files. This high performance is achieved using the following techniques. 461 | 462 | - Uses native C++ bindings that provide `~4x` performance benefit. 463 | - Use multiple threads to parallelize computation to achieve another `~4x` performance benefit. 464 | - Some miscellaneous improvements provide additional benefits. 465 | 466 | This project potentially solves the following Atom fuzzy-finder issues if used. 467 | https://github.com/atom/fuzzy-finder/issues/271 and https://github.com/atom/fuzzy-finder/issues/88 468 | -------------------------------------------------------------------------------- /src/binding/index.ts: -------------------------------------------------------------------------------- 1 | // @ts-ignore 2 | import nodeGypBuld from "node-gyp-build" 3 | 4 | import * as Binding from "./binding" 5 | const binding = nodeGypBuld(__dirname) as typeof Binding // __dirname relies on Parcel to bundle this file in the root of the package, so __dirname becomes correct 6 | 7 | /* 8 | ██████ ██████ ████████ ██ ██████ ███ ██ ███████ 9 | ██ ██ ██ ██ ██ ██ ██ ██ ████ ██ ██ 10 | ██ ██ ██████ ██ ██ ██ ██ ██ ██ ██ ███████ 11 | ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ 12 | ██████ ██ ██ ██ ██████ ██ ████ ███████ 13 | */ 14 | 15 | type stringWithLength1 = string 16 | 17 | export interface IOptions { 18 | /** @default false */ 19 | allowErrors?: boolean 20 | 21 | /** @default true */ 22 | usePathScoring?: boolean 23 | 24 | /** @default false */ 25 | useExtensionBonus?: boolean 26 | 27 | /** 28 | * A path separator which is a string with length 1. Such as "/" or "". By default, this is chosen based on the 29 | * operating system. 30 | */ 31 | pathSeparator?: "/" | "\\" | stringWithLength1 32 | 33 | // TODO not implemented? 34 | // optCharRegEx?: RegExp 35 | 36 | // TODO not implemented? 37 | // wrap?: { tagOpen?: string; tagClass?: string; tagClose?: string } 38 | 39 | /** @deprecated: there is no major benefit by precomputing something just for the query. */ 40 | preparedQuery?: never 41 | } 42 | 43 | export type StringArrayFilterOptions = IOptions & { 44 | /** The maximum numbers of results to return */ 45 | maxResults?: number 46 | 47 | // TODO not implemented 48 | // maxInners?: number 49 | } 50 | 51 | export type ObjectArrayFilterOptions = StringArrayFilterOptions 52 | export type TreeFilterOptions = StringArrayFilterOptions 53 | 54 | /** @deprecated The key to use when candidates is an object Deprecated option. */ 55 | export type DeprecatedFilterOptions = IOptions & { 56 | key?: T extends string ? never : keyof T 57 | } 58 | 59 | const defaultPathSeparator = process.platform === "win32" ? "\\" : "/" 60 | 61 | function parseOptions(options: IOptions) { 62 | // options.allowErrors ? = false 63 | if (options.usePathScoring === undefined) { 64 | options.usePathScoring = true 65 | } 66 | // options.useExtensionBonus ? = false 67 | if (options.pathSeparator === undefined) { 68 | options.pathSeparator = defaultPathSeparator 69 | } 70 | } 71 | 72 | function parseFilterOptions(filterOptions: StringArrayFilterOptions | ObjectArrayFilterOptions | TreeFilterOptions) { 73 | // options.optCharRegEx ? = null 74 | // options.wrap ? 
75 |   if (filterOptions.maxResults === undefined) {
76 |     filterOptions.maxResults = 0
77 |   }
78 |   // parse common options
79 |   parseOptions(filterOptions)
80 | }
81 | 
82 | /*
83 |  █████  ██████  ██████   █████  ██    ██ ███████ ██ ██   ████████ ███████ ██████
84 | ██   ██ ██   ██ ██   ██ ██   ██  ██  ██  ██      ██ ██      ██    ██      ██   ██
85 | ███████ ██████  ██████  ███████   ████   █████   ██ ██      ██    █████   ██████
86 | ██   ██ ██   ██ ██   ██ ██   ██    ██    ██      ██ ██      ██    ██      ██   ██
87 | ██   ██ ██   ██ ██   ██ ██   ██    ██    ██      ██ ███████ ██    ███████ ██   ██
88 | */
89 | 
90 | /** An object that stores its data in the property given by `DataKey` */
91 | export type ObjectWithKey<DataKey extends string = string> = {
92 |   [dk in DataKey]: string
93 | } & Record<string, unknown>
94 | 
95 | export type StringOrObjectArray<DataKey extends string = string> = string | ObjectWithKey<DataKey>
96 | 
97 | /** StringArrayFilterer is a class that performs filtering on an array of strings */
98 | export class StringArrayFilterer {
99 |   obj = new binding.Zadeh()
100 |   // typescript cannot detect that candidates is definitely assigned
101 |   // @ts-ignore
102 |   private candidates: Array<string>
103 | 
104 |   /**
105 |    * Make a `StringArrayFilterer` for the candidates that are going to be filtered.
106 |    *
107 |    * @param candidates An array of strings.
108 |    */
109 |   constructor(candidates?: Array<string>) {
110 |     if (candidates !== undefined) {
111 |       this.setCandidates(candidates)
112 |     } else {
113 |       this.candidates = []
114 |     }
115 |   }
116 | 
117 |   /**
118 |    * The method to set the candidates that are going to be filtered
119 |    *
120 |    * @param candidates An array of strings.
121 |    */
122 |   setCandidates(candidates: Array<string>) {
123 |     this.candidates = candidates
124 | 
125 |     Binding.validate_setArrayFiltererCandidates(candidates)
126 |     return this.obj.setArrayFiltererCandidates(candidates)
127 |   }
128 | 
129 |   /**
130 |    * Filter the already set array of strings
131 |    *
132 |    * @param query A string query to match each candidate against.
133 |    * @param options Options
134 |    * @returns Returns an array of candidates sorted by best match against the query.
135 |    */
136 |   filter(query: string, options: StringArrayFilterOptions = {}): Array<string> {
137 |     return this.filterIndices(query, options).map((ind: number) => this.candidates[ind])
138 |   }
139 | 
140 |   /**
141 |    * Filter the already set array of strings and get the indices of the chosen candidates
142 |    *
143 |    * @param query A string query to match each candidate against.
144 |    * @param options Options
145 |    * @returns Returns an array of numbers indicating the indices of the chosen candidates, sorted by the best match against the query.
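   *
   *   For example (illustrative, using the README candidates): with candidates `["Call", "Me", "Maybe"]`,
   *   `filterIndices("me")` would give `[1, 2]`, the indices of `"Me"` and `"Maybe"` in the candidates array.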
146 |   */
147 |   filterIndices(query: string, options: StringArrayFilterOptions = {}): Array<number> {
148 |     parseFilterOptions(options)
149 | 
150 |     const maxResult = options.maxResults as number /* numberified by parseFilterOptions */
151 |     const usePathScoring = Boolean(options.usePathScoring)
152 |     const useExtensionBonus = Boolean(options.useExtensionBonus)
153 | 
154 |     Binding.validate_filter(query, maxResult, usePathScoring, useExtensionBonus)
155 |     // NOTE calling obj.filter is slower than (obj.filterIndices then map) due to the interop overhead
156 |     return this.obj.filterIndices(query, maxResult, usePathScoring, useExtensionBonus)
157 |   }
158 | }
159 | 
160 | /**
161 |  * ObjectArrayFilterer is a class that performs filtering on an array of objects based on a string stored in the given
162 |  * `dataKey` for each object
163 |  */
164 | export class ObjectArrayFilterer<DataKey extends string = string> {
165 |   obj = new binding.Zadeh()
166 |   // typescript cannot detect that candidates is definitely assigned
167 |   // @ts-ignore
168 |   private candidates: ObjectWithKey<DataKey>[]
169 | 
170 |   /**
171 |    * Make an `ObjectArrayFilterer` for the candidates that are going to be filtered.
172 |    *
173 |    * @param candidates An array of objects.
174 |    * @param dataKey The key whose value is indexed for each object; filtering is done against the resulting string
175 |    */
176 |   constructor(candidates?: ObjectWithKey<DataKey>[], dataKey?: DataKey) {
177 |     if (candidates !== undefined && dataKey !== undefined) {
178 |       this.setCandidates(candidates, dataKey)
179 |     } else {
180 |       this.candidates = []
181 |     }
182 |   }
183 | 
184 |   /**
185 |    * Allows setting the candidates (if changed or not set in the constructor).
186 |    *
187 |    * @param candidates An array of objects.
188 |    * @param dataKey The key whose value is indexed for each object; filtering is done against the resulting string
189 |    */
190 |   setCandidates(candidates: ObjectWithKey<DataKey>[], dataKey: DataKey) {
191 |     this.candidates = candidates
192 |     const candidatesKeys = candidates.map((item) => item[dataKey])
193 | 
194 |     Binding.validate_setArrayFiltererCandidates(candidatesKeys)
195 |     this.obj.setArrayFiltererCandidates(candidatesKeys)
196 |   }
197 | 
198 |   /**
199 |    * Filter the already set objects
200 |    *
201 |    * @param query A string query to match the dataKey of each candidate against.
202 |    * @param options Options
203 |    * @returns Returns an array of objects sorted by best match against the query.
204 |    */
205 |   filter(query: string, options: ObjectArrayFilterOptions = {}): ObjectWithKey<DataKey>[] {
206 |     return this.filterIndices(query, options).map((ind: number) => this.candidates[ind])
207 |   }
208 | 
209 |   /**
210 |    * Filter the already set array of objects and get the indices of the chosen candidates
211 |    *
212 |    * @param query A string query to match the dataKey of each candidate against.
213 |    * @param options Options
214 |    * @returns Returns an array of numbers indicating the indices of the chosen candidates, sorted by the best match against the query.
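   *
   *   For example (illustrative): with candidates `[{ name: "Call" }, { name: "Me" }, { name: "Maybe" }]` and `dataKey`
   *   `"name"`, `filterIndices("me")` would give `[1, 2]`; `filter` is simply this result mapped back onto the candidates.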
215 |   */
216 |   filterIndices(query: string, options: StringArrayFilterOptions = {}): Array<number> {
217 |     parseFilterOptions(options)
218 | 
219 |     const maxResult = options.maxResults as number /* numberified by parseFilterOptions */
220 |     const usePathScoring = Boolean(options.usePathScoring)
221 |     const useExtensionBonus = Boolean(options.useExtensionBonus)
222 | 
223 |     Binding.validate_filter(query, maxResult, usePathScoring, useExtensionBonus)
224 |     if (query.length === 0) {
225 |       // optimization for query === ""
226 |       return []
227 |     }
228 |     // NOTE calling obj.filter is slower than (obj.filterIndices then map) due to the interop overhead
229 |     return this.obj.filterIndices(query, maxResult, usePathScoring, useExtensionBonus)
230 |   }
231 | }
232 | 
233 | /** @deprecated */
234 | type DeprecatedFilterReturn<T> = T extends string ? string[] : ObjectWithKey[]
235 | 
236 | let warnStringArrayFilterer = true
237 | let warnfilterObjectArrayFilterer = true
238 | 
239 | /**
240 |  * @deprecated Use `StringArrayFilterer` or `ObjectArrayFilterer` instead. Sort and filter the given candidates by
241 |  * matching them against the given query.
242 |  * @param candidates An array of strings or objects.
243 |  * @param query A string query to match each candidate against.
244 |  * @param options Options
245 |  * @returns Returns an array of candidates sorted by best match against the query.
246 |  */
247 | export function filter<T extends StringOrObjectArray>(
248 |   candidates: T[],
249 |   query: string,
250 |   options: DeprecatedFilterOptions<T> = {}
251 | ): DeprecatedFilterReturn<T> {
252 |   if (!candidates || !query) {
253 |     console.warn(`Zadeh: bad input to filter candidates: ${candidates}, query: ${query}`)
254 |     // @ts-ignore: bad input guard which doesn't meet the types
255 |     return []
256 |   }
257 | 
258 |   if (typeof candidates[0] === "object" && options.key) {
259 |     // object candidates with the key given in the options
260 |     if (warnfilterObjectArrayFilterer) {
261 |       console.warn(`Zadeh: deprecated function. Use 'ObjectArrayFilterer' instead`)
262 |       warnfilterObjectArrayFilterer = false
263 |     }
264 |     const dataKey = options.key
265 |     const objectArrayFilterer = new ObjectArrayFilterer(
266 |       candidates as ObjectWithKey[],
267 |       dataKey as string
268 |     )
269 |     return objectArrayFilterer.filter(query, options) as DeprecatedFilterReturn<T>
270 |   } else if (typeof candidates[0] === "string") {
271 |     // string array
272 |     if (warnStringArrayFilterer) {
273 |       console.warn(`Zadeh: deprecated function. Use 'StringArrayFilterer' instead`)
274 |       warnStringArrayFilterer = false
275 |     }
276 |     const stringArrayFilterer = new StringArrayFilterer(candidates as string[])
277 |     return stringArrayFilterer.filter(query, options) as DeprecatedFilterReturn<T>
278 |   } else {
279 |     throw new Error(`Zadeh: bad input to filter candidates: ${candidates}, query: ${query}, options: ${options}`)
280 |   }
281 | }
282 | 
283 | /*
284 | ████████ ██████  ███████ ███████     ███████ ██ ██   ████████ ███████ ██████  ███████ ██████
285 |    ██    ██   ██ ██      ██          ██      ██ ██      ██    ██      ██   ██ ██      ██   ██
286 |    ██    ██████  █████   █████       █████   ██ ██      ██    █████   ██████  █████   ██████
287 |    ██    ██   ██ ██      ██          ██      ██ ██      ██    ██      ██   ██ ██      ██   ██
288 |    ██    ██   ██ ███████ ███████     ██      ██ ███████ ██    ███████ ██   ██ ███████ ██   ██
289 | */
290 | 
291 | // The object (an element of the array) returned from filtering trees. It has the address of the object in the tree using `index` and `parent_indices`.
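// Illustrative reading (assumption): for a match on, say, candidates[1].children[0].children[3], `index` addresses the
// matched node itself and `parent_indices` identifies its chain of ancestors (here 1 and 0); the exact ordering is
// whatever the native `filterIndicesTree` binding returns.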
292 | export interface TreeFilterIndicesResult {
293 |   data: string
294 |   index: number
295 |   parent_indices: Array<number>
296 | }
297 | 
298 | /**
299 |  * TreeFilterer filters the given query in the nodes of the given array of trees and returns an array of filtered trees
300 |  * (or the indices of the filtered candidates). A tree object is an object in which each entry stores the data in its
301 |  * `dataKey`, and it has (may have) some children (with a similar structure) in its `childrenKey`
302 |  */
303 | export class TreeFilterer<DataKey extends string = "data", ChildrenKey extends string = "children"> {
304 |   obj = new binding.Zadeh()
305 |   // typescript cannot detect that candidates is definitely assigned
306 |   // @ts-ignore
307 |   private candidates: Tree<DataKey, ChildrenKey>[]
308 | 
309 |   /**
310 |    * Make a `TreeFilterer` for the array of trees that are going to be filtered
311 |    *
312 |    * @param candidates An array of tree objects.
313 |    * @param dataKey The key of the object (and its children) which holds the data (defaults to `"data"`)
314 |    * @param childrenKey The key of the object (and its children) which holds the children (defaults to `"children"`)
315 |    */
316 |   constructor(
317 |     candidates?: Tree<DataKey, ChildrenKey>[],
318 |     dataKey: DataKey = "data" as DataKey,
319 |     childrenKey: ChildrenKey = "children" as ChildrenKey
320 |   ) {
321 |     if (candidates) {
322 |       this.setCandidates(candidates, dataKey, childrenKey)
323 |     } else {
324 |       this.candidates = []
325 |     }
326 |   }
327 | 
328 |   /**
329 |    * The method to set an array of trees that are going to be filtered
330 |    *
331 |    * @param candidates An array of tree objects.
332 |    * @param dataKey The key of the object (and its children) which holds the data (defaults to `"data"`)
333 |    * @param childrenKey The key of the object (and its children) which holds the children (defaults to `"children"`)
334 |    */
335 |   setCandidates(
336 |     candidates: Tree<DataKey, ChildrenKey>[],
337 |     dataKey: DataKey = "data" as DataKey,
338 |     childrenKey: ChildrenKey = "children" as ChildrenKey
339 |   ) {
340 |     this.candidates = candidates
341 | 
342 |     Binding.validate_setTreeFiltererCandidates(candidates, dataKey, childrenKey)
343 |     return this.obj.setTreeFiltererCandidates(candidates, dataKey, childrenKey)
344 |   }
345 | 
346 |   /**
347 |    * Filter the already set trees
348 |    *
349 |    * @param query A string query to match the dataKey of each candidate against.
350 |    * @param options Options
351 |    * @returns {Tree[]} An array of filtered trees. In a tree, the filtered data is at the last level (if it has
352 |    *   children, they are not included in the filtered tree)
353 |    */
354 |   filter(query: string, options: TreeFilterOptions = {}): Tree<DataKey, ChildrenKey>[] {
355 |     parseFilterOptions(options)
356 | 
357 |     const maxResult = options.maxResults as number /* numberified by parseFilterOptions */
358 |     const usePathScoring = Boolean(options.usePathScoring)
359 |     const useExtensionBonus = Boolean(options.useExtensionBonus)
360 | 
361 |     Binding.validate_filterTree(query, maxResult, usePathScoring, useExtensionBonus)
362 |     if (query.length === 0) {
363 |       // optimization for query === ""
364 |       return []
365 |     }
366 |     return this.obj.filterTree(query, maxResult, usePathScoring, useExtensionBonus)
367 |   }
368 | 
369 |   /**
370 |    * The method to perform the filtering on the already set candidates
371 |    *
372 |    * @param query A string query to match the dataKey of each candidate against.
373 |    * @param options Options
374 |    * @returns {TreeFilterIndicesResult[]} An array of candidate objects in the form `{data, index, parent_indices}`, sorted by
375 |    *   best match against the query. Each object has the address of the matched object in the tree using `index` and `parent_indices`
376 |    */
377 |   filterIndices(query: string, options: TreeFilterOptions = {}): TreeFilterIndicesResult[] {
378 |     parseOptions(options)
379 |     if (query.length === 0) {
380 |       // optimization for query === ""
381 |       return []
382 |     }
383 |     return this.obj.filterIndicesTree(
384 |       query,
385 |       options.maxResults ?? 0,
386 |       Boolean(options.usePathScoring),
387 |       Boolean(options.useExtensionBonus)
388 |     )
389 |   }
390 | }
391 | 
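// Illustrative usage (hypothetical outline data, default "data"/"children" keys):
//   const treeFilterer = new TreeFilterer([{ data: "app", children: [{ data: "specs" }, { data: "src" }] }])
//   treeFilterer.filter("sp")         // the input trees trimmed down to the nodes matching "sp"
//   treeFilterer.filterIndices("sp")  // the same matches reported as { data, index, parent_indices } records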
392 | export type TreeDataProperty<DataKey extends string = "data"> = {
393 |   [dk in DataKey]: string
394 | }
395 | export type TreeChildrenProperty<DataKey extends string = "data", ChildrenKey extends string = "children"> = {
396 |   [ck in ChildrenKey]?: Tree<DataKey, ChildrenKey>[] // children is either an array of child trees or not provided
397 | }
398 | /**
399 |  * A {Tree} object is an object in which each entry stores the data in its dataKey and it has (may have) some children
400 |  * (with a similar structure) in its childrenKey
401 |  */
402 | export type Tree<DataKey extends string = "data", ChildrenKey extends string = "children"> = TreeDataProperty<DataKey> &
403 |   TreeChildrenProperty<DataKey, ChildrenKey>
404 | 
405 | /*
406 | ███████  ██████  ██████  ██████  ███████
407 | ██      ██      ██    ██ ██   ██ ██
408 | ███████ ██      ██    ██ ██████  █████
409 |      ██ ██      ██    ██ ██   ██ ██
410 | ███████  ██████  ██████  ██   ██ ███████
411 | */
412 | 
413 | /**
414 |  * Score the given string against the given query.
415 |  *
416 |  * @param candidate The string to score.
417 |  * @param query The query to score the string against.
418 |  * @param options Options
419 |  */
420 | export function score(candidate: string, query: string, options: IOptions = {}): number {
421 |   if (!candidate || !query) {
422 |     console.warn(`Zadeh: bad input to score candidate: ${candidate}, query: ${query}`)
423 |     return 0
424 |   }
425 |   parseOptions(options)
426 | 
427 |   const usePathScoring = Boolean(options.usePathScoring)
428 |   const useExtensionBonus = Boolean(options.useExtensionBonus)
429 | 
430 |   Binding.validate_score(candidate, query, usePathScoring, useExtensionBonus)
431 |   return binding.score(candidate, query, usePathScoring, useExtensionBonus)
432 | }
433 | 
434 | /*
435 | ███    ███  █████  ████████  ██████ ██   ██
436 | ████  ████ ██   ██    ██    ██      ██   ██
437 | ██ ████ ██ ███████    ██    ██      ███████
438 | ██  ██  ██ ██   ██    ██    ██      ██   ██
439 | ██      ██ ██   ██    ██     ██████ ██   ██
440 | */
441 | 
442 | /** Gives an array of indices at which the query matches the given string */
443 | export function match(str: string, query: string, options: IOptions = {}): number[] {
444 |   if (!str || !query) {
445 |     console.warn(`Zadeh: bad input to match str: ${str}, query: ${query}`)
446 |     return []
447 |   }
448 |   if (str === query) {
449 |     return Array.from(Array(str.length).keys())
450 |   }
451 |   parseOptions(options)
452 | 
453 |   const pathSeparator = options.pathSeparator as string /* stringified by parseOptions */
454 | 
455 |   Binding.validate_match(str, query, pathSeparator)
456 |   return binding.match(str, query, pathSeparator)
457 | }
458 | 
459 | /*
460 | ██     ██ ██████   █████  ██████
461 | ██     ██ ██   ██ ██   ██ ██   ██
462 | ██  █  ██ ██████  ███████ ██████
463 | ██ ███ ██ ██   ██ ██   ██ ██
464 |  ███ ███  ██   ██ ██   ██ ██
465 | */
466 | 
467 | /** Gives an HTML/Markdown string that highlights the range for which the match happens */
468 | export function wrap(str: string, query: string, options: IOptions = {}): string {
469 |   if (!str || !query) {
470 |     console.warn(`Zadeh: bad input to wrap str: ${str}, query: ${query}`)
471 |     // @ts-ignore
472 |     return []
473 |   }
474 |   parseOptions(options)
475 | 
476 |   const pathSeparator = options.pathSeparator as string /* stringified by parseOptions */
477 | 
478 |   Binding.validate_wrap(str, query, pathSeparator)
479 |   return binding.wrap(str, query, pathSeparator)
480 | }
481 | 
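// Illustrative usage of the helpers above (expected values are indicative, not exact):
//   score("Maybe", "me") // a positive number; a higher score means a better match
//   match("Maybe", "me") // the indices of the matched characters, e.g. [0, 4]
//   wrap("Maybe", "me")  // the same string with the matched characters wrapped in highlighting markup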
482 | /*
483 |  ██████  ████████ ██   ██ ███████ ██████
484 | ██    ██    ██    ██   ██ ██      ██   ██
485 | ██    ██    ██    ███████ █████   ██████
486 | ██    ██    ██    ██   ██ ██      ██   ██
487 |  ██████     ██    ██   ██ ███████ ██   ██
488 | */
489 | 
490 | /** @deprecated There is no major benefit in precomputing something just for the query. */
491 | /* eslint-disable @typescript-eslint/no-unused-vars */
492 | // @ts-ignore
493 | export function prepareQuery(query: string, options: IOptions = {}): {} {
494 |   console.warn(
495 |     "Zadeh: prepareQuery is deprecated. There is no major benefit in precomputing something just for the query."
496 |   )
497 |   // This is a no-op since there is no major benefit in precomputing something
498 |   // just for the query.
499 |   return {}
500 | }
501 | 
--------------------------------------------------------------------------------