├── .gitignore ├── src ├── query.coffee ├── filter.coffee ├── fuzzaldrin.coffee ├── pathScorer.coffee ├── matcher.coffee └── scorer.coffee ├── .npmignore ├── .travis.yml ├── appveyor.yml ├── spec ├── score-spec.coffee ├── match-spec.coffee └── filter-spec.coffee ├── bower.json ├── fuzzaldrin-plus.nuspec ├── LICENSE.md ├── package.json ├── demo ├── demo.css ├── demo.html └── movies.json ├── Gruntfile.coffee ├── benchmark └── benchmark.coffee ├── dist-browser ├── fuzzaldrin-plus.min.js └── fuzzaldrin-plus.js └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | lib/ 3 | dist/ 4 | .idea/ 5 | .DS_Store 6 | npm-debug.log 7 | -------------------------------------------------------------------------------- /src/query.coffee: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeancroy/fuzz-aldrin-plus/HEAD/src/query.coffee -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | .npmignore 2 | *.coffee 3 | script/ 4 | .DS_Store 5 | npm-debug.log 6 | .travis.yml 7 | appveyor.yml 8 | benchmark/ 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | 3 | notifications: 4 | email: 5 | on_success: never 6 | on_failure: change 7 | 8 | node_js: 9 | - 0.10 10 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # appveyor file 2 | # http://www.appveyor.com/docs/appveyor-yml 3 | 4 | # build version format 5 | version: "{build}" 6 | 7 | # what combinations to test 8 | environment: 9 | matrix: 10 | - nodejs_version: 0.10 11 | 12 | # Get the stable 
version of node 13 | install: 14 | - ps: Install-Product node $env:nodejs_version 15 | - npm install 16 | 17 | build: off 18 | 19 | test_script: 20 | - node --version 21 | - npm --version 22 | - cmd: npm test 23 | -------------------------------------------------------------------------------- /spec/score-spec.coffee: -------------------------------------------------------------------------------- 1 | {score} = require '../src/fuzzaldrin' 2 | 3 | describe "score(string, query)", -> 4 | it "returns a score", -> 5 | expect(score('Hello World', 'he')).toBeLessThan(score('Hello World', 'Hello')) 6 | expect(score('Hello World', '')).toBe 0 7 | expect(score('Hello World', null)).toBe 0 8 | expect(score('Hello World')).toBe 0 9 | expect(score()).toBe 0 10 | expect(score(null, 'he')).toBe 0 11 | expect(score('', '')).toBe 0 12 | expect(score('', 'abc')).toBe 0 13 | -------------------------------------------------------------------------------- /bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fuzzaldrin-plus", 3 | "description": "Fuzzy filtering and string similarity scoring - compatible with fuzzaldrin", 4 | "main": "./dist-browser/fuzzaldrin-plus.js", 5 | "authors": [ 6 | "Jean Christophe Roy " 7 | ], 8 | "license": "MIT", 9 | "keywords": [ 10 | "fuzzy", 11 | "approximate", 12 | "string", 13 | "matching", 14 | "filter", 15 | "search", 16 | "highlight", 17 | "sublime" 18 | ], 19 | "homepage": "https://github.com/jeancroy/fuzzaldrin-plus", 20 | "ignore": [ 21 | "**/.*", 22 | "node_modules", 23 | "bower_components", 24 | "benchmark", 25 | "demo", 26 | "dist", 27 | "lib", 28 | "spec" 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /fuzzaldrin-plus.nuspec: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fuzzaldrin-Plus 5 | 0.0 6 | Jean Christophe Roy 7 | Jean Christophe Roy 8 | 
https://github.com/jeancroy/fuzzaldrin-plus/blob/master/LICENSE.md 9 | https://github.com/jeancroy/fuzzaldrin-plus 10 | false 11 | A JavaScript library for fuzzy filtering 12 | Fuzzy FuzzySearch Fuzzaldrin 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Jean Christophe Roy 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /src/filter.coffee: -------------------------------------------------------------------------------- 1 | scorer = require './scorer' 2 | pathScorer = require './pathScorer' 3 | Query = require './query' 4 | 5 | pluckCandidates = (a) -> a.candidate 6 | sortCandidates = (a, b) -> b.score - a.score 7 | 8 | module.exports = (candidates, query, options) -> 9 | scoredCandidates = [] 10 | 11 | #See also option parsing on main module for default 12 | {key, maxResults, maxInners, usePathScoring} = options 13 | spotLeft = if maxInners? and maxInners > 0 then maxInners else candidates.length + 1 14 | bKey = key? 15 | scoreProvider = if usePathScoring then pathScorer else scorer 16 | 17 | for candidate in candidates 18 | string = if bKey then candidate[key] else candidate 19 | continue unless string 20 | score = scoreProvider.score(string, query, options) 21 | if score > 0 22 | scoredCandidates.push({candidate, score}) 23 | break unless --spotLeft 24 | 25 | # Sort scores in descending order 26 | scoredCandidates.sort(sortCandidates) 27 | 28 | #Extract original candidate 29 | candidates = scoredCandidates.map(pluckCandidates) 30 | 31 | #Trim to maxResults if specified 32 | candidates = candidates[0...maxResults] if maxResults? 
33 | 34 | #And return 35 | candidates 36 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fuzzaldrin-plus", 3 | "version": "0.6.0", 4 | "description": "Fuzzy filtering and string similarity scoring - compatible with fuzzaldrin", 5 | "license": "MIT", 6 | "licenses": [ 7 | { 8 | "type": "MIT", 9 | "url": "https://github.com/jeancroy/fuzzaldrin-plus/raw/master/LICENSE.md" 10 | } 11 | ], 12 | "main": "./lib/fuzzaldrin.js", 13 | "scripts": { 14 | "prepublish": "grunt prepublish", 15 | "test": "grunt test", 16 | "benchmark": "node_modules/.bin/coffee benchmark/benchmark.coffee" 17 | }, 18 | "repository": { 19 | "type": "git", 20 | "url": "https://github.com/jeancroy/fuzzaldrin-plus.git" 21 | }, 22 | "bugs": { 23 | "url": "https://github.com/jeancroy/fuzzaldrin-plus/issues" 24 | }, 25 | "homepage": "https://github.com/jeancroy/fuzzaldrin-plus", 26 | "keywords": [ 27 | "fuzzy", 28 | "approximate", 29 | "string", 30 | "matching", 31 | "filter", 32 | "search", 33 | "highlight", 34 | "sublime" 35 | ], 36 | "devDependencies": { 37 | "coffee-script": "~1.7", 38 | "fuzzaldrin": "~2.1.0", 39 | "grunt": "~0.4.1", 40 | "grunt-bower-task": "^0.4.0", 41 | "grunt-browserify": "^5.0.0", 42 | "grunt-cli": "~0.1.8", 43 | "grunt-coffeelint": "0.0.6", 44 | "grunt-contrib-clean": "^1.0.0", 45 | "grunt-contrib-coffee": "~0.9.0", 46 | "grunt-contrib-uglify": "^1.0.1", 47 | "grunt-nuget": "^0.1.6", 48 | "grunt-shell": "~0.2.2", 49 | "jasmine-focused": "1.x" 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/fuzzaldrin.coffee: -------------------------------------------------------------------------------- 1 | filter = require './filter' 2 | matcher = require './matcher' 3 | scorer = require './scorer' 4 | pathScorer = require './pathScorer' 5 | Query = require './query' 6 | 7 | 
preparedQueryCache = null 8 | defaultPathSeparator = if process?.platform is "win32" then '\\' else '/' 9 | 10 | module.exports = 11 | 12 | filter: (candidates, query, options = {}) -> 13 | return [] unless query?.length and candidates?.length 14 | options = parseOptions(options, query) 15 | filter(candidates, query, options) 16 | 17 | score: (string, query, options = {}) -> 18 | return 0 unless string?.length and query?.length 19 | options = parseOptions(options, query) 20 | if options.usePathScoring 21 | return pathScorer.score(string, query, options) 22 | else return scorer.score(string, query, options) 23 | 24 | match: (string, query, options = {}) -> 25 | return [] unless string 26 | return [] unless query 27 | return [0...string.length] if string is query 28 | options = parseOptions(options, query) 29 | return matcher.match(string, query, options) 30 | 31 | wrap: (string, query, options = {}) -> 32 | return [] unless string 33 | return [] unless query 34 | options = parseOptions(options, query) 35 | return matcher.wrap(string, query, options) 36 | 37 | prepareQuery: (query, options = {}) -> 38 | options = parseOptions(options, query) 39 | return options.preparedQuery 40 | 41 | #Setup default values 42 | parseOptions = (options, query) -> 43 | 44 | options.allowErrors ?= false 45 | options.usePathScoring ?= true 46 | options.useExtensionBonus ?= false 47 | options.pathSeparator ?= defaultPathSeparator 48 | options.optCharRegEx ?= null 49 | options.wrap ?= null 50 | 51 | options.preparedQuery ?= 52 | if preparedQueryCache and preparedQueryCache.query is query 53 | then preparedQueryCache 54 | else (preparedQueryCache = new Query(query, options)) 55 | 56 | return options 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /demo/demo.css: -------------------------------------------------------------------------------- 1 | 2 | * { 3 | box-sizing: border-box; 4 | } 5 | 6 | h1, h2, p, form, label { 7 | 
text-align: center; 8 | } 9 | 10 | h1, h2 { 11 | margin-top: 1.5em; 12 | margin-bottom: 0.5em; 13 | } 14 | 15 | #sourcetxt { 16 | display: block; 17 | margin-left: auto; 18 | margin-right: auto; 19 | width: 90%; 20 | max-width: 700px; 21 | height: 250px; 22 | } 23 | 24 | .center { 25 | display: block; 26 | margin-left: auto; 27 | margin-right: auto; 28 | width: 90%; 29 | max-width: 700px; 30 | } 31 | 32 | .typeahead { 33 | width: 90%; 34 | max-width: 700px; 35 | margin: auto; 36 | display: block; 37 | padding-bottom: 30px; 38 | position: relative; 39 | } 40 | 41 | .typeahead input { 42 | position: absolute; 43 | top: 0; 44 | left: 0; 45 | width: 100%; 46 | max-width: 700px; 47 | padding: 10px; 48 | opacity: 1; 49 | background: none 0% 0% / auto repeat scroll padding-box border-box rgb(255, 255, 255); 50 | } 51 | 52 | .twitter-typeahead { 53 | width: 100%; 54 | } 55 | 56 | .twitter-typeahead .tt-query, 57 | .twitter-typeahead .tt-hint { 58 | margin-bottom: 0; 59 | } 60 | 61 | .typeahead ul, .typeahead li { 62 | list-style: none; 63 | margin: 0; 64 | padding: 0; 65 | border: 0; 66 | font: inherit; 67 | font-size: 100%; 68 | vertical-align: baseline; 69 | } 70 | 71 | .tt-menu, .ui-menu { 72 | width: 100%; 73 | max-width: 700px; 74 | margin-top: 2px; 75 | padding: 5px 0; 76 | background-color: #ffffff; 77 | border: 1px solid rgba(0, 0, 0, 0.15); 78 | border-radius: 4px; 79 | -webkit-box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175); 80 | box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175); 81 | background-clip: padding-box; 82 | 83 | } 84 | 85 | .tt-suggestion, .ui-menu-item { 86 | display: block; 87 | padding: 6px 20px; 88 | } 89 | 90 | .ui-helper-hidden-accessible { 91 | display: none; 92 | } 93 | 94 | .typeahead-footer { 95 | border-top: 1px solid #eee; 96 | margin-top: 10px; 97 | padding: 20px; 98 | } 99 | 100 | .title { 101 | color: #222; 102 | } 103 | 104 | .title strong { 105 | color: #000; 106 | } 107 | 108 | .author { 109 | color: #666 110 | } 111 | 112 | .author strong { 
113 | color: #444 114 | } 115 | 116 | .score { 117 | color: #888; 118 | } 119 | 120 | td { 121 | vertical-align: top; 122 | text-align: left; 123 | } -------------------------------------------------------------------------------- /demo/demo.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |

Source

22 | 23 | 24 |
25 | 26 | 27 |
28 | 29 |

Make a search

30 | 31 |
32 | 33 |
34 | 35 | 36 |
37 | 38 | 39 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /Gruntfile.coffee: -------------------------------------------------------------------------------- 1 | module.exports = (grunt) -> 2 | grunt.initConfig 3 | pkg: grunt.file.readJSON('package.json') 4 | clean: ['lib','dist'] 5 | coffee: 6 | glob_to_multiple: 7 | expand: true 8 | cwd: 'src' 9 | src: ['*.coffee'] 10 | dest: 'lib' 11 | ext: '.js' 12 | 13 | coffeelint: 14 | options: 15 | no_empty_param_list: 16 | level: 'error' 17 | max_line_length: 18 | level: 'ignore' 19 | 20 | src: ['src/*.coffee'] 21 | test: ['spec/*.coffee'] 22 | gruntfile: ['Gruntfile.coffee'] 23 | 24 | browserify: 25 | options: 26 | banner: '/* <%= pkg.name %> - v<%= pkg.version %> - @license: <%= pkg.license %>; @author: Jean Christophe Roy; @site: <%= pkg.homepage %> */\n' 27 | browserifyOptions: 28 | standalone: 'fuzzaldrin' 29 | dist: 30 | src: 'lib/fuzzaldrin.js' 31 | dest: 'dist-browser/fuzzaldrin-plus.js' 32 | 33 | uglify: 34 | options: 35 | compress: true 36 | preserveComments: false 37 | banner: '/* <%= pkg.name %> - v<%= pkg.version %> - @license: <%= pkg.license %>; @author: Jean Christophe Roy; @site: <%= pkg.homepage %> */\n' 38 | dist: 39 | src: 'dist-browser/fuzzaldrin-plus.js', 40 | dest: 'dist-browser/fuzzaldrin-plus.min.js' 41 | 42 | shell: 43 | test: 44 | command: 'node node_modules/jasmine-focused/bin/jasmine-focused --coffee --captureExceptions spec' 45 | options: 46 | stdout: true 47 | stderr: true 48 | failOnError: true 49 | mkdir: 50 | command: 'mkdir dist' 51 | options: 52 | stdout: true 53 | stderr: true 54 | 55 | nugetpack: 56 | options: 57 | properties:'versiondir=<%= pkg.version %>' 58 | verbosity: 'detailed' 59 | dist: 60 | src: 'fuzzaldrin-plus.nuspec' 61 | dest: 'dist/' 62 | options: 63 | version: '<%= pkg.version %>' 64 | 65 | nugetpush: 66 | dist: 67 | src: 'dist/*.nupkg' 68 | options: 69 | apiKey: '' 70 | 71 | 
grunt.loadNpmTasks('grunt-contrib-coffee') 72 | grunt.loadNpmTasks('grunt-shell') 73 | grunt.loadNpmTasks('grunt-coffeelint') 74 | grunt.loadNpmTasks('grunt-browserify') 75 | grunt.loadNpmTasks('grunt-contrib-uglify') 76 | grunt.loadNpmTasks('grunt-contrib-clean') 77 | grunt.loadNpmTasks('grunt-nuget') 78 | grunt.loadNpmTasks('grunt-bower-task') 79 | 80 | 81 | grunt.registerTask('lint', ['coffeelint']) 82 | grunt.registerTask('test', ['default', 'shell:test']) 83 | grunt.registerTask('prepublish', ['clean', 'test', 'distribute']) 84 | grunt.registerTask('default', ['coffee', 'lint']) 85 | grunt.registerTask('distribute', ['default', 'browserify', 'uglify']) 86 | grunt.registerTask('packnuget', ['shell:mkdir', 'nugetpack']) 87 | grunt.registerTask('publishnuget', ['packnuget', 'nugetpush']) 88 | -------------------------------------------------------------------------------- /spec/match-spec.coffee: -------------------------------------------------------------------------------- 1 | {match} = require '../src/fuzzaldrin' 2 | path = require 'path' 3 | 4 | describe "match(string, query)", -> 5 | 6 | it "returns an array of matched and unmatched strings", -> 7 | expect(match('Hello World', 'he')).toEqual [0, 1] 8 | expect(match()).toEqual [] 9 | expect(match('Hello World', 'wor')).toEqual [6..8] 10 | 11 | expect(match('Hello World', 'd')).toEqual [10] 12 | expect(match('Hello World', 'elwor')).toEqual [1, 2, 6, 7, 8] 13 | expect(match('Hello World', 'er')).toEqual [1, 8] 14 | expect(match('Hello World', '')).toEqual [] 15 | expect(match(null, 'he')).toEqual [] 16 | expect(match('', '')).toEqual [] 17 | expect(match('', 'abc')).toEqual [] 18 | 19 | it "matches paths with slashes", -> 20 | expect(match(path.join('X', 'Y'), path.join('X', 'Y'))).toEqual [0..2] 21 | expect(match(path.join('X', 'X-x'), 'X')).toEqual [0, 2] 22 | expect(match(path.join('X', 'Y'), 'XY')).toEqual [0, 2] 23 | expect(match(path.join('-', 'X'), 'X')).toEqual [2] 24 | expect(match(path.join('X-', 
'-'), "X#{path.sep}")).toEqual [0, 2] 25 | 26 | it "double matches characters in the path and the base", -> 27 | expect(match(path.join('XY', 'XY'), 'XY')).toEqual [0, 1, 3, 4] 28 | expect(match(path.join('--X-Y-', '-X--Y'), 'XY')).toEqual [2, 4, 8, 11] 29 | 30 | it "prefer whole word to scattered letters", -> 31 | expect(match('fiddle gruntfile filler', 'file')).toEqual [ 12, 13, 14,15] 32 | expect(match('fiddle file', 'file')).toEqual [ 7, 8, 9, 10] 33 | expect(match('find le file', 'file')).toEqual [ 8, 9, 10, 11] 34 | 35 | it "prefer whole word to scattered letters, even without exact matches", -> 36 | expect(match('fiddle gruntfile xfiller', 'filex')).toEqual [ 12, 13, 14,15, 17] 37 | expect(match('fiddle file xfiller', 'filex')).toEqual [ 7, 8, 9, 10, 12] 38 | expect(match('find le file xfiller', 'filex')).toEqual [ 8, 9, 10, 11, 13] 39 | 40 | it "prefer exact match", -> 41 | expect(match('filter gruntfile filler', 'file')).toEqual [ 12, 13, 14, 15] 42 | 43 | it "prefer case sensitive exact match", -> 44 | expect(match('ccc CCC cCc CcC CCc', 'ccc')).toEqual [ 0, 1, 2] 45 | expect(match('ccc CCC cCc CcC CCc', 'CCC')).toEqual [ 4, 5, 6] 46 | expect(match('ccc CCC cCc CcC CCc', 'cCc')).toEqual [ 8, 9, 10] 47 | expect(match('ccc CCC cCc CcC CCc', 'CcC')).toEqual [ 12, 13, 14] 48 | expect(match('ccc CCC cCc CcC CCc', 'CCc')).toEqual [ 16, 17, 18] 49 | 50 | it "prefer camelCase to scattered letters", -> 51 | expect(match('ImportanceTableCtrl', 'itc')).toEqual [0,10,15] 52 | 53 | it "prefer acronym to scattered letters", -> 54 | expect(match('action_config', 'acon')).toEqual [ 0, 7, 8, 9] 55 | expect(match('application_control', 'acon')).toEqual [ 0, 12, 13, 14] 56 | 57 | it "account for case in selecting camelCase vs consecutive", -> 58 | expect(match('0xACACAC: CamelControlClass.ccc', 'CCC')).toEqual [ 10, 15, 22] 59 | expect(match('0xACACAC: CamelControlClass.ccc', 'ccc')).toEqual [ 28, 29, 30] 60 | 61 | it "limit consecutive inside word boundary", -> 62 | 63 | 
#expect(match('Interns And Roles - Patterns Roles', 'interns roles')).toEqual [ 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16] 64 | # 65 | # the longest substring is "terns roles" 66 | # it's also not very intuitive to split the word interns like that. 67 | # limit consecutive at word boundary will help to prevent splitting words. 68 | # 69 | # Aside from doing more computation while scanning consecutive. 70 | # The main problem is that we don't reset the consecutive count unless we encounter a negative match. 71 | # 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /src/pathScorer.coffee: -------------------------------------------------------------------------------- 1 | {isMatch, computeScore, scoreSize} = require './scorer' 2 | 3 | 4 | tau_depth = 20 # Directory depth at which the full path influence is halved. 5 | file_coeff = 2.5 # Full path is also penalized for length of basename. This adjusts a scale factor for that penalty. 6 | 7 | # 8 | # Main export 9 | # 10 | # Manage the logic of testing if there's a match and calling the main scoring function 11 | # Also manage scoring a path and optional character. 12 | 13 | exports.score = (string, query, options) -> 14 | {preparedQuery, allowErrors} = options 15 | return 0 unless allowErrors or isMatch(string, preparedQuery.core_lw, preparedQuery.core_up) 16 | string_lw = string.toLowerCase() 17 | score = computeScore(string, string_lw, preparedQuery) 18 | score = scorePath(string, string_lw, score, options) 19 | return Math.ceil(score) 20 | 21 | 22 | # 23 | # Score adjustment for path 24 | # 25 | 26 | scorePath = (subject, subject_lw, fullPathScore, options) -> 27 | return 0 if fullPathScore is 0 28 | 29 | {preparedQuery, useExtensionBonus, pathSeparator} = options 30 | 31 | # Skip trailing slashes 32 | end = subject.length - 1 33 | while subject[end] is pathSeparator then end-- 34 | 35 | # Get position of basePath of subject. 
36 | basePos = subject.lastIndexOf(pathSeparator, end) 37 | fileLength = end-basePos 38 | 39 | # Get a bonus for matching extension 40 | extAdjust = 1.0 41 | 42 | if useExtensionBonus 43 | extAdjust += getExtensionScore(subject_lw, preparedQuery.ext, basePos, end, 2) 44 | fullPathScore *= extAdjust 45 | 46 | # no basePath, nothing else to compute. 47 | return fullPathScore if (basePos is -1) 48 | 49 | # Get the number of folder in query 50 | depth = preparedQuery.depth 51 | 52 | # Get that many folder from subject 53 | while basePos > -1 and depth-- > 0 54 | basePos = subject.lastIndexOf(pathSeparator, basePos - 1) 55 | 56 | # Get basePath score, if BaseName is the whole string, no need to recompute 57 | # We still need to apply the folder depth and filename penalty. 58 | basePathScore = if (basePos is -1) then fullPathScore else 59 | extAdjust * computeScore(subject.slice(basePos + 1, end + 1), subject_lw.slice(basePos + 1, end + 1), preparedQuery) 60 | 61 | # Final score is linear interpolation between base score and full path score. 62 | # For low directory depth, interpolation favor base Path then include more of full path as depth increase 63 | # 64 | # A penalty based on the size of the basePath is applied to fullPathScore 65 | # That way, more focused basePath match can overcome longer directory path. 66 | 67 | alpha = 0.5 * tau_depth / ( tau_depth + countDir(subject, end + 1, pathSeparator) ) 68 | return alpha * basePathScore + (1 - alpha) * fullPathScore * scoreSize(0, file_coeff * (fileLength)) 69 | 70 | 71 | # 72 | # Count number of folder in a path. 73 | # (consecutive slashes count as a single directory) 74 | # 75 | 76 | exports.countDir = countDir = (path, end, pathSeparator) -> 77 | return 0 if end < 1 78 | 79 | count = 0 80 | i = -1 81 | 82 | #skip slash at the start so `foo/bar` and `/foo/bar` have the same depth. 
83 | while ++i < end and path[i] is pathSeparator 84 | continue 85 | 86 | while ++i < end 87 | if (path[i] is pathSeparator) 88 | count++ #record first slash, but then skip consecutive ones 89 | while ++i < end and path[i] is pathSeparator 90 | continue 91 | 92 | return count 93 | 94 | # 95 | # Find fraction of extension that is matched by query. 96 | # For example mf.h prefers myFile.h to myfile.html 97 | # This needs special handling because it gives points for not having characters (the `tml` in above example) 98 | # 99 | 100 | exports.getExtension = getExtension = (str) -> 101 | pos = str.lastIndexOf(".") 102 | if pos < 0 then "" else str.substr(pos + 1) 103 | 104 | 105 | getExtensionScore = (candidate, ext, startPos, endPos, maxDepth) -> 106 | # startPos is the position of last slash of candidate, -1 if absent. 107 | 108 | return 0 unless ext.length 109 | 110 | # Check that (a) extension exists, (b) it is after the start of the basename 111 | pos = candidate.lastIndexOf(".", endPos) 112 | return 0 unless pos > startPos # (note that startPos >= -1) 113 | 114 | n = ext.length 115 | m = endPos - pos 116 | 117 | # n contains the smallest of both extension lengths, m the largest. 
118 | if( m < n) 119 | n = m 120 | m = ext.length 121 | 122 | #place cursor after dot & count number of matching characters in extension 123 | pos++ 124 | matched = -1 125 | while ++matched < n then break if candidate[pos + matched] isnt ext[matched] 126 | 127 | # if nothing found, try deeper for multiple extensions, with some penalty for depth 128 | if matched is 0 and maxDepth > 0 129 | return 0.9 * getExtensionScore(candidate, ext, startPos, pos - 2, maxDepth - 1) 130 | 131 | # cannot divide by zero because m is the largest extension length and we return if either is 0 132 | return matched / m 133 | -------------------------------------------------------------------------------- /benchmark/benchmark.coffee: -------------------------------------------------------------------------------- 1 | fs = require 'fs' 2 | path = require 'path' 3 | 4 | fuzzaldrinPlus = require '../src/fuzzaldrin' 5 | legacy = require 'fuzzaldrin' 6 | 7 | lines = fs.readFileSync(path.join(__dirname, 'data.txt'), 'utf8').trim().split('\n') 8 | forceAllMatch = {maxInners: -1} 9 | mitigation = {maxInners: Math.floor(0.2 * lines.length)} 10 | 11 | #warmup + compile 12 | fuzzaldrinPlus.filter(lines, 'index', forceAllMatch) 13 | legacy.filter(lines, 'index') 14 | 15 | console.log("======") 16 | 17 | startTime = Date.now() 18 | results = fuzzaldrinPlus.filter(lines, 'index') 19 | console.log("Filtering #{lines.length} entries for 'index' took #{Date.now() - startTime}ms for #{results.length} results (~10% of results are positive, mix exact & fuzzy)") 20 | 21 | if results.length isnt 6168 22 | console.error("Results count changed! 
#{results.length} instead of 6168") 23 | process.exit(1) 24 | 25 | startTime = Date.now() 26 | results = legacy.filter(lines, 'index') 27 | console.log("Filtering #{lines.length} entries for 'index' took #{Date.now() - startTime}ms for #{results.length} results (~10% of results are positive, Legacy method)") 28 | 29 | 30 | console.log("======") 31 | 32 | startTime = Date.now() 33 | results = fuzzaldrinPlus.filter(lines, 'indx') 34 | console.log("Filtering #{lines.length} entries for 'indx' took #{Date.now() - startTime}ms for #{results.length} results (~10% of results are positive, Fuzzy match)") 35 | 36 | startTime = Date.now() 37 | results = legacy.filter(lines, 'indx') 38 | console.log("Filtering #{lines.length} entries for 'indx' took #{Date.now() - startTime}ms for #{results.length} results (~10% of results are positive, Fuzzy match, Legacy)") 39 | 40 | console.log("======") 41 | 42 | startTime = Date.now() 43 | results = fuzzaldrinPlus.filter(lines, 'walkdr') 44 | console.log("Filtering #{lines.length} entries for 'walkdr' took #{Date.now() - startTime}ms for #{results.length} results (~1% of results are positive, fuzzy)") 45 | 46 | startTime = Date.now() 47 | results = legacy.filter(lines, 'walkdr') 48 | console.log("Filtering #{lines.length} entries for 'walkdr' took #{Date.now() - startTime}ms for #{results.length} results (~1% of results are positive, Legacy method)") 49 | 50 | 51 | console.log("======") 52 | 53 | startTime = Date.now() 54 | results = fuzzaldrinPlus.filter(lines, 'node', forceAllMatch) 55 | console.log("Filtering #{lines.length} entries for 'node' took #{Date.now() - startTime}ms for #{results.length} results (~98% of results are positive, mostly Exact match)") 56 | 57 | startTime = Date.now() 58 | results = legacy.filter(lines, 'node') 59 | console.log("Filtering #{lines.length} entries for 'node' took #{Date.now() - startTime}ms for #{results.length} results (~98% of results are positive, mostly Exact match, Legacy method)") 60 | 61 | 
62 | console.log("======") 63 | 64 | startTime = Date.now() 65 | results = fuzzaldrinPlus.filter(lines, 'nm', forceAllMatch) 66 | console.log("Filtering #{lines.length} entries for 'nm' took #{Date.now() - startTime}ms for #{results.length} results (~98% of results are positive, Acronym match)") 67 | 68 | startTime = Date.now() 69 | results = legacy.filter(lines, 'nm') 70 | console.log("Filtering #{lines.length} entries for 'nm' took #{Date.now() - startTime}ms for #{results.length} results (~98% of results are positive, Acronym match, Legacy method)") 71 | 72 | 73 | console.log("======") 74 | 75 | startTime = Date.now() 76 | results = fuzzaldrinPlus.filter(lines, 'nodemodules', forceAllMatch) 77 | console.log("Filtering #{lines.length} entries for 'nodemodules' took #{Date.now() - startTime}ms for #{results.length} results (~98% positive + Fuzzy match, [Worst case scenario])") 78 | 79 | startTime = Date.now() 80 | results = fuzzaldrinPlus.filter(lines, 'nodemodules', mitigation) 81 | console.log("Filtering #{lines.length} entries for 'nodemodules' took #{Date.now() - startTime}ms for #{results.length} results (~98% positive + Fuzzy match, [Mitigation])") 82 | 83 | startTime = Date.now() 84 | results = legacy.filter(lines, 'nodemodules') 85 | console.log("Filtering #{lines.length} entries for 'nodemodules' took #{Date.now() - startTime}ms for #{results.length} results (Legacy)") 86 | 87 | console.log("======") 88 | 89 | startTime = Date.now() 90 | results = fuzzaldrinPlus.filter(lines, 'ndem', forceAllMatch) 91 | console.log("Filtering #{lines.length} entries for 'ndem' took #{Date.now() - startTime}ms for #{results.length} results (~98% positive + Fuzzy match, [Worst case but shorter srting])") 92 | 93 | startTime = Date.now() 94 | results = legacy.filter(lines, 'ndem') 95 | console.log("Filtering #{lines.length} entries for 'ndem' took #{Date.now() - startTime}ms for #{results.length} results (Legacy)") 96 | 97 | 98 | console.log("======") 99 | 100 | startTime = 
Date.now() 101 | query = 'index' 102 | prepared = fuzzaldrinPlus.prepareQuery(query) 103 | fuzzaldrinPlus.match(line, query, {preparedQuery: prepared}) for line in lines 104 | console.log("Matching #{lines.length} results for 'index' took #{Date.now() - startTime}ms (Prepare in advance)") 105 | 106 | startTime = Date.now() 107 | fuzzaldrinPlus.match(line, query) for line in lines 108 | console.log("Matching #{lines.length} results for 'index' took #{Date.now() - startTime}ms (cache)") 109 | # replace by `prepQuery ?= scorer.prepQuery(query)`to test without cache. 110 | 111 | startTime = Date.now() 112 | legacy.match(line, query) for line in lines 113 | console.log("Matching #{lines.length} results for 'index' took #{Date.now() - startTime}ms (legacy)") 114 | # replace by `prepQuery ?= scorer.prepQuery(query)`to test without cache. 115 | -------------------------------------------------------------------------------- /src/matcher.coffee: -------------------------------------------------------------------------------- 1 | # A match list is an array of indexes to characters that match. 2 | # This file should closely follow `scorer` except that it returns an array 3 | # of indexes instead of a score. 4 | 5 | {isMatch, isWordStart, scoreConsecutives, scoreCharacter, scoreAcronyms} = require './scorer' 6 | 7 | # 8 | # Main export 9 | # 10 | # Return position of character which matches 11 | 12 | exports.match = match = (string, query, options) -> 13 | 14 | {allowErrors, preparedQuery, pathSeparator} = options 15 | 16 | return [] unless allowErrors or isMatch(string, preparedQuery.core_lw, preparedQuery.core_up) 17 | string_lw = string.toLowerCase() 18 | 19 | # Full path results 20 | matches = computeMatch(string, string_lw, preparedQuery) 21 | 22 | #if there is no matches on the full path, there should not be any on the base path either. 23 | return matches if matches.length is 0 24 | 25 | # Is there a base path ? 
26 | if(string.indexOf(pathSeparator) > -1) 27 | 28 | # Base path results 29 | baseMatches = basenameMatch(string, string_lw, preparedQuery, pathSeparator) 30 | 31 | # Combine the results, removing duplicate indexes 32 | matches = mergeMatches(matches, baseMatches) 33 | 34 | matches 35 | 36 | 37 | # 38 | # Wrap 39 | # 40 | # Helper around match if you want a string with result wrapped by some delimiter text 41 | 42 | exports.wrap = (string, query, options) -> 43 | 44 | if(options.wrap?) 45 | {tagClass, tagOpen, tagClose} = options.wrap 46 | 47 | tagClass ?= 'highlight' 48 | tagOpen ?= '' 49 | tagClose ?= '' 50 | 51 | if string == query 52 | return tagOpen + string + tagClose 53 | 54 | #Run get position where a match is found 55 | matchPositions = match(string, query, options) 56 | 57 | #If no match return as is 58 | if matchPositions.length == 0 59 | return string 60 | 61 | #Loop over match positions 62 | output = '' 63 | matchIndex = -1 64 | strPos = 0 65 | while ++matchIndex < matchPositions.length 66 | matchPos = matchPositions[matchIndex] 67 | 68 | # Get text before the current match position 69 | if matchPos > strPos 70 | output += string.substring(strPos, matchPos) 71 | strPos = matchPos 72 | 73 | # Get consecutive matches to wrap under a single tag 74 | while ++matchIndex < matchPositions.length 75 | if matchPositions[matchIndex] == matchPos + 1 76 | matchPos++ 77 | else 78 | matchIndex-- 79 | break 80 | 81 | #Get text inside the match, including current character 82 | matchPos++ 83 | if matchPos > strPos 84 | output += tagOpen 85 | output += string.substring(strPos, matchPos) 86 | output += tagClose 87 | strPos = matchPos 88 | 89 | #Get string after last match 90 | if(strPos <= string.length - 1) 91 | output += string.substring(strPos) 92 | 93 | #return wrapped text 94 | output 95 | 96 | 97 | 98 | basenameMatch = (subject, subject_lw, preparedQuery, pathSeparator) -> 99 | 100 | # Skip trailing slashes 101 | end = subject.length - 1 102 | end-- while 
subject[end] is pathSeparator 103 | 104 | # Get position of basePath of subject. 105 | basePos = subject.lastIndexOf(pathSeparator, end) 106 | 107 | #If no PathSeparator, no base path exist. 108 | return [] if (basePos is -1) 109 | 110 | # Get the number of folder in query 111 | depth = preparedQuery.depth 112 | 113 | # Get that many folder from subject 114 | while(depth-- > 0) 115 | basePos = subject.lastIndexOf(pathSeparator, basePos - 1) 116 | return [] if (basePos is -1) #consumed whole subject ? 117 | 118 | # Get basePath match 119 | basePos++ 120 | end++ 121 | computeMatch(subject[basePos ... end], subject_lw[basePos... end], preparedQuery, basePos) 122 | 123 | 124 | # 125 | # Combine two matches result and remove duplicate 126 | # (Assume sequences are sorted, matches are sorted by construction.) 127 | # 128 | 129 | mergeMatches = (a, b) -> 130 | m = a.length 131 | n = b.length 132 | 133 | return a.slice() if n is 0 134 | return b.slice() if m is 0 135 | 136 | i = -1 137 | j = 0 138 | bj = b[j] 139 | out = [] 140 | 141 | while ++i < m 142 | ai = a[i] 143 | 144 | while bj <= ai and ++j < n 145 | if bj < ai 146 | out.push bj 147 | bj = b[j] 148 | 149 | out.push ai 150 | 151 | while j < n 152 | out.push b[j++] 153 | 154 | return out 155 | 156 | #---------------------------------------------------------------------- 157 | 158 | # 159 | # Align sequence (used for fuzzaldrin.match) 160 | # Return position of subject characters that match query. 161 | # 162 | # Follow closely scorer.computeScore. 163 | # Except at each step we record what triggered the best score. 164 | # Then we trace back to output matched characters. 165 | # 166 | # Differences are: 167 | # - we record the best move at each position in a matrix, and finish by a traceback. 168 | # - we reset consecutive sequence if we do not take the match. 
169 | # - no hit miss limit 170 | 171 | 172 | computeMatch = (subject, subject_lw, preparedQuery, offset = 0) -> 173 | query = preparedQuery.query 174 | query_lw = preparedQuery.query_lw 175 | 176 | m = subject.length 177 | n = query.length 178 | 179 | #this is like the consecutive bonus, but for camelCase / snake_case initials 180 | acro_score = scoreAcronyms(subject, subject_lw, query, query_lw).score 181 | 182 | #Init 183 | score_row = new Array(n) 184 | csc_row = new Array(n) 185 | 186 | # Directions constants 187 | STOP = 0 188 | UP = 1 189 | LEFT = 2 190 | DIAGONAL = 3 191 | 192 | #Traceback matrix 193 | trace = new Array(m * n) 194 | pos = -1 195 | 196 | #Fill with 0 197 | j = -1 #0..n-1 198 | while ++j < n 199 | score_row[j] = 0 200 | csc_row[j] = 0 201 | 202 | i = -1 #0..m-1 203 | while ++i < m #foreach char si of subject 204 | 205 | score = 0 206 | score_up = 0 207 | csc_diag = 0 208 | si_lw = subject_lw[i] 209 | 210 | j = -1 #0..n-1 211 | while ++j < n #foreach char qj of query 212 | 213 | #reset score 214 | csc_score = 0 215 | align = 0 216 | score_diag = score_up 217 | 218 | #Compute a tentative match 219 | if ( query_lw[j] is si_lw ) 220 | 221 | start = isWordStart(i, subject, subject_lw) 222 | 223 | # Forward search for a sequence of consecutive char 224 | csc_score = if csc_diag > 0 then csc_diag else 225 | scoreConsecutives(subject, subject_lw, query, query_lw, i, j, start) 226 | 227 | # Determine bonus for matching A[i] with B[j] 228 | align = score_diag + scoreCharacter(i, j, start, acro_score, csc_score) 229 | 230 | #Prepare next sequence & match score. 231 | score_up = score_row[j] # Current score_up is next run score diag 232 | csc_diag = csc_row[j] 233 | 234 | #In case of equality, moving UP get us closer to the start of the candidate string. 235 | if(score > score_up ) 236 | move = LEFT 237 | else 238 | score = score_up 239 | move = UP 240 | 241 | # Only take alignment if it's the absolute best option. 
242 | if(align > score) 243 | score = align 244 | move = DIAGONAL 245 | else 246 | #If we do not take this character, break consecutive sequence. 247 | # (when consecutive is 0, it'll be recomputed) 248 | csc_score = 0 249 | 250 | score_row[j] = score 251 | csc_row[j] = csc_score 252 | trace[++pos] = if(score > 0) then move else STOP 253 | 254 | # ------------------- 255 | # Go back in the trace matrix 256 | # and collect matches (diagonals) 257 | 258 | i = m - 1 259 | j = n - 1 260 | pos = i * n + j 261 | backtrack = true 262 | matches = [] 263 | 264 | while backtrack and i >= 0 and j >= 0 265 | switch trace[pos] 266 | when UP 267 | i-- 268 | pos -= n 269 | when LEFT 270 | j-- 271 | pos-- 272 | when DIAGONAL 273 | matches.push(i + offset) 274 | j-- 275 | i-- 276 | pos -= n + 1 277 | else 278 | backtrack = false 279 | 280 | matches.reverse() 281 | return matches 282 | 283 | -------------------------------------------------------------------------------- /dist-browser/fuzzaldrin-plus.min.js: -------------------------------------------------------------------------------- 1 | /* fuzzaldrin-plus - v0.5.0 - @license: MIT; @author: Jean Christophe Roy; @site: https://github.com/jeancroy/fuzzaldrin-plus */ 2 | !function(a){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=a();else if("function"==typeof define&&define.amd)define([],a);else{var b;b="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,b.fuzzaldrin=a()}}(function(){return function a(b,c,d){function e(g,h){if(!c[g]){if(!b[g]){var i="function"==typeof require&&require;if(!h&&i)return i(g,!0);if(f)return f(g,!0);var j=new Error("Cannot find module '"+g+"'");throw j.code="MODULE_NOT_FOUND",j}var k=c[g]={exports:{}};b[g][0].call(k.exports,function(a){var c=b[g][1][a];return e(c?c:a)},k,k.exports,a,b,c,d)}return c[g].exports}for(var f="function"==typeof 
require&&require,g=0;g0?k:a.length+1,h=null!=j,n=r?d:f,s=0,t=a.length;s0&&(o.push({candidate:i,score:m}),!--p))));s++);return o.sort(g),a=o.map(e),null!=l&&(a=a.slice(0,l)),a}}).call(this)},{"./pathScorer":4,"./query":5,"./scorer":6}],2:[function(a,b,c){(function(c){(function(){var d,e,f,g,h,i,j,k;f=a("./filter"),g=a("./matcher"),k=a("./scorer"),i=a("./pathScorer"),d=a("./query"),j=null,e="win32"===("undefined"!=typeof c&&null!==c?c.platform:void 0)?"\\":"/",b.exports={filter:function(a,b,c){return null==c&&(c={}),(null!=b?b.length:void 0)&&(null!=a?a.length:void 0)?(c=h(c,b),f(a,b,c)):[]},score:function(a,b,c){return null==c&&(c={}),(null!=a?a.length:void 0)&&(null!=b?b.length:void 0)?(c=h(c,b),c.usePathScoring?i.score(a,b,c):k.score(a,b,c)):0},match:function(a,b,c){var d;return null==c&&(c={}),a&&b?a===b?function(){d=[];for(var b=0,c=a.length;0<=c?bc;0<=c?b++:b--)d.push(b);return d}.apply(this):(c=h(c,b),g.match(a,b,c)):[]},wrap:function(a,b,c){return null==c&&(c={}),a&&b?(c=h(c,b),g.wrap(a,b,c)):[]},prepareQuery:function(a,b){return null==b&&(b={}),b=h(b,a),b.preparedQuery}},h=function(a,b){return null==a.allowErrors&&(a.allowErrors=!1),null==a.usePathScoring&&(a.usePathScoring=!0),null==a.useExtensionBonus&&(a.useExtensionBonus=!1),null==a.pathSeparator&&(a.pathSeparator=e),null==a.optCharRegEx&&(a.optCharRegEx=null),null==a.wrap&&(a.wrap=null),null==a.preparedQuery&&(a.preparedQuery=j&&j.query===b?j:j=new d(b,a)),a}}).call(this)}).call(this,a("_process"))},{"./filter":1,"./matcher":3,"./pathScorer":4,"./query":5,"./scorer":6,_process:7}],3:[function(a,b,c){(function(){var b,d,e,f,g,h,i,j,k,l;l=a("./scorer"),e=l.isMatch,f=l.isWordStart,k=l.scoreConsecutives,j=l.scoreCharacter,i=l.scoreAcronyms,c.match=g=function(a,c,f){var g,i,j,k,l,m;return g=f.allowErrors,l=f.preparedQuery,k=f.pathSeparator,g||e(a,l.core_lw,l.core_up)?(m=a.toLowerCase(),j=d(a,m,l),0===j.length?j:(a.indexOf(k)>-1&&(i=b(a,m,l,k),j=h(j,i)),j)):[]},c.wrap=function(a,b,c){var 
d,e,f,h,i,j,k,l,m;if(null!=c.wrap&&(m=c.wrap,j=m.tagClass,l=m.tagOpen,k=m.tagClose),null==j&&(j="highlight"),null==l&&(l=''),null==k&&(k=""),a===b)return l+a+k;if(f=g(a,b,c),0===f.length)return a;for(h="",d=-1,i=0;++di&&(h+=a.substring(i,e),i=e);++di&&(h+=l,h+=a.substring(i,e),h+=k,i=e)}return i<=a.length-1&&(h+=a.substring(i)),h},b=function(a,b,c,e){var f,g,h;for(h=a.length-1;a[h]===e;)h--;if(f=a.lastIndexOf(e,h),f===-1)return[];for(g=c.depth;g-- >0;)if(f=a.lastIndexOf(e,f-1),f===-1)return[];return f++,h++,d(a.slice(f,h),b.slice(f,h),c,f)},h=function(a,b){var c,d,e,f,g,h,i;if(g=a.length,h=b.length,0===h)return a.slice();if(0===g)return b.slice();for(e=-1,f=0,d=b[f],i=[];++e0?p:k(a,b,z,A,s,t,G),n=C+j(s,t,G,m,r)),E=D[t],p=q[t],B>E?w=g:(B=E,w=l),n>B?(B=n,w=e):r=0,D[t]=B,q[t]=r,H[++y]=B>0?w:h;for(s=u-1,t=x-1,y=s*x+t,o=!0,v=[];o&&s>=0&&t>=0;)switch(H[y]){case l:s--,y-=x;break;case g:t--,y--;break;case e:v.push(s+d),t--,s--,y-=x+1;break;default:o=!1}return v.reverse(),v}}).call(this)},{"./scorer":6}],4:[function(a,b,c){(function(){var b,d,e,f,g,h,i,j,k,l;l=a("./scorer"),h=l.isMatch,b=l.computeScore,j=l.scoreSize,k=13,e=1.5,c.score=function(a,c,d){var e,f,g,j;return f=d.preparedQuery,e=d.allowErrors,e||h(a,f.core_lw,f.core_up)?(j=a.toLowerCase(),g=b(a,j,f),g=i(a,j,g,d),Math.ceil(g)):0},i=function(a,c,f,h){var i,l,m,n,o,p,q,r,s,t;if(0===f)return 0;for(s=h.preparedQuery,t=h.useExtensionBonus,r=h.pathSeparator,o=a.length-1;a[o]===r;)o--;if(m=a.lastIndexOf(r,o),q=o-m,p=1,t&&(p+=g(c,s.ext,m,o,2),f*=p),m===-1)return f;for(n=s.depth;m>-1&&n-- >0;)m=a.lastIndexOf(r,m-1);return l=m===-1?f:p*b(a.slice(m+1,o+1),c.slice(m+1,o+1),s),i=.5*k/(k+d(a,o+1,r)),i*l+(1-i)*f*j(0,e*q)},c.countDir=d=function(a,b,c){var d,e;if(b<1)return 0;for(d=0,e=-1;++ec))return 0;for(i=b.length,f=d-j,f0?.9*g(a,b,c,j-2,e-1):h/f}}).call(this)},{"./scorer":6}],5:[function(a,b,c){(function(){var c,d,e,f,g,h,i,j;j=a("./pathScorer"),e=j.countDir,g=j.getExtension,b.exports=c=function(){function a(a,b){var 
c,h,j;return j=null!=b?b:{},c=j.optCharRegEx,h=j.pathSeparator,a&&a.length?(this.query=a,this.query_lw=a.toLowerCase(),this.core=d(a,c),this.core_lw=this.core.toLowerCase(),this.core_up=i(this.core),this.depth=e(a,a.length,h),this.ext=g(this.query_lw),void(this.charCodes=f(this.query_lw))):null}return a}(),h=/[ _\-:\/\\]/g,d=function(a,b){return null==b&&(b=h),a.replace(b,"")},i=function(a){var b,c,d,e;for(c="",d=0,e=a.length;df)return!1;for(d=-1,e=-1;++e-1)return p(a,b,z,A,y,x,u);for(E=new Array(x),h=new Array(x),I=s(x,u),v=Math.ceil(j*x)+5,w=v,q=!0,t=-1;++tC&&(C=F),k=0,A[t]===G)if(H=i(r,a,b),k=g>0?g:n(a,b,z,A,r,t,H),f=D+m(r,t,H,e,k),f>C)C=f,w=v;else{if(B&&--w<=0)return Math.max(C,E[x-1])*I;B=!1}D=F,g=h[t],h[t]=k,E[t]=C}return C=E[x-1],C*I},c.isWordStart=i=function(a,b,c){var d,e;return 0===a||(d=b[a],e=b[a-1],g(e)||d!==c[a]&&e===c[a-1])},c.isWordEnd=h=function(a,b,c,d){var e,f;return a===d-1||(e=b[a],f=b[a+1],g(f)||e===c[a]&&f!==c[a+1])},g=function(a){return" "===a||"."===a||"-"===a||"_"===a||"/"===a||"\\"===a},r=function(a){var b;return ae?d:e)+10):f+u*e},c.scoreConsecutives=n=function(a,b,c,d,e,f,g){var i,j,k,l,m,n,o;for(j=a.length,l=c.length,k=j-e,m=l-f,i=k-1&&(n=i(l,a,b),n&&(e=l))),k=-1,m=0;++k1&&o>1))return d;for(j=0,t=0,u=0,r=0,l=-1,m=-1;++m-1){t++;continue}break}for(;++l12*h)return!1;for(f=-1;++fd)return!1;return!0}}).call(this)},{}],7:[function(a,b,c){function d(){throw new Error("setTimeout has not been defined")}function e(){throw new Error("clearTimeout has not been defined")}function f(a){if(l===setTimeout)return setTimeout(a,0);if((l===d||!l)&&setTimeout)return l=setTimeout,setTimeout(a,0);try{return l(a,0)}catch(b){try{return l.call(null,a,0)}catch(b){return l.call(this,a,0)}}}function g(a){if(m===clearTimeout)return clearTimeout(a);if((m===e||!m)&&clearTimeout)return m=clearTimeout,clearTimeout(a);try{return m(a)}catch(b){try{return m.call(null,a)}catch(b){return m.call(this,a)}}}function 
h(){q&&o&&(q=!1,o.length?p=o.concat(p):r=-1,p.length&&i())}function i(){if(!q){var a=f(h);q=!0;for(var b=p.length;b;){for(o=p,p=[];++r1)for(var c=1;c 37 | {preparedQuery, allowErrors} = options 38 | return 0 unless allowErrors or isMatch(string, preparedQuery.core_lw, preparedQuery.core_up) 39 | string_lw = string.toLowerCase() 40 | score = computeScore(string, string_lw, preparedQuery) 41 | return Math.ceil(score) 42 | 43 | 44 | # 45 | # isMatch: 46 | # Are all (non optional)characters of query in subject, in proper order ? 47 | # 48 | 49 | exports.isMatch = isMatch = (subject, query_lw, query_up) -> 50 | m = subject.length 51 | n = query_lw.length 52 | 53 | if !m or n > m 54 | return false 55 | 56 | i = -1 57 | j = -1 58 | 59 | #foreach char of query 60 | while ++j < n 61 | 62 | qj_lw = query_lw.charCodeAt j 63 | qj_up = query_up.charCodeAt j 64 | 65 | # continue walking the subject from where we have left with previous query char 66 | # until we have found a character that is either lowercase or uppercase query. 67 | while ++i < m 68 | si = subject.charCodeAt i 69 | break if si is qj_lw or si is qj_up 70 | 71 | # if we passed the last char, query is not in subject 72 | if i is m then return false 73 | 74 | #Found every char of query in subject in proper order, match is positive 75 | return true 76 | 77 | 78 | #---------------------------------------------------------------------- 79 | # 80 | # Main scoring algorithm 81 | # 82 | 83 | exports.computeScore = computeScore = (subject, subject_lw, preparedQuery) -> 84 | query = preparedQuery.query 85 | query_lw = preparedQuery.query_lw 86 | 87 | m = subject.length 88 | n = query.length 89 | 90 | 91 | #---------------------------- 92 | # Abbreviations sequence 93 | 94 | acro = scoreAcronyms(subject, subject_lw, query, query_lw) 95 | acro_score = acro.score 96 | 97 | # Whole query is abbreviation ? 
98 | # => use that as score 99 | if( acro.count is n) 100 | return scoreExact(n, m, acro_score, acro.pos) 101 | 102 | #---------------------------- 103 | # Exact Match ? 104 | # => use that as score 105 | 106 | pos = subject_lw.indexOf(query_lw) 107 | if pos > -1 108 | return scoreExactMatch(subject, subject_lw, query, query_lw, pos, n, m) 109 | 110 | 111 | #---------------------------- 112 | # Individual characters 113 | # (Smith Waterman algorithm) 114 | 115 | 116 | # Init 117 | score_row = new Array(n) 118 | csc_row = new Array(n) 119 | sz = scoreSize(n, m) 120 | 121 | miss_budget = Math.ceil(miss_coeff * n) + 5 122 | miss_left = miss_budget 123 | csc_should_rebuild = true 124 | 125 | # Fill with 0 126 | j = -1 127 | while ++j < n 128 | score_row[j] = 0 129 | csc_row[j] = 0 130 | 131 | i = -1 132 | while ++i < m #foreach char si of subject 133 | si_lw = subject_lw[i] 134 | 135 | # if si_lw is not in query 136 | if not si_lw.charCodeAt(0) of preparedQuery.charCodes 137 | # reset csc_row and move to next subject char 138 | # unless we just cleaned it then keep cleaned version. 139 | if csc_should_rebuild 140 | j = -1 141 | while ++j < n 142 | csc_row[j] = 0 143 | csc_should_rebuild = false 144 | continue 145 | 146 | score = 0 147 | score_diag = 0 148 | csc_diag = 0 149 | record_miss = true 150 | csc_should_rebuild = true 151 | 152 | j = -1 #0..n-1 153 | while ++j < n #foreach char qj of query 154 | 155 | # What is the best gap ? 156 | # score_up contain the score of a gap in subject. 157 | # score_left = last iteration of score, -> gap in query. 
158 | score_up = score_row[j] 159 | score = score_up if(score_up > score ) 160 | 161 | #Reset consecutive 162 | csc_score = 0 163 | 164 | #Compute a tentative match 165 | if ( query_lw[j] is si_lw ) 166 | 167 | start = isWordStart(i, subject, subject_lw) 168 | 169 | # Forward search for a sequence of consecutive char 170 | csc_score = if csc_diag > 0 then csc_diag else 171 | scoreConsecutives(subject, subject_lw, query, query_lw, i, j, start) 172 | 173 | # Determine bonus for matching A[i] with B[j] 174 | align = score_diag + scoreCharacter(i, j, start, acro_score, csc_score) 175 | 176 | #Are we better using this match or taking the best gap (currently stored in score)? 177 | if(align > score) 178 | score = align 179 | # reset consecutive missed hit count 180 | miss_left = miss_budget 181 | else 182 | # We rejected this match and record a miss. 183 | # If budget is exhausted exit 184 | # Each character of query have it's score history stored in score_row 185 | # To get full query score use last item of row. 186 | if(record_miss and --miss_left <= 0) then return Math.max(score, score_row[n - 1]) * sz 187 | 188 | record_miss = false 189 | 190 | 191 | #Prepare next sequence & match score. 192 | score_diag = score_up 193 | csc_diag = csc_row[j] 194 | csc_row[j] = csc_score 195 | score_row[j] = score 196 | 197 | # get hightest score so far 198 | score = score_row[n - 1] 199 | return score * sz 200 | 201 | # 202 | # Boundaries 203 | # 204 | # Is the character at the start of a word, end of the word, or a separator ? 205 | # Fortunately those small function inline well. 
206 | # 207 | 208 | exports.isWordStart = isWordStart = (pos, subject, subject_lw) -> 209 | return true if pos is 0 # match is FIRST char ( place a virtual token separator before first char of string) 210 | curr_s = subject[pos] 211 | prev_s = subject[pos - 1] 212 | return isSeparator(prev_s) or # match FOLLOW a separator 213 | ( curr_s isnt subject_lw[pos] and prev_s is subject_lw[pos - 1] ) # match is Capital in camelCase (preceded by lowercase) 214 | 215 | 216 | exports.isWordEnd = isWordEnd = (pos, subject, subject_lw, len) -> 217 | return true if pos is len - 1 # last char of string 218 | curr_s = subject[pos] 219 | next_s = subject[pos + 1] 220 | return isSeparator(next_s) or # match IS FOLLOWED BY a separator 221 | ( curr_s is subject_lw[pos] and next_s isnt subject_lw[pos + 1] ) # match is lowercase, followed by uppercase 222 | 223 | 224 | isSeparator = (c) -> 225 | return c is ' ' or c is '.' or c is '-' or c is '_' or c is '/' or c is '\\' 226 | 227 | # 228 | # Scoring helper 229 | # 230 | 231 | scorePosition = (pos) -> 232 | if pos < pos_bonus 233 | sc = pos_bonus - pos 234 | return 100 + sc * sc 235 | else 236 | return Math.max(100 + pos_bonus - pos, 0) 237 | 238 | exports.scoreSize = scoreSize = (n, m) -> 239 | # Size penalty, use the difference of size (m-n) 240 | return tau_size / ( tau_size + Math.abs(m - n)) 241 | 242 | scoreExact = (n, m, quality, pos) -> 243 | return 2 * n * ( wm * quality + scorePosition(pos) ) * scoreSize(n, m) 244 | 245 | 246 | # 247 | # Shared scoring logic between exact match, consecutive & acronym 248 | # Ensure pattern length dominate the score then refine to take into account case-sensitivity 249 | # and structural quality of the pattern on the overall string (word boundary) 250 | # 251 | 252 | exports.scorePattern = scorePattern = (count, len, sameCase, start, end) -> 253 | sz = count 254 | 255 | bonus = 6 # to ensure consecutive length dominate score, this should be as large other bonus combined 256 | bonus += 2 if 
sameCase is count 257 | bonus += 3 if start 258 | bonus += 1 if end 259 | 260 | if count is len 261 | # when we match 100% of query we allow to break the size ordering. 262 | # This is to help exact match bubble up vs size, depth penalty etc 263 | if start 264 | if sameCase is len 265 | sz += 2 266 | else 267 | sz += 1 268 | if end 269 | bonus += 1 270 | 271 | return sameCase + sz * ( sz + bonus ) 272 | 273 | 274 | # 275 | # Compute the bonuses for two chars that are confirmed to matches in a case-insensitive way 276 | # 277 | 278 | exports.scoreCharacter = scoreCharacter = (i, j, start, acro_score, csc_score) -> 279 | 280 | # start of string / position of match bonus 281 | posBonus = scorePosition(i) 282 | 283 | # match IS a word boundary 284 | # choose between taking part of consecutive characters or consecutive acronym 285 | if start 286 | return posBonus + wm * ( (if acro_score > csc_score then acro_score else csc_score) + 10 ) 287 | 288 | # normal Match 289 | return posBonus + wm * csc_score 290 | 291 | 292 | # 293 | # Forward search for a sequence of consecutive character. 294 | # 295 | 296 | exports.scoreConsecutives = scoreConsecutives = (subject, subject_lw, query, query_lw, i, j, startOfWord) -> 297 | m = subject.length 298 | n = query.length 299 | 300 | mi = m - i 301 | nj = n - j 302 | k = if mi < nj then mi else nj 303 | 304 | sameCase = 0 305 | sz = 0 #sz will be one more than the last qi is sj 306 | 307 | # query_lw[i] is subject_lw[j] has been checked before entering now do case sensitive check. 308 | sameCase++ if (query[j] is subject[i]) 309 | 310 | #Continue while lowercase char are the same, record when they are case-sensitive match. 311 | while (++sz < k and query_lw[++j] is subject_lw[++i]) 312 | sameCase++ if (query[j] is subject[i]) 313 | 314 | 315 | # If we quit because of a non match 316 | # replace cursor to the last match 317 | if sz < k then i-- 318 | 319 | # Faster path for single match. 
320 | # Isolated character match occurs often and are not really interesting. 321 | # Fast path so we don't compute expensive pattern score on them. 322 | # Acronym should be addressed with acronym context bonus instead of consecutive. 323 | return 1 + 2 * sameCase if sz is 1 324 | 325 | return scorePattern(sz, n, sameCase, startOfWord, isWordEnd(i, subject, subject_lw, m)) 326 | 327 | 328 | # 329 | # Compute the score of an exact match at position pos. 330 | # 331 | 332 | exports.scoreExactMatch = scoreExactMatch = (subject, subject_lw, query, query_lw, pos, n, m) -> 333 | 334 | # Test for word start 335 | start = isWordStart(pos, subject, subject_lw) 336 | 337 | # Heuristic 338 | # If not a word start, test next occurrence 339 | # - We want exact match to be fast 340 | # - For exact match, word start has the biggest impact on score. 341 | # - Testing 2 instances is somewhere between testing only one and testing every instances. 342 | 343 | if not start 344 | pos2 = subject_lw.indexOf(query_lw, pos + 1) 345 | if pos2 > -1 346 | start = isWordStart(pos2, subject, subject_lw) 347 | pos = pos2 if start 348 | 349 | #Exact case bonus. 
350 | i = -1 351 | sameCase = 0 352 | while (++i < n) 353 | if (query[pos + i] is subject[i]) 354 | sameCase++ 355 | 356 | end = isWordEnd(pos + n - 1, subject, subject_lw, m) 357 | 358 | return scoreExact(n, m, scorePattern(n, n, sameCase, start, end), pos) 359 | 360 | 361 | # 362 | # Acronym prefix 363 | # 364 | 365 | class AcronymResult 366 | constructor: (@score, @pos, @count) -> 367 | 368 | emptyAcronymResult = new AcronymResult(0, 0.1, 0) 369 | 370 | exports.scoreAcronyms = scoreAcronyms = (subject, subject_lw, query, query_lw) -> 371 | m = subject.length 372 | n = query.length 373 | 374 | #a single char is not an acronym 375 | return emptyAcronymResult unless m > 1 and n > 1 376 | 377 | count = 0 378 | sepCount = 0 379 | sumPos = 0 380 | sameCase = 0 381 | 382 | i = -1 383 | j = -1 384 | 385 | #foreach char of query 386 | while ++j < n 387 | 388 | qj_lw = query_lw[j] 389 | 390 | # Separator will not score point but will continue the prefix when present. 391 | # Test that the separator is in the candidate and advance cursor to that position. 392 | # If no separator break the prefix 393 | 394 | if isSeparator(qj_lw) 395 | i = subject_lw.indexOf(qj_lw, i + 1) 396 | if i > -1 397 | sepCount++ 398 | continue 399 | else 400 | break 401 | 402 | # For other characters we search for the first match where subject[i] = query[j] 403 | # that also happens to be a start-of-word 404 | 405 | while ++i < m 406 | if qj_lw is subject_lw[i] and isWordStart(i, subject, subject_lw) 407 | sameCase++ if ( query[j] is subject[i] ) 408 | sumPos += i 409 | count++ 410 | break 411 | 412 | # All of subject is consumed, stop processing the query. 
413 | if i is m then break 414 | 415 | 416 | # Here, all of query is consumed (or we have reached a character not in acronym) 417 | # A single character is not an acronym (also prevent division by 0) 418 | if(count < 2) 419 | return emptyAcronymResult 420 | 421 | # Acronym are scored as start-of-word 422 | # Unless the acronym is a 1:1 match with candidate then it is upgraded to full-word. 423 | fullWord = if count is n then isAcronymFullWord(subject, subject_lw, query, count) else false 424 | score = scorePattern(count, n, sameCase, true, fullWord) 425 | 426 | return new AcronymResult(score, sumPos / count, count + sepCount) 427 | 428 | 429 | # 430 | # Test whether there's a 1:1 relationship between query and acronym of candidate. 431 | # For that to happens 432 | # (a) All character of query must be matched to an acronym of candidate 433 | # (b) All acronym of candidate must be matched to a character of query. 434 | # 435 | # This method check for (b) assuming (a) has been checked before entering. 436 | 437 | isAcronymFullWord = (subject, subject_lw, query, nbAcronymInQuery) -> 438 | m = subject.length 439 | n = query.length 440 | count = 0 441 | 442 | # Heuristic: 443 | # Assume one acronym every (at most) 12 character on average 444 | # This filter out long paths, but then they can match on the filename. 445 | if (m > 12 * n) then return false 446 | 447 | i = -1 448 | while ++i < m 449 | #For each char of subject 450 | #Test if we have an acronym, if so increase acronym count. 451 | #If the acronym count is more than nbAcronymInQuery (number of non separator char in query) 452 | #Then we do not have 1:1 relationship. 
453 | if isWordStart(i, subject, subject_lw) and ++count > nbAcronymInQuery then return false 454 | 455 | return true -------------------------------------------------------------------------------- /demo/movies.json: -------------------------------------------------------------------------------- 1 | ["A Nous la Liberte (1932)", 2 | "About Schmidt (2002)", 3 | "Absence of Malice (1981)", 4 | "Adam's Rib (1949)", 5 | "Adaptation (2002)", 6 | "The Adjuster (1991)", 7 | "The Adventures of Robin Hood (1938)", 8 | "Affliction (1998)", 9 | "The African Queen (1952)", 10 | "L'Age d'Or (1930, reviewed 1964)", 11 | "Aguirre, the Wrath of God (1972, reviewed 1977)", 12 | "A.I. (2001)", 13 | "Airplane! (1980)", 14 | "Aladdin (1992)", 15 | "Alexander Nevsky (1939)", 16 | "Alice Doesn't Live Here Anymore (1975)", 17 | "Alice's Restaurant (1969)", 18 | "Aliens (1986)", 19 | "All About Eve (1950)", 20 | "All About My Mother (1999)", 21 | "All Quiet on the Western Front (1930)", 22 | "All That Heaven Allows (1956)", 23 | "All the King's Men (1949)", 24 | "All the President's Men (1976)", 25 | "Amadeus (1984)", 26 | "Amarcord (1974)", 27 | "Amélie (2001)", 28 | "America, America (1963)", 29 | "The American Friend (1977)", 30 | "American Graffiti (1973)", 31 | "An American in Paris (1951)", 32 | "The Americanization of Emily (1964)", 33 | "American Movie (1999)", 34 | "Amores Perros (2000)", 35 | "Anastasia (1956)", 36 | "Anatomy of a Murder (1959)", 37 | "The Angry Silence (1960)", 38 | "Anna and the King of Siam (1946)", 39 | "Anna Christie (1930)", 40 | "Annie Hall (1977)", 41 | "The Apartment (1960)", 42 | "Apocalypse Now (1979)", 43 | "Apollo 13 (1995)", 44 | "The Apostle (1997)", 45 | "L'Argent (1983)", 46 | "Ashes and Diamonds (1958, reviewed 1961)", 47 | "Ashes and Diamonds (1958)", 48 | "The Asphalt Jungle (1950)", 49 | "L'Atalante (1934, reviewed 1947)", 50 | "Atlantic City (1981)", 51 | "Au Revoir Les Enfants (1988)", 52 | "L'Avventura (1961)", 53 | "The Awful Truth 
(1937)", 54 | "Babette's Feast (1987)", 55 | "Baby Doll (1956)", 56 | "Back to the Future (1985)", 57 | "The Bad and the Beautiful (1953)", 58 | "Bad Day at Black Rock (1955)", 59 | "Badlands (1973)", 60 | "The Baker's Wife (1940)", 61 | "Ball of Fire (1942)", 62 | "The Ballad of Cable Hogue (1970)", 63 | "Bambi (1942)", 64 | "The Band Wagon (1953)", 65 | "Bang the Drum Slowly (1973)", 66 | "The Bank Dick (1940)", 67 | "Barfly (1987)", 68 | "Barry Lyndon (1975)", 69 | "Barton Fink (1991)", 70 | "The Battle of Algiers (1965, reviewed 1967)", 71 | "Le Beau Mariage (1982)", 72 | "Beautiful People (2000)", 73 | "Beauty and the Beast (1947)", 74 | "Beauty and the Beast (1991)", 75 | "Bed and Board (1971)", 76 | "Beetlejuice (1988)", 77 | "Before Night Falls (2000)", 78 | "Before the Rain (1994, reviewed 1995)", 79 | "Being John Malkovich (1999)", 80 | "Being There (1979)", 81 | "Belle de Jour (1968)", 82 | "Ben-Hur (1959)", 83 | "Berlin Alexanderplatz (1983)", 84 | "The Best Years of Our Lives (1946)", 85 | "Beverly Hills Cop (1984)", 86 | "The Bicycle Thief (1949)", 87 | "The Big Chill (1983)", 88 | "The Big Clock (1948)", 89 | "The Big Deal on Madonna Street (1960)", 90 | "The Big Heat (1953)", 91 | "Big Night (1996)", 92 | "The Big Red One (1980)", 93 | "The Big Sky (1952)", 94 | "The Big Sleep (1946)", 95 | "Billy Liar (1963)", 96 | "Biloxi Blues (1988)", 97 | "The Birds (1963)", 98 | "Birdy (1984)", 99 | "Black Narcissus (1947)", 100 | "Black Orpheus (1959)", 101 | "Black Robe (1991)", 102 | "Blazing Saddles (1974)", 103 | "Bloody Sunday (2002)", 104 | "Blow-Up (1966)", 105 | "Blue Collar (1978)", 106 | "Blue Velvet (1986)", 107 | "Bob & Carol & Ted & Alice (1969)", 108 | "Bob le Flambeur (1955, reviewed 1981)", 109 | "Body Heat (1981)", 110 | "Bonnie and Clyde (1967)", 111 | "Boogie Nights (1997)", 112 | "Born on the Fourth of July (1989)", 113 | "Born Yesterday (1950)", 114 | "Le Boucher (1970)", 115 | "Bound for Glory (1976)", 116 | "Boys Don't Cry (1999)", 117 
| "Boyz N the Hood (1991)", 118 | "Brazil (1985)", 119 | "Bread, Love and Dreams (1954)", 120 | "Breaker Morant (1980)", 121 | "The Breakfast Club (1985)", 122 | "Breaking Away (1979)", 123 | "Breaking the Waves (1996)", 124 | "Breathless (1961)", 125 | "The Bride Wore Black (1968)", 126 | "The Bridge on the River Kwai (1957)", 127 | "Brief Encounter (1946)", 128 | "A Brief History of Time (1992)", 129 | "Bringing Up Baby (1938)", 130 | "Broadcast News (1987)", 131 | "Brother's Keeper (1992)", 132 | "The Buddy Holly Story (1978)", 133 | "Bull Durham (1988)", 134 | "Bullitt (1968)", 135 | "Bus Stop (1956)", 136 | "Butch Cassidy and the Sundance Kid (1969)", 137 | "The Butcher Boy (1998)", 138 | "Bye Bye Brasil (1980)", 139 | "The Earrings of Madame De . . . (1954)", 140 | "Cabaret (1972)", 141 | "The Caine Mutiny (1954)", 142 | "California Suite (1978)", 143 | "Calle 54 (2000)", 144 | "Camelot (1967)", 145 | "Camille (1937)", 146 | "Captains Courageous (1937)", 147 | "Carmen Jones (1954)", 148 | "Carnal Knowledge (1971)", 149 | "Casablanca (1942)", 150 | "Cat on a Hot Tin Roof (1958)", 151 | "Catch-22 (1970)", 152 | "Cavalcade (1933)", 153 | "The Celebration (1998)", 154 | "La Cérémonie (1996)", 155 | "Chan Is Missing (1982)", 156 | "Chariots of Fire (1981)", 157 | "Charley Varrick (1973)", 158 | "Chicago (2002)", 159 | "Chicken Run (2000)", 160 | "La Chienne (1931, reviewed 1975)", 161 | "Chinatown (1974)", 162 | "Chloë in the Afternoon (1972)", 163 | "Chocolat (1988, reviewed 1989)", 164 | "The Cider House Rules (1999)", 165 | "The Citadel (1938)", 166 | "Citizen Kane (1941)", 167 | "Claire's Knee (1971)", 168 | "The Clockmaker (1973, reviewed 1976)", 169 | "A Clockwork Orange (1971)", 170 | "Close Encounters of the Third Kind (1977)", 171 | "Close-Up (1990, reviewed 1999)", 172 | "Clueless (1995)", 173 | "Coal Miner's Daughter (1980)", 174 | "The Color of Money (1986)", 175 | "Come Back, Little Sheba (1952)", 176 | "Coming Home (1978)", 177 | "The Conformist 
(1970)", 178 | "The Conquest of Everest (1953)", 179 | "Contempt (1964)", 180 | "The Conversation (1974)", 181 | "Cool Hand Luke (1967)", 182 | "The Count of Monte Cristo (1934)", 183 | "The Country Girl (1954)", 184 | "The Cousins (1959)", 185 | "The Cranes Are Flying (1960)", 186 | "Cries and Whispers (1972)", 187 | "Crossfire (1947)", 188 | "Crumb (1994)", 189 | "Cry, the Beloved Country (1952)", 190 | "The Crying Game (1992)", 191 | "Damn Yankees (1958)", 192 | "The Damned (1969)", 193 | "Dance with a Stranger (1985)", 194 | "Dangerous Liaisons (1988)", 195 | "Daniel (1983)", 196 | "Danton (1983)", 197 | "Dark Eyes (1987)", 198 | "Dark Victory (1939)", 199 | "Darling (1965)", 200 | "David Copperfield (1935)", 201 | "David Holtzman's Diary (1968, reviewed 1973)", 202 | "Dawn of the Dead (1979)", 203 | "Day for Night (1973)", 204 | "The Day of the Jackal (1973)", 205 | "The Day the Earth Stood Still (1951)", 206 | "Days of Heaven (1978)", 207 | "Days of Wine and Roses (1963)", 208 | "The Dead (1987)", 209 | "Dead Calm (1989)", 210 | "Dead End (1937)", 211 | "Dead Man Walking (1995)", 212 | "Dead of Night (1946, reviewed 1946)", 213 | "Dead Ringers (1988)", 214 | "Death in Venice (1971)", 215 | "Death of a Salesman (1951)", 216 | "The Decalogue (2000)", 217 | "Deep End (1971)", 218 | "The Deer Hunter (1978)", 219 | "The Defiant Ones (1958)", 220 | "Deliverance (1972)", 221 | "Desperately Seeking Susan (1985)", 222 | "Destry Rides Again (1939)", 223 | "Diabolique (1955)", 224 | "Dial M for Murder (1954)", 225 | "Diary of a Chambermaid (1964)", 226 | "Diary of a Country Priest (1950, reviewed 1954)", 227 | "Die Hard (1988)", 228 | "Diner (1982)", 229 | "Dinner at Eight (1933)", 230 | "The Dirty Dozen (1967)", 231 | "Dirty Harry (1971)", 232 | "Dirty Rotten Scoundrels (1988)", 233 | "The Discreet Charm of the Bourgeoisie (1972)", 234 | "Disraeli (1929)", 235 | "Distant Thunder (1973)", 236 | "Diva (1982)", 237 | "Divorce-Italian Style (1962)", 238 | "Do the Right 
Thing (1989)", 239 | "Dr. Jekyll and Mr. Hyde (1932)", 240 | "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)", 241 | "Doctor Zhivago (1965)", 242 | "Dodsworth (1936)", 243 | "La Dolce Vita (1961)", 244 | "Donnie Brasco (1997)", 245 | "Don't Look Back (1967)", 246 | "Double Indemnity (1944)", 247 | "Down by Law (1986)", 248 | "Dracula (1931)", 249 | "The Dreamlife of Angels (1998)", 250 | "Dressed to Kill (1980)", 251 | "The Dresser (1983)", 252 | "Driving Miss Daisy (1989)", 253 | "Drowning by Numbers (1991)", 254 | "Drugstore Cowboy (1989)", 255 | "Duck Soup (1933)", 256 | "The Duellists (1978)", 257 | "Dumbo (1941)", 258 | "The Earrings of Madame De . . .", 259 | "East of Eden (1955)", 260 | "Easy Living (1937)", 261 | "Eat Drink Man Woman (1994)", 262 | "Effi Briest (1977)", 263 | "8 1/2 (1963)", 264 | "Eight Men Out (1988)", 265 | "The Elephant Man (1980)", 266 | "Elmer Gantry (1960)", 267 | "Empire of the Sun (1987)", 268 | "Enemies, A Love Story (1989)", 269 | "Les Enfants du Paradis (1945, reviewed 1947)", 270 | "The English Patient (1996)", 271 | "The Entertainer (1960)", 272 | "Entre Nous (1983)", 273 | "E.T. 
the Extra-Terrestrial (1982)", 274 | "Europa, Europa (1991)", 275 | "Every Man for Himself (1980)", 276 | "The Exorcist (1973)", 277 | "The Exterminating Angel (1967)", 278 | "A Face in the Crowd (1957)", 279 | "Face to Face (1976)", 280 | "Faces (1968)", 281 | "The Family Game (1984)", 282 | "Fanny & Alexander (1983)", 283 | "Fantasia (1940)", 284 | "Farewell, My Concubine (1993)", 285 | "Far from Heaven (2002)", 286 | "Fargo (1996)", 287 | "Fast, Cheap & Out of Control (1997)", 288 | "Fast Runner (Atanarjuat) (2002)", 289 | "Fat City (1972)", 290 | "Fatal Attraction (1987)", 291 | "Father of the Bride (1950)", 292 | "Fellini Satyricon (1970)", 293 | "La Femme Infidèle (1969)", 294 | "La Femme Nikita (1991)", 295 | "The Fisher King (1991)", 296 | "Fist in His Pocket (1968)", 297 | "Fitzcarraldo (1982)", 298 | "Five Easy Pieces (1970)", 299 | "The Flamingo Kid (1984)", 300 | "The Fly (1958)", 301 | "The Flamingo Kid (1984)", 302 | "Force of Evil (1948)", 303 | "For Whom the Bell Tolls (1943)", 304 | "Forbidden Games (1952)", 305 | "A Foreign Affair (1948)", 306 | "The Fortune Cookie (1966)", 307 | "The 400 Blows (1959)", 308 | "Frankenstein (1931)", 309 | "The French Connection (1971)", 310 | "Frenzy (1972)", 311 | "Friendly Persuasion (1956)", 312 | "From Here to Eternity (1953)", 313 | "The Fugitive (1947)", 314 | "Full Metal Jacket (1987)", 315 | "The Full Monty (1997)", 316 | "Funny Face (1957)", 317 | "Funny Girl (1968)", 318 | "Fury (1936)", 319 | "Gallipoli (1981)", 320 | "Gandhi (1982)", 321 | "Gangs of New York (2002)", 322 | "The Garden of the Finzi-Continis (1971)", 323 | "Gas Food Lodging (1992)", 324 | "Gaslight (1944)", 325 | "Gate of Hell (1954)", 326 | "A Geisha (1978)", 327 | "The General (1998)", 328 | "General Della Rovere (1960)", 329 | "Genevieve (1954)", 330 | "Gentlemen Prefer Blondes (1953)", 331 | "Georgy Girl (1966)", 332 | "Get Carter (1971)", 333 | "Get Out Your Handkerchiefs (1978)", 334 | "Ghost World (2001)", 335 | "Giant (1956)", 336 
| "Gigi (1958)", 337 | "Gimme Shelter (1970)", 338 | "The Girl Can't Help It (1956)", 339 | "Girl with a Suitcase (1961)", 340 | "The Gleaners and I (2001)", 341 | "The Goalie's Anxiety at the Penalty Kick (1977)", 342 | "The Go-Between (1971)", 343 | "The Godfather (1972)", 344 | "The Godfather Part II (1974)", 345 | "Going My Way (1944)", 346 | "Goldfinger (1964)", 347 | "Gone With the Wind (1939)", 348 | "The Good, the Bad and the Ugly (1968)", 349 | "The Good Earth (1937)", 350 | "Goodbye, Mr. Chips (1939)", 351 | "GoodFellas (1990)", 352 | "Gosford Park (2001)", 353 | "The Graduate (1967)", 354 | "Grand Hotel (1932)", 355 | "Grand Illusion (1938)", 356 | "The Grapes of Wrath (1940)", 357 | "The Great Dictator (1940)", 358 | "Great Expectations (1947)", 359 | "The Great Man (1957)", 360 | "The Great McGinty (1940)", 361 | "The Greatest Show on Earth (1952)", 362 | "Green for Danger (1947)", 363 | "Gregory's Girl (1982)", 364 | "The Grifters (1990)", 365 | "Groundhog Day (1993)", 366 | "The Gunfighter (1950)", 367 | "Gunga Din (1939)", 368 | "Hail the Conquering Hero (1944)", 369 | "Hair (1979)", 370 | "Hamlet (1948)", 371 | "Hamlet (2000)", 372 | "Handle With Care (1977)", 373 | "Hannah and Her Sisters (1986)", 374 | "Happiness (1998)", 375 | "A Hard Day's Night (1964)", 376 | "Harlan County, USA (1976)", 377 | "Harry and Tonto (1974)", 378 | "A Hatful of Rain (1957)", 379 | "The Heartbreak Kid (1972)", 380 | "Heartland (1981)", 381 | "Hearts of Darkness: A Filmmaker's Apocalypse (1991)", 382 | "Heat and Dust (1983)", 383 | "Heathers (1989)", 384 | "Heavy Traffic (1973)", 385 | "Heimat (1985)", 386 | "The Heiress (1949)", 387 | "Henry V (1946)", 388 | "Henry V (1989)", 389 | "Henry Fool (1998)", 390 | "Here Comes Mr. 
Jordan (1941)", 391 | "High and Low (Japan) (1963)", 392 | "The High and the Mighty (1954)", 393 | "High Art (1998)", 394 | "High Hopes (1988)", 395 | "High Noon (1952)", 396 | "High Sierra (1941)", 397 | "The Hill (1965)", 398 | "Hiroshima Mon Amour (1960)", 399 | "His Girl Friday (1940)", 400 | "The Homecoming (1973)", 401 | "Hoop Dreams (1994)", 402 | "Hope and Glory (1987)", 403 | "Hotel Terminus: Klaus Barbie et son Temps (1988)", 404 | "The Hours (2002)", 405 | "Household Saints (1993)", 406 | "House of Games (1987)", 407 | "How Green Was My Valley (1941)", 408 | "How to Marry a Millionaire (1953)", 409 | "Howards End (1992)", 410 | "Hud (1963)", 411 | "Ken Burns' America: Huey Long (1985)", 412 | "Husbands and Wives (1992)", 413 | "The Hustler (1961)", 414 | "I Know Where I'm Going! (1947)", 415 | "I Remember Mama (1948)", 416 | "I Want to Live! (1958)", 417 | "If... (1969)", 418 | "Ikiru (1952, reviewed 1960)", 419 | "I'm All Right Jack (1960)", 420 | "Imitation of Life (1959)", 421 | "In Cold Blood (1967)", 422 | "In the Bedroom (2001)", 423 | "In the Heat of the Night (1967)", 424 | "The Informer (1935)", 425 | "Inherit the Wind (1960)", 426 | "The Insider (1999)", 427 | "Internal Affairs (1990)", 428 | "The Ipcress File (1965)", 429 | "It Happened One Night (1934)", 430 | "It's a Gift (1935)", 431 | "It's a Wonderful Life (1946)", 432 | "Jailhouse Rock (1957)", 433 | "Jaws (1975)", 434 | "The Jazz Singer (1927)", 435 | "Jean de Florette (1987)", 436 | "Jerry Maguire (1996)", 437 | "Johnny Guitar (1954)", 438 | "The Judge and the Assassin (1982)", 439 | "Judgment at Nuremberg (1961)", 440 | "Ju Dou (1990)", 441 | "Jules and Jim (1962)", 442 | "Juliet of the Spirits (1965)", 443 | "Junior Bonner (1972)", 444 | "Kagemusha (1980)", 445 | "The Killers (1946)", 446 | "The Killing Fields (1984)", 447 | "Kind Hearts and Coronets (1950)", 448 | "The King and I (1956)", 449 | "King Kong (1933)", 450 | "King Lear (1971)", 451 | "The King of Comedy (1983)", 452 | 
"The King of Marvin Gardens (1972)", 453 | "Kiss of the Spider Woman (1985)", 454 | "Klute (1971)", 455 | "Knife in the Water (1963)", 456 | "Kramer vs. Kramer (1979)", 457 | "L.A. Confidential (1997)", 458 | "Lacombe Lucien (1974)", 459 | "The Lady Eve (1941)", 460 | "The Lady Vanishes (1938)", 461 | "Ladybird, Ladybird (1994)", 462 | "Lamerica (1994, reviewed 1995)", 463 | "The Last American Hero (1973)", 464 | "The Last Emperor (1987)", 465 | "The Last Metro (1980)", 466 | "The Last Picture Show (1971)", 467 | "The Last Seduction (1994)", 468 | "Last Tango in Paris (1973)", 469 | "The Last Temptation of Christ (1988)", 470 | "The Last Waltz (1978)", 471 | "Laura (1944)", 472 | "The Lavender Hill Mob (1951)", 473 | "Lawrence of Arabia (1962)", 474 | "A League of Their Own (1992)", 475 | "Leaving Las Vegas (1995)", 476 | "The Leopard (1963)", 477 | "The Letter (1963)", 478 | "A Letter to Three Wives (1949)", 479 | "Les Liaisons Dangereuses 1960 (1961)", 480 | "The Life and Death of Colonel Blimp (1945)", 481 | "Life Is Sweet (1991)", 482 | "The Life of Emile Zola (1937)", 483 | "Life With Father (1947)", 484 | "Like Water for Chocolate (1992, reviewed 1993)", 485 | "Lili (1953)", 486 | "Little Big Man (1970)", 487 | "Little Caesar (1931)", 488 | "The Little Foxes (1941)", 489 | "The Little Fugitive (1953)", 490 | "The Little Kidnappers (1954)", 491 | "Little Vera (1988, reviewed 1989)", 492 | "Little Women (1933)", 493 | "Little Women (1994)", 494 | "The Lives of a Bengal Lancer (1935)", 495 | "Living in Oblivion (1995)", 496 | "Local Hero (1983)", 497 | "Lola (1982)", 498 | "Lola Montès (1968)", 499 | "Lolita (1962)", 500 | "Lone Star (1996)", 501 | "The Loneliness of the Long Distance Runner (1962)", 502 | "Long Day's Journey into Night (1962)", 503 | "The Long Goodbye (1973)", 504 | "The Long Good Friday (1982)", 505 | "The Long Voyage Home (1940)", 506 | "The Longest Day (1962)", 507 | "Look Back in Anger (1959)", 508 | "Lost Horizon (1937)", 509 | "Lost in 
America (1985)", 510 | "The Lost Weekend (1945)", 511 | "Love (1973)", 512 | "Love Affair (1939)", 513 | "Love and Death (1975)", 514 | "A Love in Germany (1984)", 515 | "Love in the Afternoon (1957)", 516 | "Lovely and Amazing (2002)", 517 | "Love on the Run (1979)", 518 | "Lover Come Back (1962)", 519 | "The Lovers (1959)", 520 | "Loves of a Blonde (1966)", 521 | "Loving (1970)", 522 | "Lust for Life (1956)", 523 | "M (1931, reviewed 1933)", 524 | "Mad Max (1980)", 525 | "The Madness of King George (1994)", 526 | "The Magic Flute (1975)", 527 | "The Major and the Minor (1942)", 528 | "Major Barbara (1941)", 529 | "Make Way for Tomorrow (1937)", 530 | "Malcolm X (1992)", 531 | "The Maltese Falcon (1941)", 532 | "A Man for All Seasons (1966)", 533 | "Man Hunt (1941)", 534 | "The Man Who Came to Dinner (1942)", 535 | "The Man Who Loved Women (1977)", 536 | "The Man Who Wasn't There (2001)", 537 | "The Man With the Golden Arm (1955)", 538 | "The Manchurian Candidate (1962)", 539 | "Manhattan (1979)", 540 | "Manon of the Spring (1987)", 541 | "Marriage Italian Style (1964)", 542 | "The Marriage of Maria Braun (1979)", 543 | "Married to the Mob (1988)", 544 | "The Marrying Kind (1952)", 545 | "Marty (1955)", 546 | "Mary Poppins (1964)", 547 | "M*A*S*H (1970)", 548 | "The Match Factory Girl (1990)", 549 | "Mayerling (1937)", 550 | "McCabe & Mrs. Miller (1971)", 551 | "Mean Streets (1973)", 552 | "Meet Me in St. Louis (1944)", 553 | "Melvin and Howard (1980)", 554 | "Memories of Underdevelopment (1973)", 555 | "The Memory of Justice (1976)", 556 | "The Men (1950)", 557 | "Ménage (1986)", 558 | "Metropolitan (1990)", 559 | "Midnight (1939)", 560 | "Midnight Cowboy (1969)", 561 | "Minnie and Moskowitz (1971)", 562 | "The Miracle of Morgan's Creek (1944)", 563 | "Miracle on 34th Street (1947)", 564 | "The Miracle Worker (1962)", 565 | "Les Miserables (1935)", 566 | "The Misfits (1961)", 567 | "Missing (1982)", 568 | "Mr. and Mrs. Bridge (1990)", 569 | "Mr. 
Deeds Goes to Town (1936)", 570 | "Mr. Hulot's Holiday (1954)", 571 | "Mister Roberts (1955)", 572 | "Mr. Smith Goes to Washington (1939)", 573 | "Mrs. Miniver (1942)", 574 | "Mon Oncle d'Amérique (1980)", 575 | "Mona Lisa (1986)", 576 | "Monsieur Verdoux (1947, reviewed 1964)", 577 | "Monsters, Inc. (2001)", 578 | "Moonlighting (1982)", 579 | "Moonstruck (1987)", 580 | "The More the Merrier (1943)", 581 | "Morgan! (1966)", 582 | "The Mortal Storm (1940)", 583 | "Mother (1996)", 584 | "Moulin Rouge (1953)", 585 | "The Mouthpiece (1932)", 586 | "Much Ado About Nothing (1993)", 587 | "Mulholland Dr. (2001)", 588 | "Murmur of the Heart (1971)", 589 | "Mutiny on the Bounty (1935)", 590 | "My Beautiful Laundrette (1986)", 591 | "My Darling Clementine (1946)", 592 | "My Dinner With Andre (1981)", 593 | "My Fair Lady (1964)", 594 | "My Left Foot (1989)", 595 | "My Life as a Dog (1987)", 596 | "My Man Godfrey (1936)", 597 | "My Night at Maud's (1969)", 598 | "My Own Private Idaho (1991)", 599 | "My 20th Century (1990)", 600 | "Mon Oncle (1958)", 601 | "The Naked Gun: From the Files of Police Squad! (1988)", 602 | "Nashville (1975)", 603 | "National Lampoon's Animal House (1978)", 604 | "National Velvet (1944)", 605 | "Network (1976)", 606 | "Never on Sunday (1960)", 607 | "Night Moves (1975)", 608 | "The Night of the Hunter (1955)", 609 | "Night of the Living Dead (1968)", 610 | "A Night to Remember (1958)", 611 | "A Nightmare on Elm Street (1984)", 612 | "1900 (1977)", 613 | "Ninotchka (1939)", 614 | "Nobody's Fool (1994)", 615 | "Norma Rae (1979)", 616 | "North by Northwest (1959)", 617 | "Nothing But the Best (1964)", 618 | "Notorious (1946)", 619 | "Now, Voyager (1942)", 620 | "La Nuit De Varennes (1983)", 621 | "The Nun's Story (1959)", 622 | "Odd Man Out (1947)", 623 | "Of Mice and Men (1940)", 624 | "Oklahoma! 
(1955)", 625 | "Oliver Twist (1951)", 626 | "Los Olvidados (1950, reviewed 1952)", 627 | "On the Beach (1959)", 628 | "On the Town (1949)", 629 | "On the Waterfront (1954)", 630 | "One False Move (1992)", 631 | "One Flew Over the Cuckoo's Nest (1975)", 632 | "One Foot in Heaven (1941)", 633 | "One Hour with You (1932)", 634 | "One Night of Love (1934)", 635 | "One Potato, Two Potato (1964)", 636 | "One, Two, Three (1961)", 637 | "Only Angels Have Wings (1939)", 638 | "Open City (1946)", 639 | "Operation Crossbow (1965)", 640 | "The Opposite of Sex (1998)", 641 | "Ordinary People (1980)", 642 | "Ossessione (1942, reviewed 1976)", 643 | "Othello (1952, reviewed 1955)", 644 | "Our Town (1940)", 645 | "Out of the Past (1947)", 646 | "The Outlaw Josey Wales (1976)", 647 | "The Overlanders (1946)", 648 | "The Ox-Bow Incident (1943)", 649 | "Paint Your Wagon (1969)", 650 | "Paisan (1948)", 651 | "The Palm Beach Story (1942)", 652 | "The Parallax View (1974)", 653 | "A Passage to India (1984)", 654 | "The Passion of Anna (1970)", 655 | "Pather Panchali (1958)", 656 | "Paths of Glory (1957)", 657 | "Patton (1970)", 658 | "The Pawnbroker (1965)", 659 | "Payday (1973)", 660 | "Pelle the Conqueror (1988)", 661 | "The People Vs. 
Larry Flynt (1996)", 662 | "Persona (1967)", 663 | "Persuasion (1995)", 664 | "Le Petit Theatre de Jean Renoir (1974)", 665 | "Petulia (1968)", 666 | "The Philadelphia Story (1940)", 667 | "The Pianist (2002)", 668 | "The Piano (1993)", 669 | "Pickup on South Street (1953)", 670 | "The Pillow Book (1997)", 671 | "Pillow Talk (1959)", 672 | "The Pink Panther (1964)", 673 | "Pinocchio (1940)", 674 | "Pixote (1981)", 675 | "A Place in the Sun (1951)", 676 | "Places in the Heart (1984)", 677 | "Platoon (1986)", 678 | "Play Misty for Me (1971)", 679 | "The Player (1992)", 680 | "Playtime (1967, reviewed 1973)", 681 | "Point Blank (1967)", 682 | "Poltergeist (1982)", 683 | "Ponette (1997)", 684 | "Il Postino (The Postman) (1994)", 685 | "The Postman Always Rings Twice (1946)", 686 | "Pretty Baby (1978)", 687 | "Pride and Prejudice (1940)", 688 | "The Pride of the Yankees (1942)", 689 | "Prince of the City (1981)", 690 | "The Prisoner (1955)", 691 | "The Private Life of Henry VIII (1933)", 692 | "Prizzi's Honor (1985)", 693 | "The Producers (1968)", 694 | "Psycho (1960)", 695 | "The Public Enemy (1931)", 696 | "Pulp Fiction (1994)", 697 | "The Purple Rose of Cairo (1985)", 698 | "Pygmalion (1938)", 699 | "Quadrophenia (1979)", 700 | "The Quiet Man (1952)", 701 | "Raging Bull (1980)", 702 | "Raiders of the Lost Ark (1981)", 703 | "Rain Man (1988)", 704 | "Raise the Red Lantern (1991, reviewed 1992)", 705 | "Raising Arizona (1987)", 706 | "Ran (1985)", 707 | "The Rapture (1991)", 708 | "Rashomon (1951)", 709 | "Re-Animator (1985)", 710 | "Rear Window (1954)", 711 | "Rebecca (1940)", 712 | "Rebel Without a Cause (1955)", 713 | "Red (1994)", 714 | "The Red Badge of Courage (1951)", 715 | "Red River (1948)", 716 | "The Red Shoes (1948)", 717 | "Reds (1981)", 718 | "The Remains of the Day (1993)", 719 | "Repo Man (1984)", 720 | "Repulsion (1965)", 721 | "Reservoir Dogs (1992)", 722 | "The Return of Martin Guerre (1983)", 723 | "Reuben, Reuben (1983)", 724 | "Reversal of Fortune 
(1990)", 725 | "Richard III (1956)", 726 | "Ride the High Country (1962)", 727 | "Rififi (1956)", 728 | "The Right Stuff (1983)", 729 | "Risky Business (1983)", 730 | "River's Edge (1987)", 731 | "The Road Warrior (1982)", 732 | "Robocop (1987)", 733 | "Rocco and His Brothers (1960, reviewed 1961)", 734 | "Roger & Me (1989)", 735 | "Roman Holiday (1953)", 736 | "Romeo and Juliet (1936)", 737 | "Romeo and Juliet (1968)", 738 | "Room at the Top (1959)", 739 | "A Room With a View (1986)", 740 | "The Rose Tattoo (1955)", 741 | "Rosemary's Baby (1968)", 742 | "'Round Midnight (1986)", 743 | "Ruggles of Red Gap (1935)", 744 | "The Rules of the Game (1939, reviewed 1950 and 1961)", 745 | "The Ruling Class (1972)", 746 | "Rushmore (1998)", 747 | "Ruthless People (1986)", 748 | "Sahara (1943)", 749 | "Salaam Bombay! (1988)", 750 | "Salesman (1969)", 751 | "Sanjuro (1963)", 752 | "Sansho the Bailiff (1969)", 753 | "Saturday Night and Sunday Morning (1961)", 754 | "Saturday Night Fever (1977)", 755 | "Saving Private Ryan (1998)", 756 | "Say Anything... 
(1989)", 757 | "Sayonara (1957)", 758 | "Scenes From a Marriage (1974)", 759 | "Schindler's List (1993)", 760 | "The Scoundrel (1935)", 761 | "The Search (1948)", 762 | "The Searchers (1956)", 763 | "Secret Honor (1985)", 764 | "Secrets and Lies (1996)", 765 | "Sense and Sensibility (1995)", 766 | "Sergeant York (1941)", 767 | "Serpico (1973)", 768 | "The Servant (1963, reviewed 1964)", 769 | "The Set-Up (1949)", 770 | "Seven Beauties (1976)", 771 | "Seven Brides for Seven Brothers (1954)", 772 | "Seven Days to Noon (1950)", 773 | "The Seven Samurai (1956)", 774 | "7 Up/28 Up (1985)", 775 | "The Seven Year Itch (1955)", 776 | "The Seventh Seal (1958)", 777 | "Sex, Lies and Videotape (1989)", 778 | "Sexy Beast (2001)", 779 | "Shadow of a Doubt (1943)", 780 | "Shaft (1971)", 781 | "Shakespeare in Love (1998)", 782 | "Shane (1953)", 783 | "She Wore a Yellow Ribbon (1949)", 784 | "Sherman's March (1986)", 785 | "She's Gotta Have It (1986)", 786 | "The Shining (1980)", 787 | "Ship of Fools (1965)", 788 | "Shoah (1985)", 789 | "Shock Corridor (1963)", 790 | "Shoeshine (1947)", 791 | "Shoot the Piano Player (1962)", 792 | "The Shooting Party (1985)", 793 | "The Shootist (1976)", 794 | "The Shop Around the Corner (1940)", 795 | "The Shop on Main Street (1966)", 796 | "A Shot in the Dark (1964)", 797 | "Shrek (2001)", 798 | "Sid and Nancy (1986)", 799 | "The Silence (1964)", 800 | "The Silence of the Lambs (1991)", 801 | "The Silent World (1956)", 802 | "Silk Stockings (1957)", 803 | "Silkwood (1983)", 804 | "Singin' in the Rain (1952)", 805 | "Sitting Pretty (1948)", 806 | "Sleeper (1973)", 807 | "A Slight Case of Murder (1938)", 808 | "Smash Palace (1982)", 809 | "Smile (1975)", 810 | "Smiles of a Summer Night (1956, reviewed 1957)", 811 | "The Snake Pit (1948)", 812 | "Snow White and the Seven Dwarfs (1938)", 813 | "Some Like It Hot (1959)", 814 | "The Sorrow and the Pity (Le Chagrin et la Pitié) (1971)", 815 | "The Sound of Music (1965)", 816 | "South Pacific (1958)", 
817 | "Spartacus (1960)", 818 | "Spellbound (1945)", 819 | "The Spiral Staircase (1946)", 820 | "Spirited Away (2002)", 821 | "Splendor in the Grass (1961)", 822 | "Stage Door (1937)", 823 | "Stagecoach (1939)", 824 | "Stairway to Heaven (1946)", 825 | "Stalag 17 (1953)", 826 | "A Star Is Born (1937)", 827 | "Star Trek II: The Wrath of Khan (1982)", 828 | "Star Wars (1977)", 829 | "Starman (1984)", 830 | "The Stars Look Down (1941)", 831 | "State Fair (1933)", 832 | "Stevie (1981)", 833 | "Stolen Kisses (1969)", 834 | "Stop Making Sense (1984)", 835 | "Stormy Monday (1988)", 836 | "The Story of Adèle H. (1975)", 837 | "The Story of G.I. Joe (1945)", 838 | "The Story of Qiu Ju (1992)", 839 | "Story of Women (1989)", 840 | "Storytelling (2001)", 841 | "La Strada (1956)", 842 | "The Straight Story (1999)", 843 | "Straight Time (1978)", 844 | "Stranger Than Paradise (1984)", 845 | "Strangers on a Train (1951)", 846 | "Straw Dogs (1971)", 847 | "A Streetcar Named Desire (1951)", 848 | "Stroszek (1977)", 849 | "Suddenly, Last Summer (1959)", 850 | "The Sugarland Express (1974)", 851 | "Sullivan's Travels (1941)", 852 | "Summer (1986)", 853 | "Summertime (1955)", 854 | "Sunday Bloody Sunday (1971)", 855 | "Sundays and Cybele (1962)", 856 | "Sunset Boulevard (1950)", 857 | "Suspicion (1941)", 858 | "The Sweet Hereafter (1997)", 859 | "Sweet Smell of Success (1957)", 860 | "Sweet Sweetback's Baadasssss Song (1971)", 861 | "Swept Away (By an Unusual Destiny in the Blue Sea of August) (1974)", 862 | "Swing Time (1936)", 863 | "The Taking of Pelham One Two Three (1974)", 864 | "Talk to Her (2002)", 865 | "Tampopo (1986)", 866 | "Taste of Cherry (1997)", 867 | "A Taste of Honey (1961, reviewed 1962)", 868 | "Taxi Driver (1976)", 869 | "A Taxing Woman (1987)", 870 | "A Taxing Woman's Return (1988)", 871 | "Tell Them Willie Boy Is Here (1969)", 872 | "10 (1979)", 873 | "The Ten Commandments (1956)", 874 | "Tender Mercies (1983)", 875 | "The Tender Trap (1955)", 876 | "Terms of 
Endearment (1983)", 877 | "La Terra trema (1947, reviewed 1965)", 878 | "Tess (1980)", 879 | "That Obscure Object of Desire (1977)", 880 | "That's Life! (1986)", 881 | "Thelma & Louise (1991)", 882 | "These Three (1936)", 883 | "They Live by Night (1949)", 884 | "They Shoot Horses, Don't They? (1969)", 885 | "They Were Expendable (1945)", 886 | "They Won't Forget (1937)", 887 | "The Thief of Bagdad (1940)", 888 | "The Thin Blue Line (1988)", 889 | "The Thin Man (1934)", 890 | "The Thin Red Line (1998)", 891 | "The Third Generation (1979, reviewed 1980)", 892 | "The Third Man (1949)", 893 | "The Thirty-Nine Steps (1935)", 894 | "Thirty Two Short Films About Glenn Gould (1994)", 895 | "This Is Spinal Tap (1984)", 896 | "The Man Must Die (1970)", 897 | "This Sporting Life (1963)", 898 | "Three Comrades (1938)", 899 | "Three Days of the Condor (1975)", 900 | "Throne of Blood (1957)", 901 | "Tight Little Island (1949)", 902 | "The Tin Drum (1979)", 903 | "To Be or Not to Be (1942)", 904 | "To Catch a Thief (1955)", 905 | "To Have and Have Not (1944)", 906 | "To Kill a Mockingbird (1962)", 907 | "To Live (1994)", 908 | "Tokyo Story (1953)", 909 | "Tom Jones (1963)", 910 | "Tootsie (1982)", 911 | "Top Hat (1935)", 912 | "Topaz (1969)", 913 | "Topkapi (1964)", 914 | "Total Recall (1990)", 915 | "Touch of Evil (1958)", 916 | "Toy Story (1995)", 917 | "Traffic (2000)", 918 | "The Train (1965)", 919 | "Trainspotting (1996)", 920 | "The Treasure of the Sierra Madre (1948)", 921 | "A Tree Grows in Brooklyn (1945)", 922 | "The Tree of the Wooden Clogs (1979)", 923 | "The Trip to Bountiful (1985)", 924 | "Tristana (1970)", 925 | "Trouble in Paradise (1932)", 926 | "The Trouble with Harry (1955)", 927 | "True Grit (1969)", 928 | "True Love (1989)", 929 | "Trust (1991)", 930 | "Tunes of Glory (1960)", 931 | "12 Angry Men (1957)", 932 | "Twelve O'Clock High (1949)", 933 | "Twentieth Century (1934)", 934 | "Two English Girls (1971)", 935 | "The Two of Us (1968)", 936 | "2001: A Space 
Odyssey (1968)", 937 | "Two Women (1961)", 938 | "Ugetsu (1954)", 939 | "Ulzana's Raid (1972)", 940 | "Umberto D. (1952)", 941 | "The Unbearable Lightness of Being (1988)", 942 | "Unforgiven (1992)", 943 | "The Usual Suspects (1995)", 944 | "Vanya on 42nd Street (1994)", 945 | "The Verdict (1982)", 946 | "Vertigo (1958)", 947 | "Videodrome (1982)", 948 | "Violette Nozière (1978)", 949 | "Viridiana (1962)", 950 | "Viva Zapata! (1952)", 951 | "The Voice of the Turtle (1947)", 952 | "The Wages of Fear (1955)", 953 | "Waking Life (2001)", 954 | "Walkabout (1971)", 955 | "A Walk in the Sun (1945)", 956 | "The War Game (1966)", 957 | "The War of the Roses (1989)", 958 | "The Warriors (1979)", 959 | "Watch on the Rhine (1943)", 960 | "The Waterdance (1991)", 961 | "The Way We Were (1973)", 962 | "Weekend (1968)", 963 | "Welcome to the Dollhouse (1996)", 964 | "The Well-Digger's Daughter (1941)", 965 | "West Side Story (1961)", 966 | "The Whales of August (1987)", 967 | "What Ever Happened to Baby Jane? (1962)", 968 | "What's Eating Gilbert Grape (1993)", 969 | "What's Up, Doc? (1972)", 970 | "When Harry Met Sally (1989)", 971 | "White Heat (1949)", 972 | "Who Framed Roger Rabbit (1988)", 973 | "Who's Afraid of Virginia Woolf? 
(1966)", 974 | "The Wild Bunch (1969)", 975 | "The Wild Child (1970)", 976 | "Wild Reeds (1994)", 977 | "Wild Strawberries (1959)", 978 | "Wilson (1944)", 979 | "Wings of Desire (1988)", 980 | "Wise Blood (1979)", 981 | "The Wizard of Oz (1939)", 982 | "Woman in the Dunes (1964)", 983 | "Woman of the Year (1942)", 984 | "The Women (1939)", 985 | "Women in Love (1970)", 986 | "Women on the Verge of a Nervous Breakdown (1988)", 987 | "Woodstock (1970)", 988 | "Working Girl (1988)", 989 | "The World of Apu (1959, reviewed 1960)", 990 | "The World of Henry Orient (1964)", 991 | "Written on the Wind (1956)", 992 | "Wuthering Heights (1939)", 993 | "Yankee Doodle Dandy (1942)", 994 | "The Year of Living Dangerously (1982)", 995 | "The Yearling (1983)", 996 | "Yellow Submarine (1968)", 997 | "Yi Yi: A One and a Two (2000)", 998 | "Yojimbo (1961)", 999 | "You Can Count On Me (2000)", 1000 | "You Only Live Once (1937)", 1001 | "Young Frankenstein (1974)", 1002 | "Young Mr. Lincoln (1939)", 1003 | "Y Tu Mamá También (2001)", 1004 | "Z (1969)", 1005 | "Zero for Conduct (1933)"] -------------------------------------------------------------------------------- /spec/filter-spec.coffee: -------------------------------------------------------------------------------- 1 | path = require 'path' 2 | {filter,score} = require '../src/fuzzaldrin' 3 | 4 | bestMatch = (candidates, query, options = {}) -> 5 | 6 | {debug} = options 7 | 8 | if debug? 9 | console.log("\n = Against query: #{query} = ") 10 | console.log(" #{score(c, query)}: #{c}") for c in candidates 11 | 12 | filter(candidates, query, options)[0] 13 | 14 | rootPath = (segments...) 
-> 15 | joinedPath = if process.platform is 'win32' then 'C:\\' else '/' 16 | #joinedPath = '/' 17 | 18 | for segment in segments 19 | if segment is path.sep 20 | joinedPath += segment 21 | else 22 | joinedPath = path.join(joinedPath, segment) 23 | joinedPath 24 | 25 | describe "filtering", -> 26 | 27 | it "returns an array of the most accurate results", -> 28 | candidates = ['Gruntfile', 'filter', 'bile', null, '', undefined] 29 | expect(filter(candidates, 'file')).toEqual ['Gruntfile', 'filter'] 30 | 31 | it "require all character to be present", -> 32 | candidates = ["Application:Hide"] 33 | expect(filter(candidates, 'help')).toEqual [] 34 | 35 | it "support unicode character with different length uppercase", -> 36 | 37 | candidates = ["Bernauer Stra\u00DFe Wall"] # Bernauer Straße Wall 38 | expect(filter(candidates, 'Stra\u00DFe Wall')).toEqual candidates 39 | # before correction, The map ß->SS , place the W out of sync and prevent a match. 40 | # After correction we map ß->S. 41 | 42 | describe "when the maxResults option is set", -> 43 | it "limits the results to the result size", -> 44 | candidates = ['Gruntfile', 'filter', 'bile'] 45 | expect(bestMatch(candidates, 'file')).toBe 'Gruntfile' 46 | 47 | 48 | #--------------------------------------------------- 49 | # 50 | # Exact match 51 | # 52 | 53 | describe "when query is an exact match", -> 54 | 55 | it "prefer match at word boundary (string limit)", -> 56 | candidates = ['0gruntfile0', 'gruntfile0', '0gruntfile'] 57 | expect(bestMatch(candidates, 'file')).toBe candidates[2] 58 | expect(bestMatch(candidates, 'grunt')).toBe candidates[1] 59 | 60 | it "prefer match at word boundary (separator limit)", -> 61 | candidates = ['0gruntfile0', 'hello gruntfile0', '0gruntfile world'] 62 | expect(bestMatch(candidates, 'file')).toBe candidates[2] 63 | expect(bestMatch(candidates, 'grunt')).toBe candidates[1] 64 | 65 | it "prefer match at word boundary (camelCase limit)", -> 66 | candidates = ['0gruntfile0', 
'helloGruntfile0', '0gruntfileWorld'] 67 | expect(bestMatch(candidates, 'file')).toBe candidates[2] 68 | expect(bestMatch(candidates, 'grunt')).toBe candidates[1] 69 | 70 | 71 | it "it ranks full-word > start-of-word > end-of-word > middle-of-word > split > scattered letters", -> 72 | 73 | candidates = [ 74 | 'controller x', 75 | '0_co_re_00 x', 76 | '0core0_000 x', 77 | '0core_0000 x', 78 | '0_core0_00 x', 79 | '0_core_000 x' 80 | ] 81 | 82 | result = filter(candidates, 'core') 83 | expect(result[0]).toBe candidates[5] 84 | expect(result[1]).toBe candidates[4] 85 | expect(result[2]).toBe candidates[3] 86 | expect(result[3]).toBe candidates[2] 87 | expect(result[4]).toBe candidates[1] 88 | expect(result[5]).toBe candidates[0] 89 | 90 | # Also as part of a multiword query 91 | result = filter(candidates, 'core x') 92 | expect(result[0]).toBe candidates[5] 93 | expect(result[1]).toBe candidates[4] 94 | expect(result[2]).toBe candidates[3] 95 | expect(result[3]).toBe candidates[2] 96 | expect(result[4]).toBe candidates[1] 97 | expect(result[5]).toBe candidates[0] 98 | 99 | it "rank middle of word case-insensitive match better than complete word not quite exact match (sequence length is king)", -> 100 | 101 | candidates = [ 102 | 'ZFILEZ', 103 | 'fil e' 104 | ] 105 | 106 | expect(bestMatch(candidates, 'file')).toBe candidates[0] 107 | 108 | it "prefer smaller haystack", -> 109 | 110 | candidates = [ 111 | 'core_', 112 | 'core' 113 | ] 114 | 115 | expect(bestMatch(candidates, 'core')).toBe candidates[1] 116 | 117 | 118 | it "prefer match at the start of the string", -> 119 | 120 | candidates = [ 121 | 'data_core', 122 | 'core_data' 123 | ] 124 | 125 | expect(bestMatch(candidates, 'core')).toBe candidates[1] 126 | 127 | candidates = [ 128 | 'hello_data_core', 129 | 'hello_core_data' 130 | ] 131 | 132 | expect(bestMatch(candidates, 'core')).toBe candidates[1] 133 | 134 | it "prefer single letter start-of-word exact match vs longer query", -> 135 | 136 | candidates = [ 137 
| 'Timecop: View', 138 | 'Markdown Preview: Copy Html' 139 | ] 140 | expect(bestMatch(candidates, 'm')).toBe candidates[1] 141 | 142 | candidates = [ 143 | 'Welcome: Show', 144 | 'Markdown Preview: Toggle Break On Newline' 145 | ] 146 | expect(bestMatch(candidates, 'm')).toBe candidates[1] 147 | 148 | candidates = [ 149 | 'TODO', 150 | path.join('doc', 'README') 151 | ] 152 | expect(bestMatch(candidates, 'd')).toBe candidates[1] 153 | 154 | it "can select a better occurrence that happens latter in string", -> 155 | 156 | candidates = [ 157 | 'Test Espanol', 158 | 'Portuges' 159 | ] 160 | expect(bestMatch(candidates, 'es')).toBe candidates[0] 161 | 162 | 163 | #--------------------------------------------------- 164 | # 165 | # Consecutive letters 166 | # 167 | 168 | describe "when query match in multiple group", -> 169 | 170 | it "ranks full-word > start-of-word > end-of-word > middle-of-word > scattered letters", -> 171 | 172 | candidates = [ 173 | 'model-controller.x' 174 | 'model-0core0-000.x' 175 | 'model-0core-0000.x' 176 | 'model-0-core0-00.x' 177 | 'model-0-core-000.x' 178 | ] 179 | 180 | result = filter(candidates, 'modelcore') 181 | expect(result[0]).toBe candidates[4] 182 | expect(result[1]).toBe candidates[3] 183 | expect(result[2]).toBe candidates[2] 184 | expect(result[3]).toBe candidates[1] 185 | expect(result[4]).toBe candidates[0] 186 | 187 | result = filter(candidates, 'modelcorex') 188 | expect(result[0]).toBe candidates[4] 189 | expect(result[1]).toBe candidates[3] 190 | expect(result[2]).toBe candidates[2] 191 | expect(result[3]).toBe candidates[1] 192 | expect(result[4]).toBe candidates[0] 193 | 194 | 195 | it "ranks full-word > start-of-word > end-of-word > middle-of-word > scattered letters (VS directory depth)", -> 196 | 197 | candidates = [ 198 | path.join('model', 'controller.x'), 199 | path.join('0', 'model', '0core0_0.x'), 200 | path.join('0', '0', 'model', '0core_00.x'), 201 | path.join('0', '0', '0', 'model', 'core0_00.x'), 202 | 
path.join('0', '0', '0', '0', 'model', 'core_000.x') 203 | ] 204 | 205 | result = filter(candidates, 'model core') 206 | expect(result[0]).toBe candidates[4] 207 | expect(result[1]).toBe candidates[3] 208 | expect(result[2]).toBe candidates[2] 209 | expect(result[3]).toBe candidates[1] 210 | expect(result[4]).toBe candidates[0] 211 | 212 | result = filter(candidates, 'model core x') 213 | expect(result[0]).toBe candidates[4] 214 | expect(result[1]).toBe candidates[3] 215 | expect(result[2]).toBe candidates[2] 216 | expect(result[3]).toBe candidates[1] 217 | expect(result[4]).toBe candidates[0] 218 | 219 | 220 | it "weighs consecutive character higher than scattered letters", -> 221 | 222 | candidates = [ 223 | 'application.rb' 224 | 'application_controller' 225 | ] 226 | expect(bestMatch(candidates, 'acon')).toBe candidates[1] 227 | 228 | it "prefers larger group of consecutive letter", -> 229 | 230 | #Here all group score the same context (full word). 231 | 232 | candidates = [ 233 | 'ab cd ef', 234 | ' abc def', 235 | ' abcd ef', 236 | ' abcde f', 237 | ' abcdef' 238 | ] 239 | 240 | result = filter(candidates, 'abcdef') 241 | expect(result[0]).toBe candidates[4] 242 | expect(result[1]).toBe candidates[3] 243 | expect(result[2]).toBe candidates[2] 244 | expect(result[3]).toBe candidates[1] 245 | expect(result[4]).toBe candidates[0] 246 | 247 | it "prefers larger group of consecutive letter VS better context", -> 248 | 249 | #Only apply when EVERY lowe quality context group are longer or equal length 250 | 251 | candidates = [ 252 | 'ab cd ef', # 3 x 2 253 | '0abc0def0' # 2 x 3 254 | ] 255 | 256 | expect(bestMatch(candidates, 'abcdef')).toBe candidates[1] 257 | 258 | candidates = [ 259 | 'ab cd ef', # 2 x 2 + 2 260 | '0abcd0ef0' # 1 x 4 + 2 261 | ] 262 | 263 | expect(bestMatch(candidates, 'abcdef')).toBe candidates[1] 264 | 265 | it "allows consecutive character in path overcome deeper path", -> 266 | 267 | candidates = [ 268 | path.join('controller', 'app.rb') 269 
| path.join('controller', 'core', 'app.rb') 270 | ] 271 | expect(bestMatch(candidates, 'core app')).toBe candidates[1] 272 | 273 | it "weighs matches at the start of the string or base name higher", -> 274 | 275 | expect(bestMatch(['a_b_c', 'a_b'], 'ab')).toBe 'a_b' 276 | expect(bestMatch(['z_a_b', 'a_b'], 'ab')).toBe 'a_b' 277 | expect(bestMatch(['a_b_c', 'c_a_b'], 'ab')).toBe 'a_b_c' 278 | 279 | 280 | #--------------------------------------------------- 281 | # 282 | # Acronym + Case Sensitivity 283 | # 284 | 285 | describe "when the entries contains mixed case", -> 286 | 287 | it "weighs exact case matches higher", -> 288 | candidates = ['statusurl', 'StatusUrl'] 289 | expect(bestMatch(candidates, 'Status')).toBe 'StatusUrl' 290 | expect(bestMatch(candidates, 'status')).toBe 'statusurl' 291 | expect(bestMatch(candidates, 'statusurl')).toBe 'statusurl' 292 | expect(bestMatch(candidates, 'StatusUrl')).toBe 'StatusUrl' 293 | 294 | it "accounts for case while selecting an acronym", -> 295 | candidates = ['statusurl', 'status_url', 'StatusUrl'] 296 | expect(bestMatch(candidates, 'SU')).toBe 'StatusUrl' 297 | expect(bestMatch(candidates, 'su')).toBe 'status_url' 298 | expect(bestMatch(candidates, 'st')).toBe 'statusurl' 299 | 300 | it "weighs exact case matches higher", -> 301 | 302 | candidates = ['Diagnostic', 'diagnostics0000'] 303 | expect(bestMatch(candidates, 'diag')).toBe candidates[1] 304 | expect(bestMatch(candidates, 'diago')).toBe candidates[1] 305 | 306 | candidates = ['download_thread', 'DownloadTask'] 307 | expect(bestMatch(candidates, 'down')).toBe candidates[0] 308 | expect(bestMatch(candidates, 'downt')).toBe candidates[0] 309 | expect(bestMatch(candidates, 'downta')).toBe candidates[1] 310 | expect(bestMatch(candidates, 'dt')).toBe candidates[0] 311 | expect(bestMatch(candidates, 'DT')).toBe candidates[1] 312 | 313 | it "weighs case sentitive matches higher Vs directory depth", -> 314 | 315 | candidates = [path.join('0', 'Diagnostic'), path.join('0', 
'0', '0', 'diagnostics00')] 316 | expect(bestMatch(candidates, 'diag')).toBe candidates[1] 317 | expect(bestMatch(candidates, 'diago')).toBe candidates[1] 318 | 319 | 320 | it "weighs abbreviation matches after spaces, underscores, and dashes the same", -> 321 | expect(bestMatch(['sub-zero', 'sub zero', 'sub_zero'], 'sz')).toBe 'sub-zero' 322 | expect(bestMatch(['sub zero', 'sub_zero', 'sub-zero'], 'sz')).toBe 'sub zero' 323 | expect(bestMatch(['sub_zero', 'sub-zero', 'sub zero'], 'sz')).toBe 'sub_zero' 324 | 325 | 326 | it "weighs acronym matches higher than middle of word letter", -> 327 | 328 | candidates = [ 329 | 'FilterFactors.html', 330 | 'FilterFactorTests.html' 331 | ] 332 | 333 | # Alignment match "t" of factor preventing to score "T" of Test 334 | expect(bestMatch(candidates, 'FFT')).toBe 'FilterFactorTests.html' 335 | 336 | it "prefers longer acronym to a smaller case sensitive one", -> 337 | 338 | candidates = [ 339 | 'efficient' 340 | 'fun fact' 341 | 'FileFactory' 342 | 'FilterFactorTests.html' 343 | ] 344 | 345 | # fun fact is case-sensitive match for fft, but the t of fact is not an acronym 346 | expect(bestMatch(candidates, 'fft')).toBe 'FilterFactorTests.html' 347 | expect(bestMatch(candidates, 'ff')).toBe 'fun fact' 348 | expect(bestMatch(candidates, 'FF')).toBe 'FileFactory' 349 | 350 | it "weighs acronym matches higher than middle of word exact match", -> 351 | 352 | candidates = [ 353 | 'switch.css', 354 | 'ImportanceTableCtrl.js' 355 | ] 356 | expect(bestMatch(candidates, 'itc')).toBe candidates[1] 357 | expect(bestMatch(candidates, 'ITC')).toBe candidates[1] 358 | 359 | it "allows to select between snake_case and CamelCase using case of query", -> 360 | 361 | candidates = [ 362 | 'switch.css', 363 | 'user_id_to_client', 364 | 'ImportanceTableCtrl.js' 365 | ] 366 | expect(bestMatch(candidates, 'itc')).toBe candidates[1] 367 | expect(bestMatch(candidates, 'ITC')).toBe candidates[2] 368 | 369 | 370 | it "prefers CamelCase that happens sooner", 
-> 371 | 372 | candidates = [ 373 | 'anotherCamelCase', 374 | 'thisCamelCase000', 375 | ] 376 | 377 | #We test once for exact acronym then for general purpose match. 378 | expect(bestMatch(candidates, 'CC')).toBe candidates[1] 379 | expect(bestMatch(candidates, 'CCs')).toBe candidates[1] 380 | 381 | it "prefers CamelCase in shorter haystack", -> 382 | 383 | candidates = [ 384 | 'CamelCase0', 385 | 'CamelCase', 386 | ] 387 | expect(bestMatch(candidates, 'CC')).toBe candidates[1] 388 | expect(bestMatch(candidates, 'CCs')).toBe candidates[1] 389 | 390 | it "allows CamelCase to match across words", -> 391 | 392 | candidates = [ 393 | 'Gallas', 394 | 'Git Plus: Add All', #skip the Plus, still get bonus. 395 | ] 396 | expect(bestMatch(candidates, 'gaa')).toBe candidates[1] 397 | 398 | it "allows CamelCase to match even outside of acronym prefix", -> 399 | 400 | candidates = [ 401 | 'Git Plus: Stash Save', 402 | 'Git Plus: Add And Commit', 403 | 'Git Plus: Add All', 404 | ] 405 | 406 | result = filter(candidates, 'git AA') 407 | expect(result[0]).toBe candidates[2] 408 | expect(result[1]).toBe candidates[1] 409 | expect(result[2]).toBe candidates[0] 410 | 411 | result = filter(candidates, 'git aa') 412 | expect(result[0]).toBe candidates[2] 413 | expect(result[1]).toBe candidates[1] 414 | expect(result[2]).toBe candidates[0] 415 | 416 | 417 | it "accounts for match structure in CamelCase vs Substring matches", -> 418 | 419 | candidates = [ 420 | 'Input: Select All', 421 | 'Application: Install' 422 | ] 423 | 424 | expect(bestMatch(candidates, 'install')).toBe candidates[1] 425 | expect(bestMatch(candidates, 'isa')).toBe candidates[0] 426 | expect(bestMatch(candidates, 'isall')).toBe candidates[0] 427 | 428 | candidates = [ 429 | 'Git Plus: Stage Hunk', 430 | 'Git Plus: Push' 431 | ] 432 | 433 | expect(bestMatch(candidates, 'push')).toBe candidates[1] 434 | expect(bestMatch(candidates, 'git push')).toBe candidates[1] 435 | expect(bestMatch(candidates, 'psh')).toBe 
candidates[0] 436 | 437 | # expect(bestMatch(candidates, 'git PSH')).toBe candidates[0] 438 | # expect(bestMatch(candidates, 'git psh')).toBe candidates[0] 439 | # 440 | # not yet supported, because we only scan acronym structure on the start of the query (acronym prefix) :( 441 | # it might be possible to handle uppercase playing with case sensitivity instead of structure. 442 | 443 | 444 | it "accounts for case in CamelCase vs Substring matches", -> 445 | 446 | candidates = [ 447 | 'CamelCaseClass.js', 448 | 'cccManagerUI.java' 449 | ] 450 | 451 | #We test once for exact acronym 452 | expect(bestMatch(candidates, 'CCC')).toBe candidates[0] 453 | expect(bestMatch(candidates, 'ccc')).toBe candidates[1] 454 | 455 | #then for general purpose match. 456 | expect(bestMatch(candidates, 'CCCa')).toBe candidates[0] 457 | expect(bestMatch(candidates, 'ccca')).toBe candidates[1] 458 | 459 | it "prefers acronym matches that correspond to the full candidate acronym", -> 460 | candidates = [ 461 | 'JaVaScript', 462 | 'JavaScript' 463 | ] 464 | 465 | # scores better than 466 | expect(bestMatch(candidates, 'js')).toBe candidates[1] 467 | 468 | candidates = [ 469 | 'JSON', 470 | 'J.S.O.N.', 471 | 'JavaScript' 472 | ] 473 | 474 | # here 1:1 match outdo shorter start-of-word 475 | expect(bestMatch(candidates, 'js')).toBe candidates[2] 476 | 477 | candidates = [ 478 | 'CSON', 479 | 'C.S.O.N.', 480 | 'CoffeeScript' 481 | ] 482 | 483 | # here 1:1 match outdo shorter start-of-word 484 | expect(bestMatch(candidates, 'cs')).toBe candidates[2] 485 | 486 | 487 | #--------------------------------------------------- 488 | # 489 | # Path / Fuzzy finder 490 | # 491 | 492 | describe "when the entries contains slashes", -> 493 | 494 | it "weighs basename matches higher", -> 495 | 496 | candidates = [ 497 | rootPath('bar', 'foo') 498 | rootPath('foo', 'bar') 499 | ] 500 | expect(bestMatch(candidates, 'bar')).toBe candidates[1] 501 | expect(bestMatch(candidates, 'br')).toBe candidates[1] 502 | 
expect(bestMatch(candidates, 'b')).toBe candidates[1] 503 | 504 | candidates = [ 505 | path.join('foo', 'bar'), 506 | 'foobar' 507 | 'FooBar' 508 | 'foo_bar' 509 | 'foo bar' 510 | ] 511 | expect(bestMatch(candidates, 'bar')).toBe candidates[0] 512 | expect(bestMatch(candidates, 'br')).toBe candidates[0] 513 | expect(bestMatch(candidates, 'b')).toBe candidates[0] 514 | 515 | it "prefers shorter basename", -> 516 | 517 | # here full path is same size, but basename is smaller 518 | candidates = [ 519 | path.join('test', 'core_'), 520 | path.join('test', '_core'), 521 | path.join('test_', 'core'), 522 | ] 523 | 524 | expect(bestMatch(candidates, 'core')).toBe candidates[2] 525 | 526 | candidates = [ 527 | path.join('app', 'components', 'admin', 'member', 'modals', 'edit-payment.html'), 528 | path.join('app', 'components', 'admin', 'member', 'edit', 'edit.html'), 529 | path.join('app', 'components', 'admin', 'member', 'modals', 'edit-paykent.html'), 530 | ] 531 | 532 | expect(bestMatch(candidates, 'member edit htm')).toBe candidates[1] 533 | expect(bestMatch(candidates, 'member edit html')).toBe candidates[1] 534 | 535 | expect(bestMatch(candidates, 'edit htm')).toBe candidates[1] 536 | expect(bestMatch(candidates, 'edit html')).toBe candidates[1] 537 | 538 | 539 | 540 | 541 | it "prefers matches that are together in the basename (even if basename is longer)", -> 542 | 543 | candidates = [ 544 | path.join('tests', 'buyers', 'orders_e2e.js'), 545 | path.join('tests', 'buyers', 'users-addresses_e2e.js') 546 | ] 547 | 548 | expect(bestMatch(candidates, 'us_e2')).toBe candidates[1] 549 | 550 | 551 | candidates = [ 552 | path.join('app', 'controllers', 'match_controller.rb'), 553 | path.join('app', 'controllers', 'application_controller.rb') 554 | ] 555 | 556 | expect(bestMatch(candidates, 'appcontr')).toBe candidates[1] 557 | expect(bestMatch(candidates, 'appcontro')).toBe candidates[1] 558 | #expect(bestMatch(candidates, 'appcontrol', debug:true)).toBe candidates[1] 559 | 
#expect(bestMatch(candidates, 'appcontroll', debug:true)).toBe candidates[1] #Also look at issue #6 560 | 561 | 562 | it "allows to select using folder name", -> 563 | 564 | candidates = [ 565 | path.join('model', 'core', 'spec.rb') 566 | path.join('model', 'controller.rb') 567 | ] 568 | 569 | expect(bestMatch(candidates, 'model core')).toBe candidates[0] 570 | expect(bestMatch(candidates, path.join('model', 'core'))).toBe candidates[0] 571 | 572 | it "weighs basename matches higher than folder name", -> 573 | 574 | candidates = [ 575 | path.join('model', 'core', 'spec.rb') 576 | path.join('spec', 'model', 'core.rb') 577 | ] 578 | 579 | expect(bestMatch(candidates, 'model core')).toBe candidates[1] 580 | expect(bestMatch(candidates, path.join('model', 'core'))).toBe candidates[1] 581 | 582 | it "allows to select using acronym in path", -> 583 | 584 | candidates = [ 585 | path.join('app', 'controller', 'admin_controller') 586 | path.join('app', 'asset', 'javascript_admin') 587 | ] 588 | 589 | expect(bestMatch(candidates, 'acadmin')).toBe candidates[0] 590 | 591 | candidates = [ 592 | path.join('app', 'controller', 'macabre_controller') 593 | path.join('app', 'controller', 'articles_controller') 594 | ] 595 | 596 | expect(bestMatch(candidates, 'aca')).toBe candidates[1] 597 | 598 | it "weighs exact basename matches higher than acronym in path", -> 599 | 600 | candidates = [ 601 | path.join('c', 'o', 'r', 'e', 'foobar') 602 | path.join('f', 'o', 'o', 'b', 'a', 'r', 'core') 603 | ] 604 | 605 | expect(bestMatch(candidates, 'core')).toBe candidates[1] 606 | expect(bestMatch(candidates, 'foo')).toBe candidates[0] 607 | 608 | it "prefers file of the specified extension when useExtensionBonus is true ", -> 609 | 610 | candidates = [ 611 | path.join('meas_astrom', 'include', 'Isst', 'meas', 'astrom', 'matchOptimisticB.h') 612 | path.join('IsstDoxygen', 'html', 'match_optimistic_b_8cc.html') 613 | ] 614 | 615 | expect(bestMatch(candidates, 'mob.h', {useExtensionBonus: 
true})).toBe candidates[0] 616 | 617 | candidates = [ 618 | path.join('matchOptimisticB.htaccess') 619 | path.join('matchOptimisticB_main.html') 620 | ] 621 | 622 | expect(bestMatch(candidates, 'mob.ht', {useExtensionBonus: true})).toBe candidates[1] 623 | 624 | # Not clear this is the best example. Broken for something more important. 625 | # uit "support file with multiple extension", -> 626 | # candidates = [ 627 | # path.join('something-foobar.class') 628 | # path.join('something.class.php') 629 | # ] 630 | # 631 | # expect(bestMatch(candidates, 'some.cl', {useExtensionBonus: true})).toBe candidates[1] 632 | 633 | 634 | it "ignores trailing slashes", -> 635 | 636 | candidates = [ 637 | rootPath('bar', 'foo') 638 | rootPath('foo', 'bar', path.sep, path.sep, path.sep, path.sep, path.sep) 639 | ] 640 | expect(bestMatch(candidates, 'bar')).toBe candidates[1] 641 | expect(bestMatch(candidates, 'br')).toBe candidates[1] 642 | 643 | 644 | it "allows candidates to be all slashes", -> 645 | candidates = [path.sep, path.sep + path.sep + path.sep] 646 | expect(filter(candidates, 'bar')).toEqual [] 647 | 648 | 649 | describe "when the Query contains slashes (queryHasSlashes)", -> 650 | 651 | it "weighs end-of-path matches higher", -> 652 | 653 | candidates = [ 654 | path.join('project', 'folder', 'folder', 'file') 655 | path.join('folder', 'folder', 'project', 'file') 656 | ] 657 | 658 | expect(bestMatch(candidates, 'project file')).toBe candidates[0] 659 | expect(bestMatch(candidates, path.join('project', 'file'))).toBe candidates[1] 660 | 661 | it "prefers overall better match to shorter end-of-path length", -> 662 | 663 | candidates = [ 664 | 665 | path.join('CommonControl', 'Controls', 'Shared') 666 | path.join('CommonControl', 'Controls', 'Shared', 'Mouse') 667 | path.join('CommonControl', 'Controls', 'Shared', 'Keyboard') 668 | path.join('CommonControl', 'Controls', 'Shared', 'Keyboard', 'cc.js') 669 | 670 | ] 671 | 672 | expect(bestMatch(candidates, 
path.join('CC','Controls','Shared'))).toBe candidates[0] 673 | expect(bestMatch(candidates, 'CC Controls Shared')).toBe candidates[0] 674 | 675 | 676 | expect(bestMatch(candidates, 'CCCShared')).toBe candidates[0] 677 | expect(bestMatch(candidates, 'ccc shared')).toBe candidates[0] 678 | expect(bestMatch(candidates, 'cc c shared')).toBe candidates[0] 679 | 680 | expect(bestMatch(candidates, path.join('ccc','shared'))).toBe candidates[0] 681 | expect(bestMatch(candidates, path.join('cc','c','shared'))).toBe candidates[0] 682 | 683 | 684 | 685 | describe "when the entries are of differing directory depths", -> 686 | 687 | it "prefers shallow path", -> 688 | 689 | candidates = [ 690 | path.join('b', 'z', 'file'), 691 | path.join('b_z', 'file') 692 | ] 693 | 694 | expect(bestMatch(candidates, "file")).toBe candidates[1] 695 | expect(bestMatch(candidates, "fle")).toBe candidates[1] 696 | 697 | candidates = [ 698 | path.join('foo', 'bar', 'baz', 'file'), 699 | path.join('foo', 'bar_baz', 'file') 700 | ] 701 | 702 | expect(bestMatch(candidates, "file")).toBe candidates[1] 703 | expect(bestMatch(candidates, "fle")).toBe candidates[1] 704 | 705 | candidates = [ 706 | path.join('A Long User Full-Name', 'My Documents', 'file'), 707 | path.join('bin', 'lib', 'src', 'test', 'spec', 'file') 708 | ] 709 | 710 | expect(bestMatch(candidates, "file")).toBe candidates[0] 711 | 712 | # We have plenty of report on how this or that should win because file is a better basename match 713 | # But we have no report of searching too deep, because of that folder-depth penalty is pretty weak. 
714 | 715 | 716 | it "allows better basename match to overcome slightly deeper directory / longer overall path", -> 717 | 718 | candidates = [ 719 | path.join('f', '1_a_z') 720 | path.join('f', 'o', 'a_z') 721 | ] 722 | 723 | expect(bestMatch(candidates, 'az')).toBe candidates[1] 724 | 725 | candidates = [ 726 | path.join('app', 'models', 'automotive', 'car.rb') 727 | path.join('spec', 'carts.rb') 728 | ] 729 | 730 | expect(bestMatch(candidates, 'car.rb')).toBe candidates[0] 731 | 732 | candidates = [ 733 | path.join('application', 'applicationPageStateServiceSpec.js') 734 | path.join('view', 'components', 'actions', 'actionsServiceSpec.js') 735 | ] 736 | 737 | expect(bestMatch(candidates, 'actionsServiceSpec.js')).toBe candidates[1] 738 | expect(bestMatch(candidates, 'ss')).toBe candidates[1] 739 | 740 | 741 | candidates = [ 742 | path.join('spec', 'models', 'user_search_spec.rb') 743 | path.join('spec', 'models', 'listing', 'location_detection', 'usa_spec.rb') 744 | ] 745 | 746 | expect(bestMatch(candidates, 'usa_spec')).toBe candidates[1] 747 | expect(bestMatch(candidates, 'usa spec')).toBe candidates[1] 748 | expect(bestMatch(candidates, 'usa')).toBe candidates[1] 749 | 750 | candidates = [ 751 | path.join('spec', 'models', 'usa_spec.rb') 752 | path.join('spec', 'models', 'listing', 'location_detection', 'user_search_spec.rb') 753 | ] 754 | 755 | expect(bestMatch(candidates, 'user')).toBe candidates[1] 756 | 757 | candidates = [ 758 | path.join('lib', 'exportable.rb'), 759 | path.join('app', 'models', 'table.rb') 760 | ] 761 | expect(bestMatch(candidates, 'table')).toBe candidates[1] 762 | 763 | 764 | candidates = [ 765 | path.join('db', 'emails', 'en', 'refund_notification.html'), 766 | path.join('app', 'views', 'admin', 'home', 'notification.erb') 767 | ] 768 | 769 | expect(bestMatch(candidates, 'notification')).toBe candidates[1] 770 | 771 | candidates = [ 772 | path.join('db', 'emails', 'en', 'refund_notification.html'), 773 | path.join('app', 'views', 
'admin', 'home', '_notification_admin.erb') 774 | ] 775 | 776 | expect(bestMatch(candidates, '_notification')).toBe candidates[1] 777 | 778 | candidates = [ 779 | path.join('javascript', 'video-package', 'video-backbone.js'), 780 | path.join('third_party', 'javascript', 'project-src', 'backbone', 'backbone.js') 781 | ] 782 | 783 | expect(bestMatch(candidates, 'backbone')).toBe candidates[1] 784 | 785 | candidates = [ 786 | path.join('spec', 'controllers', 'apps_controller_spec.rb'), 787 | path.join('app', 'controllers', 'api_v2_featured', 'apps_controller.rb') 788 | ] 789 | 790 | expect(bestMatch(candidates, 'apps_controller')).toBe candidates[1] 791 | 792 | candidates = [ 793 | path.join('config', 'application.rb'), 794 | path.join('app', 'controllers', 'application_controller.rb') 795 | ] 796 | 797 | expect(bestMatch(candidates, 'appcon')).toBe candidates[1] 798 | 799 | 800 | # it "prefer path together to shorter path", -> 801 | # 802 | # candidates = [ 803 | # path.join('app', 'controllers', 'shipments_controller.rb'), 804 | # path.join('app', 'controllers', 'core', 'shipments_controller.rb') 805 | # ] 806 | # 807 | # expect(bestMatch(candidates, 'core shipments controller')).toBe candidates[1] 808 | 809 | # 810 | # Optional Characters 811 | # 812 | 813 | describe "when the query contain optional characters (generalize when the entries contains spaces)", -> 814 | 815 | it "allows to match path using either backward slash, forward slash, space or colon", -> 816 | 817 | candidates = [ 818 | path.join('foo', 'bar'), 819 | path.join('model', 'user'), 820 | ] 821 | 822 | expect(bestMatch(candidates, "model user")).toBe candidates[1] 823 | expect(bestMatch(candidates, "model/user")).toBe candidates[1] 824 | expect(bestMatch(candidates, "model\\user")).toBe candidates[1] 825 | expect(bestMatch(candidates, "model::user")).toBe candidates[1] 826 | 827 | it "prefer matches where the optional character is present", -> 828 | 829 | candidates = [ 830 | 'ModelUser', 831 | 
'model user', 832 | 'model/user', 833 | 'model\\user', 834 | 'model::user', 835 | 'model_user', 836 | 'model-user', 837 | ] 838 | 839 | expect(bestMatch(candidates, "mdl user")).toBe candidates[1] 840 | expect(bestMatch(candidates, "mdl/user")).toBe candidates[2] 841 | expect(bestMatch(candidates, "mdl\\user")).toBe candidates[3] 842 | expect(bestMatch(candidates, "mdl::user")).toBe candidates[4] 843 | expect(bestMatch(candidates, "mdl_user")).toBe candidates[5] 844 | expect(bestMatch(candidates, "mdl-user")).toBe candidates[6] 845 | 846 | 847 | it "weighs basename matches higher (space don't have a strict preference for slash)", -> 848 | 849 | candidates = [ 850 | rootPath('bar', 'foo') 851 | rootPath('foo', 'bar foo') 852 | ] 853 | expect(bestMatch(candidates, 'br f')).toBe candidates[1] 854 | 855 | candidates = [ 856 | path.join('bazs', 'book-details.js') 857 | path.join('booking', 'baz', 'details.js') 858 | ] 859 | expect(bestMatch(candidates, 'baz details js')).toBe candidates[1] 860 | 861 | candidates = [ 862 | path.join('app', 'bookings.js') 863 | path.join('app', 'booking', 'booking.js') 864 | ] 865 | 866 | expect(bestMatch(candidates, 'booking')).toBe candidates[1] 867 | expect(bestMatch(candidates, 'booking js')).toBe candidates[1] 868 | 869 | candidates = [ 870 | path.join('app', 'components','booking','booking.ctrl.js') 871 | path.join('app', 'components','cards','bookings.js') 872 | path.join('app', 'components','admin','settings','cards','booking.js') 873 | path.join('app', 'components','booking','booking.js') 874 | ] 875 | 876 | result = filter(candidates, 'booking js') 877 | expect(result[0]).toBe candidates[3] 878 | expect(result[1]).toBe candidates[2] 879 | expect(result[2]).toBe candidates[1] 880 | expect(result[3]).toBe candidates[0] 881 | 882 | 883 | 884 | it "allows basename bonus to handle query with folder", -> 885 | 886 | # Without support for optional character, the basename bonus 887 | # would not be able to find "model" inside "user.rb" 
so the bonus would be 0 888 | 889 | candidates = [ 890 | path.join('www', 'lib', 'models', 'user.rb'), 891 | path.join('migrate', 'moderator_column_users.rb') 892 | ] 893 | 894 | expect(bestMatch(candidates, "model user")).toBe candidates[0] 895 | expect(bestMatch(candidates, "modeluser")).toBe candidates[0] 896 | expect(bestMatch(candidates, path.join("model", "user"))).toBe candidates[0] 897 | 898 | candidates = [ 899 | path.join('destroy_discard_pool.png'), 900 | path.join('resources', 'src', 'app_controller.coffee') 901 | ] 902 | 903 | expect(bestMatch(candidates, "src app")).toBe candidates[1] 904 | expect(bestMatch(candidates, path.join("src", "app"))).toBe candidates[1] 905 | 906 | candidates = [ 907 | path.join('template', 'emails-dialogs.handlebars'), 908 | path.join('emails', 'handlers.py') 909 | ] 910 | 911 | expect(bestMatch(candidates, "email handlers")).toBe candidates[1] 912 | expect(bestMatch(candidates, path.join("email", "handlers"))).toBe candidates[1] 913 | 914 | 915 | it "allows to select between full query and basename using path.sep", -> 916 | 917 | candidates = [ 918 | path.join('models', 'user.rb'), 919 | path.join('migrate', 'model_users.rb') 920 | ] 921 | 922 | expect(bestMatch(candidates, "modeluser")).toBe candidates[1] 923 | expect(bestMatch(candidates, "model user")).toBe candidates[1] 924 | expect(bestMatch(candidates, path.join("model", "user"))).toBe candidates[0] 925 | 926 | describe "when query is made only of optional characters", -> 927 | it "only return results having at least one specified optional character", -> 928 | candidates = ["bla", "_test", " test"] 929 | expect(filter(candidates, '_')).toEqual ['_test'] 930 | 931 | 932 | #--------------------------------------------------- 933 | # 934 | # Command Palette 935 | # 936 | 937 | 938 | describe "When entry are sentence / Natural language", -> 939 | 940 | it "prefers consecutive characters at the start of word", -> 941 | 942 | candidates = [ 943 | 'Find And Replace: Select 
All', 944 | 'Settings View: Uninstall Packages', 945 | 'Settings View: View Installed Themes', 946 | 'Application: Install Update', 947 | 'Install' 948 | ] 949 | 950 | result = filter(candidates, 'install') 951 | expect(result[0]).toBe candidates[4] 952 | expect(result[1]).toBe candidates[3] 953 | expect(result[2]).toBe candidates[2] 954 | expect(result[3]).toBe candidates[1] 955 | expect(result[4]).toBe candidates[0] 956 | 957 | # Even when we do not have an exact match 958 | 959 | result = filter(candidates, 'instll') 960 | expect(result[0]).toBe candidates[4] 961 | expect(result[1]).toBe candidates[3] 962 | expect(result[2]).toBe candidates[2] 963 | expect(result[3]).toBe candidates[1] 964 | expect(result[4]).toBe candidates[0] 965 | 966 | # for this one, complete word "Install" should win against: 967 | # 968 | # - case-sensitive end-of-word match "Uninstall", 969 | # - start of word match "Installed", 970 | # - double acronym match "in S t A ll" -> "Select All" 971 | # 972 | # also "Install" by itself should win against "Install" in a sentence 973 | 974 | 975 | it "weighs substring higher than individual characters", -> 976 | 977 | candidates = [ 978 | 'Git Plus: Stage Hunk', 979 | 'Git Plus: Reset Head', 980 | 'Git Plus: Push', 981 | 'Git Plus: Show' 982 | ] 983 | expect(bestMatch(candidates, 'push')).toBe candidates[2] 984 | expect(bestMatch(candidates, 'git push')).toBe candidates[2] 985 | expect(bestMatch(candidates, 'gpush')).toBe candidates[2] 986 | 987 | # Here "Plus Stage Hunk" accidentally match acronym on PuSH. 988 | # Using two words disable exactMatch bonus, we have to rely on consecutive match 989 | 990 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## What is fuzzaldrin-plus? 2 | 3 | - A fuzzy search / highlight that specialize for programmer text editor. 
It tries to provide intuitive results by recognizing patterns that people use while searching. 4 | 5 | - A rewrite of the fuzzaldrin library. API is backward compatible with some extra options. Tuning has been done from reported usage of the Atom text editor. 6 | 7 | - At this point in time, it may either be merged back into fuzzaldrin or live as a forked library, we'll see. 8 | 9 | 10 | ## What Problem are we trying to solve? 11 | 12 | ### Score how matched characters relate to one another. 13 | 14 | - One of the most often reported issues is not being able to find an exact match. 15 | - A great source of questionable results comes from scattered characters, spread seemingly randomly in the string. 16 | 17 | We plan to address those issues by scoring runs of consecutive characters. In that scheme, an exact match will be a special case where the run is 100% of the query length. 18 | 19 | In original fuzzaldrin, candidate length was used as a proxy for match quality. This works reasonably well when the subject is a single word, but breaks when the subject contains multiple words, for example, see: 20 | 21 | - **Core** 22 | - **Co**nt**r**oll**e**r 23 | - Extension**Core** 24 | 25 | In `Core` vs. `Controller` size is a good indicator of quality, but not so much in `Controller` vs. `ExtensionCore`. This situation happens because match compactness matters more than haystack size. Match compactness is the principle behind the scoring of the *Selecta* project. 26 | 27 | 28 | #### Run length / consecutive 29 | 30 | So far the examples can be handled by an `indexOf` call. However, there are times where a single query can target multiple parts of a candidate. 31 | 32 | For example when a candidate contains multiple words 33 | - `git push` vs. `Git Plus: Push` 34 | - `email handler` vs. `email/handler.py` 35 | 36 | Another example is to jump over common strings. 
37 | - `toLowerCase` 38 | - `toLocaleString` 39 | - `toLocalLowerCase` 40 | 41 | We could use a query like `tololo` to select the third option of these. 42 | 43 | 44 | ### Select character based on score. 45 | 46 | The previous algorithm always selects the first available matching character (leftmost alignment). Only after selection, it will try to identify how to score that character. The problem then is that the most interesting instance of a character is not necessarily on the left. 47 | 48 | For example on query `itc`, we should match 49 | - **I**mportance**T**able**C**trl. 50 | 51 | Instead leftmost alignment misses the acronym pattern: 52 | - **I**mpor**t**an**c**eTableCtrl. 53 | 54 | For query `core` against `controller_core` leftmost alignment misses the consecutive run: 55 | - **co**nt**r**oll**e**r_core 56 | 57 | To handle this, we propose to embed the pattern detection (consecutive and more) inside an optimal alignment scheme. Imagine you have an algorithm that allows you to recognize objects in images; it would make little sense to run it exclusively on the top left corner. 58 | 59 | 60 | ### Prevent Accidental acronym. 61 | 62 | Fuzzaldrin handles acronyms by giving a large bonus on character matches that start words. Currently, a start-of-word bonus matches almost as much as three proper-case characters. 63 | 64 | For query `install` should results be in this order? 65 | - F**in**d & Replace **S**elec**t** **All** 66 | - Application: **Install** 67 | 68 | In that example, we have '**S**elect **A**ll' boost the score of the first candidate because we score two word-starts while we only score one for 'install'. 69 | 70 | For query `git push`, should we order results in that order? 71 | - "**Git** **P**l**u**s: **S**tage **H**unk" 72 | - "**Git** Plus: **Push**" 73 | 74 | What about the fact we match three start-of-words in `Plus Stage Hunk`? PSH is very close to '**p**u**sh**' (And `Plus` contains `u`). 
75 | 76 | That kind of question arises even more often when we use optimal selection because the algorithm will lock on those extra acronym points. 77 | 78 | What we propose in this project is that start-of-words characters should only have a moderate advantage by themselves. Instead, they form a strong score by making an acronym pattern with other start-of-words characters. 79 | 80 | For example with query `push`: 81 | - against `Plus: Stage Hunk`: we have `P + u + SH` grouped as 1, 1, 2 82 | - against `push`: we have a single group of 4. 83 | - The substring wins for having the largest group 84 | 85 | For example with query `psh`: 86 | - against `Plus: Stage Hunk`: we have `PSH` so a single group of 3 87 | - against `push`: we have `p + sh` so grouped as 1, 2 88 | - The acronym wins for having the largest group. 89 | 90 | 91 | This way we can score both substring and acronym match using the structure of the match. We'll refine the definition of consecutive acronym later. 92 | 93 | 94 | ### Score the context of the match. 95 | 96 | Some people proposed to give a perfect score to exact case-sensitive matches. This proposition can be understood because exact matches and case-sensitivity are two areas where fuzzaldrin is not great. 97 | 98 | However should 'sw**itc**h.css' be an exact match for `itc`? 99 | Even when we have **I**mportance**T**able**C**trl available? 100 | 101 | Few people will argue against `diag` preferring `diagnostic` to `Diagnostics`. 102 | 103 | However should `install` prefer "Un**install**" over "**Install**" ? Or should it be the other way around? In this case, we have to consider the relative priority of case-sensitivity and start-of-word. 104 | 105 | Exact matches are used with enough frequency that we should not only ensure they win against approximate matches but also ensure to rank quality properly amongst them. 106 | 107 | ### Manage the multiple balances of path scoring. 
108 | 109 | We want to prefer match toward the start of the string. 110 | Except we also want to prefer match in the filename (which happens near the end of the string) 111 | 112 | We want to prefer shorter and shallower path. 113 | Except we also want to retrieve some deeper files when filename is clearly better. 114 | 115 | We want to prefer matches in the filename 116 | Except when the query describes a full path much better than an approximate file name. (Let's consider query `model user` vs `models/user.rb` or `moderator_column_users.rb`) 117 | 118 | 119 | ## Proposed Scoring Rules 120 | 121 | 122 | ### 1. Characters are chosen by their ability to form a pattern with others. 123 | 124 | Patterns can be composed of 125 | - consecutive letters of the subject, 126 | - sequential letters in the Acronym of the subject. 127 | 128 | Start-of-words (acronym) characters are special in that they can either form a pattern with the rest of the word or with other acronym characters. 129 | 130 | - Pattern based scoring replaces a situation where acronym characters have a large bonus by themselves. Now the bonus is still large but conditional to being part of some pattern. 131 | 132 | - CamelCase and snake_case acronym are treated exactly the same. They will, however, get different score for matching uppercase/lowercase query 133 | 134 | 135 | ### 2. Primary score attribute is pattern length. 136 | 137 | - A pattern that spans 100% of the query length is called an exact match. 138 | - There is such a thing as an acronym exact match. 139 | 140 | - Because every candidate is expected to match all of the query, a larger group should win against multiple smaller ones. 141 | - The rank of a candidate is the length of its largest pattern. 142 | - When all patterns of a first candidate are larger than patterns in a second one, the candidate with the highest rank is said to be dominant. 143 | - 1 group of 6 > 2 group of 3 > 3 group of 2. 
144 | 145 | - When some groups are larger, and some are smaller, the highest rank match is said to be semi-dominant. 146 | - Let's consider a first candidate grouped as 4+2 vs. a second candidate grouped as 3+3. 147 | - The first group of 4 wins against the first group of 3. 148 | - However, the group of 2 loose against the second group of 3. 149 | - In this case, we'll consider some extra information. 150 | 151 | ### 3. Secondary score attribute is the quality of matches. 152 | 153 | - Match quality is made of proper casing and context score 154 | - The main role of match quality is to order candidate of the same rank. 155 | - When match is semi-dominant match quality can overcome a small rank difference. 156 | 157 | - Context score considers where does the match occurs in the subject. 158 | - Full-word > Start-of-word > End-of-word > Middle-of-word 159 | - On that list, Acronym pattern score at Start-of-word level. (That is just bellow full-word) 160 | 161 | - Score for a proper case query has both gradual and absolute components. 162 | - The less error, the better 163 | - 100% Case Error will be called wrong-case, for example matching a `CamelCase` acronym using lowercase query `cc`. 164 | - Exactly 0 error is called CaseSentitive or ExactCase match. 165 | - CaseSentitive matches have a special bonus that is smaller than start-of-word bonus but greater than the end-of-word bonus. 166 | - This scoring scheme allows a start-of-word case-sensitive match to overcome a full-word wrong-case match. 167 | - It also allows to select between a lowercase consecutive and CamelCase acronym using case of query. 168 | - To answer the question asked in the introduction, "Installed" win over "Uninstall" because start-of-word > Exact Case. 169 | 170 | - **Q:** Why can't you simply add extra length for some bonus. For example, score a start-of-word match as if it had an extra character. 
171 | - **A:** We cannot do that on partial matches because then the optimal alignment algorithm will be happy to split word and collect start-of-word bonus like stamps. (See accidental acronym) 172 | 173 | - **Q:** Why do you add extra length on exact matches? 174 | - **A:** First, once you have matched everything, there's no danger of splitting the query. Then, that bonus exists to ensure exact matches will bubble up in the firsts results, despite longer/deeper path. If, after more test and tuning, we realize it's not needed, we'll be happy to remove it, the fewer corner cases, the merrier. 175 | 176 | - **Q:** Why are you using lowercase to detect CamelCase? 177 | - **A** CamelCase are detected as a switch from lowercase to UPPERCASE. Defining UPPERCASE as not-lowercase, allow case-invariants characters to count as lowercase. For example `Git Push` the `P` of push will be recognised as CamelCase because we consider `` as lowercase. 178 | 179 | ### 4. Tertiary score attributes are subject size, match position and directory depth 180 | 181 | - Mostly there to help order match of the same rank and match quality, unless the difference in tertiary attributes is large. 182 | -(Proper definition of large is to be determined using real life example) 183 | 184 | - In term of the relative importance of effects it should rank start-of-string > string size > directory depth. 185 | 186 | ## Score Range 187 | 188 | - **Score is 0 if and only if there is no match.** 189 | 190 | - Otherwise, the score is a strictly positive integer. 191 | 192 | - The maximum range is `score(query,query)` whatever that number is. A longer query will have a greater maximal score. 193 | 194 | - Score exist mainly to for relative order with other scores of the same query and to implements scoring rule described above. 195 | 196 | - Score have a high dynamic range and consider a lot of information. Equality is unlikely. 
For that reason, **multiplicative bonuses should be preferred over additive ones**. 197 | 198 | ------------- 199 | 200 | ### Acronym Prefix 201 | More detail on acronym match 202 | 203 | 204 | An acronym prefix is a group of characters that are consecutive in the query and sequential in the acronym of the subject. That group starts at the first character of the query and end at the first character of the query, not in the acronym. If there's no missed character, then we have an acronym exact match (100% of query is sequential in the acronym) 205 | 206 | 207 | **For example if we match `ssrb` against `Set Syntax Ruby` we'll score it like so** 208 | 209 | ```` 210 | 012345678901234 211 | "Set Syntax Ruby" 212 | "000 SSR0b0 0000" 213 | ```` 214 | 215 | - Acronym scored as three consecutive character at start-of-word + an isolated letter. 216 | - Here we have a wrong-case match. "SSRb" or "SSRB" would have case-sensitive points on the acronym pattern (case of isolated letter is not important) 217 | - Position of the equivalent consecutive match is the average position of acronym characters. 218 | - For scoring, we use the size of the original candidate. 219 | 220 | 221 | **Another example is matching `gaa` against `Git Plus: Add All` we'll score it like so** 222 | 223 | ```` 224 | 01234567890123456 225 | "Git Plus: Add All" 226 | "000000 GAA00 0000" 227 | ```` 228 | 229 | - here we conveniently allow to skip the `P` of `Plus`. 230 | 231 | 232 | **Then what about something like "git aa" ?** 233 | 234 | This is a current limitation. We do not support acronym pattern outside of the prefix. Mostly for performance reason. 235 | Acronym outside of the acronym prefix will have some bonus, scoring between isolated character and 2 consecutive. 236 | There are multiple thing we can improve if one day we implement a proper multiple word query support, and this is one of them. 
237 | 238 | ### Optional characters 239 | 240 | Legacy fuzzaldrin had some support for optional characters (Mostly space, see `SpaceRegEx`). Because the scoring does not support errors, the optional character was simply removed from the query. 241 | 242 | With this PR, optimal alignment algorithm supports an unlimited number of errors. The strict matching requirement is handled by a separate method `isMatch`. The optional character implementation is done by building a subset of the query containing only non-optional characters (`coreQuery`) and passing that to `isMatch`. 243 | 244 | This new way of doing thing means that while some characters are optional, candidates that match those characters have a better score. What this allow is to add characters to the optional list without compromising ranking. 245 | 246 | Optional character contains space, but also `-` and `_` because multiple specs require that we should treat them as space. Also `\` and `:` are also optional to support searching a file using the PHP or Ruby name-space. Finally `/` is optional to mirror `\` and support a better workflow in a multi-OS environment. 247 | 248 | Finally option `allowErrors` would make any character optional. Expected effect of that options would be some forgiveness on the spelling at the price of a slower match. 249 | 250 | 251 | ### Path Scoring 252 | 253 | - Score for a given path is computed from the score of the fullpath and score of the filename. For low directory depth, the influence of both is about equal. But, for deeper directory, there is less retrieval effect (importance of basename) 254 | 255 | - The full path is penalized twice for size. Once for its own size, then a second time for the size of the basename. Extra basename penalty is dampened a bit. 256 | 257 | - The basename is scored as if `allowErrors` was set to true. (Full-path must still pass `isMatch` test). This choice is made to support query such as `model user` against path `model/user`. 
Previously, the basename score would be 0 because it would not find `model` inside basename `user`. Variable `queryHasSlashes` partially addressed this issue, but was inconsistent with usage of `` as folder separator 258 | 259 | - When query has slashes (`path.sep`) the last or last few folder from the path are promoted to the basename. (as many folder from the path as folder in the query) 260 | 261 | ------------- 262 | 263 | ## Algorithm (Optimal alignment) 264 | 265 | ### LCS: Dynamic programming Table 266 | Let's compare A:`surgery` and B:`gsurvey`. 267 | To do so we can try to match every letter of A against every letter of B. 268 | 269 | This problem can be solved using a score matrix. 270 | - The match starts at [0,0] trying to compare the first letter of each. 271 | - The match end at [m,n] comparing the last letter of each. 272 | 273 | At each position [i,j] the best move can be one of the 3 options. 274 | 275 | - match `A[i]` with `B[j]` (move diagonal, add 1 to score) 276 | - skip `A[i]` (move left, copy score) 277 | - skip `B[j]` (move down, copy score) 278 | 279 | We do not know which one of these 3 is the best move until we reach the end, so we record the score of the best move so far. The last cell contains the score of the best alignment. If we want to output that alignment we need to rebuild it backward from the last cell. 280 | 281 | ```` 282 | s u r g e r y 283 | g [0,0,0,1,1,1,1] : best move is to align `g` of *g*survey with `g` of sur*g*ery, score 1 284 | s [1,1,1,1,1,1,1] : we can align `s`, but doing so invalidate `g`. 
Both score 1, we cannot decide 285 | u [1,2,2,2,2,2,2] : if we align s, we can also align u, we have a winner 286 | r [1,2,3,3,3,3,3] : we can align `r` 287 | v [1,2,3,3,3,3,3] : nothing we can do with that `v`, score stay the same 288 | e [1,2,3,3,4,4,4] : we can align `e` (we skipped `g` the same way we skipped `v`) 289 | y [1,2,3,3,4,4,5] : align y (we skipped `r` ) 290 | ```` 291 | 292 | **Best alignment is** 293 | 294 | ```` 295 | gsur-ve-y 296 | -|||--|-| 297 | -surg-ery 298 | ```` 299 | 300 | For those familiar with code diff, this is essentially the same problem. Except, in this case, we the do the alignment of characters in a word and a diff performs alignment of lines in a file. Characters present in the second word but not in the first counts as additions; characters present only in the first word are deletions and characters present in both are matches - like unchanged lines in a diff. 301 | 302 | To get that alignment, we start from the last character and trace back the best option. The pattern to looks for an **alignment** is the corner increase (diagonal+1 is greater than left or up.) 303 | 304 | ```` 305 | 4,4 3,3 2,2 1,1 0,0 306 | 4,5 3,4 2,3 1,2 0,1 307 | ```` 308 | 309 | - (There are an implicit row and column of 0 before the matrix) 310 | 311 | The pattern to look for to **move left** is: 312 | 313 | ```` 314 | 3,3 315 | 4,4 316 | ```` 317 | 318 | The pattern to look for to **move up** is: 319 | 320 | ```` 321 | 3,4 322 | 3,4 323 | ```` 324 | 325 | We try to resolve equality the following way: 326 | 327 | ```` 328 | 3,3 329 | 3,3 330 | ```` 331 | 332 | 1. Prefer moving UP: toward the start of the candidate. This strategy ensures we highlight toward the start of string instead of the end when all else is equal. 333 | 2. If not available, prefer moving LEFT (optional character) 334 | 3. Only accept alignment DIAG when it is the absolute best option. 
335 | 336 | 337 | 338 | 339 | ### Algorithm Conclusion 340 | 341 | The LCS algorithm allows to detect which character of the query are common to both words while being in proper order. (For example g is common to both word but discarded because out of order.) 342 | 343 | LCS is not immediately useful for fuzzaldrin needs. Because fuzzaldrin require ALL characters of the query to be in subject to have a score greater than 0, LCS for all positive candidates would be the length of the query. 344 | 345 | However, the dynamic programming table used to solve LCS is very useful to our need. The ability to select the best path and skip that `g` even if it is present in both query and candidate is the key to improves over left-most alignment. All we need for this to works is a bit more detail in score than 0 or 1. 346 | 347 | ### Similarity score 348 | 349 | Matching character does not have to be binary. Case sensitive match can still prefer proper case, same goes with accents. A diff tools can decide a line has been modified, instead of registering an addition and a deletion. A handwriting recognition tool can decide `a` and `o` are somewhat more similar to each other than they are to `w`, and so on. 350 | 351 | We use character similarity as a way to build and score patterns. That is, we consider that character are similar from their own quality ( such as case) as well of being part of a similar neighborhood (consecutive letters or acronyms) 352 | 353 | There are some rules that limit our scoring ability (for example we cannot go back in time and correct the score based on future choice) but overall that scheme is very flexible. 354 | 355 | ### Where is the matrix? 356 | 357 | While the programming table describes computation, we do not need to store the whole matrix when we only output the score. Fundamentally when computing a score, we only need 3other previously computed cell: UP, LEFT and DIAG. 
358 | 359 | Suppose we process the cell [3,5] 360 | 361 | 20, 21, 22, 23, 24, 25, 26, *27, 28, 29* 362 | *30, 31, 32, 33, 34,* **35**, 36, 37, 38, 39 363 | 364 | To build that score we only need values 24(DIAG), 25(UP), 34(LEFT). 365 | So instead of a whole matrix we can keep only the two current lines. 366 | 367 | Furthermore, anything on the left of 24 on the first line is not needed anymore. Also, anything to the right of 35 on the second line has not yet been computed. So we can build a more compact structure using one composite row + one diagonal. 368 | 369 | score_diag = 24 370 | score_row = 30, 31, 32, 33, 34, 25, 26, 27, 28, 29 371 | 372 | #### Preparing next value 373 | 374 | Once we have computed the value of the cell [3,5], we can insert that value into the structure, taking care of saving next diagonal before overwriting it. 375 | 376 | diag = 25 377 | row = 30, 31, 32, 33, 34, **35**, 26, 27, 28, 29 378 | 379 | To compute value of cell [3,6] we take 380 | - UP value (26) from the row. 381 | - DIAG value, from the diag register. 382 | - LEFT value from the previously computed value: 35 383 | 384 | ### Initial values 385 | 386 | Before entering the matching process, the row is initialized with 0. Before scoring each row, the LEFT and DIAG register are reset to 0. 387 | 388 | That strategy has the effect of placing a virtual row and column of 0 before the matrix. Moreover, it allows to deal with boundary condition without any special case. 389 | 390 | ### Memory management 391 | 392 | We set up the row vector with the size of the query. Using a full matrix, scoring a query of size 5 against a path of size 100, would require a 500 cells. Instead, we use a 5 item row + some registers. This should ease memory management pressure. 393 | 394 | Each character of the query manages its best score. More precisely, each cell `row[j]` manage the best score so far of matching `query[0..j]` against candidate[0..i]. 
395 | 396 | ### Consecutive Score (Neighbourhood) Matrix. 397 | 398 | We cache the consecutive score in a virtual matrix following the same composite row scheme that we do with score values. 399 | 400 | In `fuzzaldrin.score` The candidate entirely determines the Neighbourhood quality. It is not affected by which character has been chosen. In highlight, (`fuzzaldrin.match`) we further refine the formula to make the consecutive bonus conditional to not breaking the consecutive chain: 401 | 402 | For example query `abcdz` vs. subject `abcdzbcdz`. Between `abcd` and `bcdz`, `abcd` wins for being sooner in the string. Now between the two `z`, the first one is isolated and the second one is part of a rank 4 group. However given that `bcd` are matched sooner, the second `z` is an isolated match, so the first `z` wins. 403 | 404 | ------------- 405 | 406 | ## Performance 407 | 408 | Let's consider the following autocomplete scenario. 409 | - Symbol bank has 1000 items. 410 | - The user receives about 5 suggestion for its query. 411 | - Of those 5, 1 is a exact case-sensitive match. 412 | - That particular user almost always wants that case sensitive match. 413 | 414 | Should we optimize for case sensitive `indexOf` before trying other things? Our answer to that question is no. 415 | 416 | Case sensitive exact match are valuable because they are rare. Even if the user tries to get them, for each one of those we have to reject 995 entry and deal with 4 other kinds of matches. 417 | 418 | This is our first principle for optimization: **Most of the haystack is not the needle**. Because rejection of candidate happens often, we should be very good at doing that. 419 | 420 | Failing a test for case-sensitive `indexOf` tell us exactly nothing for case-insensitive `indexOf`, or acronyms, or even scattered letters. 421 | That test is too specific. To reject match efficiently, we should aim for the lowest common denominator: scattered case-insensitive match. 
422 | 423 | This is exactly the purpose of `isMatch`. 424 | 425 | ### Most of the haystack is not the needle 426 | 427 | We just have shown how that sentence applies at the candidate level, but it is also at the character level. 428 | 429 | **Let's consider this line: `if (subject_lw[i] == query_lw[j])`** 430 | This test is for match points (or hits). It refers to the `diag+1` in the algorithm description, with the `+1` being refined to handle the differents levels of character and neighborhood similarity. 431 | 432 | 433 | **How often is that condition true ?** 434 | 435 | Let's consider an alphabet that contain 26 lowercase letters, 10 numbers, a few symbols ` _!?=<>`. That is a 40+ symbol alphabet. Under a uniform usage model of those symbols, we have the hit condition occurs about 2.5% of the time (1/40). If we suppose only 10-20 of those characters are popular, the hit rate is about 5-10%. 436 | 437 | This means we'll try to minimize the number of operation that happens outside of math points. In that context, increasing the cost of a hit, while decreasing the cost of non-hits looks like a possibly worthwhile proposition. 438 | 439 | A canonical example of this is that, instead of testing each character against the list of separators, setting a flag for next character being a start-of-word, we first confirm a match then look behind for separator. This characterization work is sometimes repeated more than once, but so far this scheme benchmarked better than alternatives we have tried to avoid doing extra work. 440 | 441 | Having work concentrated at hit points is also a natural fit to our logic, the most expensive part being to determine how to score similarity between characters (including context similarity). However, it also means we'll want to have some control over the number of positive hits we'll compute - that is the purpose of missed hit optimisation. 442 | 443 | 444 | ### What about a stack of needles? 
445 | 446 | To the extent the user is searching for a specific resource, this should be uncommon. 447 | 448 | It can still happen in some situation such as: 449 | - Search is carried as user type (the query is not intentional) 450 | - The intentional query is not fully typed, match-all is a temporary step. 451 | 452 | One way to deal with that is not to use the full matching algorithm when we can deal with something simpler. This is what we have done while searching for `indexOf` instance. 453 | 454 | One special note: Acronym still have to be checked even if we have an exact match: for example query `su` against `StatusUrl`. As an exact match it is poor: 'Statu**sU**rl' is a middle of word match and have the wrong case. However as an acronym it is great: '**S**tatus**U**rl'. That motivated us to create the specialized `scoreAcronyms`. 455 | 456 | What is nice is that while `scoreAcronyms` was created to speed up exact matches search, it also provided very valuable information for accuracy. It later became a corner stone in the processing of accidental acronym. 457 | 458 | The result is that for exact matches and exact acronym matches we bypass the optimal alignment algorithm, giving very fast results. 459 | We still have to deal with fuzzier stacks of needles and the next two optimization address this. 460 | 461 | ### Hit Miss Optimization. 462 | 463 | A hit occurs when character of query is also in the subject. 464 | - Every (i,j) such that subject[i] == query[j], in lowercase. 465 | 466 | A missed hit occurs when a hit does not improve the score. 467 | 468 | To guarantee optimal alignment, every hit has to be considered. 469 | However when candidate are long (deep path) & query contains common use character, for example, vowels , we can spend a huge amount of time scoring accidental hits. 470 | 471 | So we use the number of missed hit as a heuristic for current score that are unlikely to improve. 
Let's score `itc` vs `ImportanceTableControl` 472 | 473 | - `I` of `Importance`: First occurrence, improve over none. 474 | - `t` of `Importance`: First occurrence, improve over none. 475 | - `c` of `Importance`: First occurrence, improve over none. 476 | - `T` of `Table` : Acronym match, improve over an isolated middle of word. 477 | - `C` of `Control` : Acronym match, improve over an isolated middle of word. 478 | - `t` of `Control`: no improvement over acronym `T`: first hit miss. 479 | 480 | - After a certain threshold of missed hit we can consider it is unlikely the score will improve by much. 481 | - Despite above example hit miss optimization do not affect scoring of exact match (sub-string or acronym) 482 | - There are some legitimate use for hit miss, for example while scoring query `Mississippi` each positive match for `s` or `i` may trigger up to 3 hit miss on the other occurrence of that letter in query. 483 | 484 | - For that reason, we propose counting consecutive hit miss and having a maximum of one hit miss per character of the subject. 485 | 486 | **Q:** Does this grantee improvement over leftmost alignment? 487 | **A:** It'll often be the case but no guarantee on pathological matches. 488 | For example, in query `abcde` against candidate '**abc**abcabcabcabcabcabczde' we may trigger the miss count before matching `de`. It'll still be registered as a match and probably a good one with `abc` at the start, `de` will be scored as optional characters not present. 489 | 490 | Candidate 'abcabcabcabcabcabc**abcde**' will not have any problem because it does not affect exact match. 491 | 492 | A real world example is searching `index` in the benchmark. Where `i`, `n`, `d`, `e` exist scattered in folder name, but x exist in the extension `.txt`. However, the whole point of this project is to prefer structured match to scattered one so this might not be a problem. 
493 | 494 | ### High Positive count mitigation 495 | **[option `maxInners`, disabled by default]** 496 | 497 | A lot of the speed of this PR come from the idea that rejection happens often, and we need to be very efficient on them to offset slower higher quality match. Unfortunately, some query will match against almost everything. 498 | 499 | - Fast short-circuit path for exact substring acronym help a lot. 500 | - Missed hit heuristic also help a lot for general purpose match. 501 | 502 | However, we may still be too slow for interactive time query on large data set. This is why `maxInners` option is provided. 503 | 504 | This is the maximum number of positive candidate we collect before sorting and returning the list. 505 | 506 | The realization is that a query that match everything on a 50K item data set is unlikely to show anything useful to the user above the fold (say in the first 15 results). 507 | 508 | So then the priority is to detect such case of low quality (low discrimination power) query and report fast to the user so user can refine its query. 509 | 510 | A `maxInners` size of about 20% of the list works well. It is not needed on a smaller list. 511 | 512 | ### Active Region Optimization 513 | 514 | Before the first occurrence of the first char of query in the subject, or after the last occurrence of the last char of query in the subject it is impossible to make a match. So we'll trim the subject to that active region. The search for those boundaries is linear while the optimal alignment algorithm is quadratic, so it is an improvement, however, little or large we move. 515 | 516 | ### Benchmark 517 | - All test compare this PR to previous version (legacy) 518 | 519 | - The first test `index` is a typical use case, 10% positive, 1/3 of positive are exact matches. 520 | - We are about 2x faster 521 | 522 | - Second test `indx` remove exact matches. 
Just under 2x faster 523 | 524 | - Third test `walkdr`, 1% positive, mostly testing `isMatch()`, above 2x faster. 525 | 526 | - Fourth test `node`, exact match, 98% positive, bit under 2x faster. 527 | 528 | - Test 5 `nm`, exact acronym match, 98% positive, about 10% slower. 529 | 530 | - Test 6 `nodemodules` is special in that it use a string that score on almost every candidate, often multiple time per candidate and individuals characters are popular. It also avoid exact match speed-up. About 2x slower, but unlikely to happens in real life. `maxInners` mitigation cover that case. 531 | 532 | 533 | ```` 534 | Filtering 66672 entries for 'index' took 62ms for 6168 results (~10% of results are positive, mix exact & fuzzy) 535 | Filtering 66672 entries for 'index' took 120ms for 6168 results (~10% of results are positive, Legacy method) 536 | ====== 537 | Filtering 66672 entries for 'indx' took 69ms for 6192 results (~10% of results are positive, Fuzzy match) 538 | Filtering 66672 entries for 'indx' took 126ms for 6192 results (~10% of results are positive, Fuzzy match, Legacy) 539 | ====== 540 | Filtering 66672 entries for 'walkdr' took 30ms for 504 results (~1% of results are positive, fuzzy) 541 | Filtering 66672 entries for 'walkdr' took 70ms for 504 results (~1% of results are positive, Legacy method) 542 | ====== 543 | Filtering 66672 entries for 'node' took 112ms for 65136 results (~98% of results are positive, mostly Exact match) 544 | Filtering 66672 entries for 'node' took 213ms for 65136 results (~98% of results are positive, mostly Exact match, Legacy method) 545 | ====== 546 | Filtering 66672 entries for 'nm' took 60ms for 65208 results (~98% of results are positive, Acronym match) 547 | Filtering 66672 entries for 'nm' took 56ms for 65208 results (~98% of results are positive, Acronym match, Legacy method) 548 | ====== 549 | Filtering 66672 entries for 'nodemodules' took 602ms for 65124 results (~98% positive + Fuzzy match, [Worst case scenario]) 550 | 
Filtering 66672 entries for 'nodemodules' took 123ms for 13334 results (~98% positive + Fuzzy match, [Mitigation]) 551 | Filtering 66672 entries for 'nodemodules' took 295ms for 65124 results (Legacy) 552 | ```` 553 | 554 | **Q:** My results are not as good. 555 | **A:** Run the benchmark a few time, it looks like some optimization kick in later. (Or CPU on energy efficient device might need to warm up before some optimization are activated) 556 | 557 | 558 | 559 | ## Prior Art 560 | 561 | [Chrome FilePathScore](https://chromium.googlesource.com/chromium/blink/+/master/Source/devtools/front_end/sources/FilePathScoreFunction.js#70) 562 | 563 | [Textmate ranker](https://github.com/textmate/textmate/blob/master/Frameworks/text/src/ranker.cc#L46) 564 | 565 | [VIM Command-T ](https://github.com/wincent/command-t/blob/master/ruby/command-t/match.c#L22) 566 | 567 | [Selecta](https://github.com/garybernhardt/selecta/blob/master/selecta#L415) 568 | 569 | [PeepOpen](https://github.com/topfunky/PeepOpen/blob/master/Classes/Models/FuzzyRecord.rb) 570 | 571 | [flx](https://github.com/lewang/flx) 572 | 573 | 574 | ## List of addressed issues 575 | 576 | ### Exact match vs directory depth. 
577 | 578 | https://github.com/atom/fuzzaldrin/issues/18 579 | -> actionsServiceSpec 580 | 581 | https://github.com/atom/atom/issues/7783 582 | -> usa_spec 583 | 584 | ### Start of string VS directory depth 585 | 586 | https://github.com/atom/fuzzy-finder/issues/57#issuecomment-133531653 587 | -> notification 588 | 589 | https://github.com/atom/fuzzy-finder/issues/21#issuecomment-48795958 590 | -> video backbone 591 | 592 | ### Folder / file query 593 | 594 | https://github.com/atom/fuzzy-finder/issues/21#issue-29106280 595 | -> src/app vs destroy_discard 596 | 597 | https://github.com/atom/fuzzy-finder/issues/21#issuecomment-46920333 598 | -> email handler 599 | 600 | https://github.com/substantial/atomfiles/issues/43 601 | -> model user 602 | 603 | ### Spread/group vs directory depth 604 | 605 | https://github.com/atom/fuzzy-finder/issues/21#issuecomment-138664303 606 | -> controller core 607 | 608 | ### Initialism 609 | 610 | https://github.com/atom/fuzzy-finder/issues/57#issue-42120886 611 | -> itc switch / ImportanceTableCtrl 612 | 613 | https://github.com/atom/fuzzy-finder/issues/57#issuecomment-95623924 614 | -> application controller 615 | 616 | https://github.com/atom/fuzzaldrin/issues/21 617 | -> fft vs FilterFactorTests 618 | 619 | ### Accidental Acronym 620 | 621 | https://github.com/atom/command-palette/issues/28 622 | -> Install / Find Select All 623 | 624 | https://github.com/atom/fuzzaldrin/issues/20#issue-93279352 625 | -> Git Plus Stage Hunk / Git Plus Push 626 | 627 | 628 | ### Case sensitivity 629 | 630 | https://github.com/atom/autocomplete-plus/issues/42 631 | -> downloadThread / DownloadTask 632 | 633 | https://github.com/atom/fuzzaldrin/issues/17 634 | -> diagnostics / Diagnostic 635 | 636 | 637 | ### Optional Characters 638 | 639 | https://github.com/atom/fuzzy-finder/issues/91 640 | https://github.com/atom/fuzzaldrin/issues/24 641 | 642 | -> PHP Namespaces, let "\" match "/" 643 | -> (would be nice for config file in mixed OS environment 
too) 644 | 645 | https://github.com/atom/fuzzy-finder/pull/51 646 | 647 | -> Ruby Namespaces, let "::" match "/" 648 | 649 | https://github.com/atom/fuzzy-finder/issues/10 650 | -> SpaceRegex, let " " match "/" 651 | -> was already implemented, posted here to show parallel. 652 | 653 | 654 | ### Suggestions 655 | 656 | https://github.com/atom/fuzzy-finder/issues/21#issue-29106280 657 | -> we implement suggestion of score based on run length 658 | -> todo allows fuzzaldrin to support external knowledge. 659 | -------------------------------------------------------------------------------- /dist-browser/fuzzaldrin-plus.js: -------------------------------------------------------------------------------- 1 | /* fuzzaldrin-plus - v0.5.0 - @license: MIT; @author: Jean Christophe Roy; @site: https://github.com/jeancroy/fuzzaldrin-plus */ 2 | 3 | (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.fuzzaldrin = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o 0 ? maxInners : candidates.length + 1; 26 | bKey = key != null; 27 | scoreProvider = usePathScoring ? pathScorer : scorer; 28 | for (_i = 0, _len = candidates.length; _i < _len; _i++) { 29 | candidate = candidates[_i]; 30 | string = bKey ? 
candidate[key] : candidate; 31 | if (!string) { 32 | continue; 33 | } 34 | score = scoreProvider.score(string, query, options); 35 | if (score > 0) { 36 | scoredCandidates.push({ 37 | candidate: candidate, 38 | score: score 39 | }); 40 | if (!--spotLeft) { 41 | break; 42 | } 43 | } 44 | } 45 | scoredCandidates.sort(sortCandidates); 46 | candidates = scoredCandidates.map(pluckCandidates); 47 | if (maxResults != null) { 48 | candidates = candidates.slice(0, maxResults); 49 | } 50 | return candidates; 51 | }; 52 | 53 | }).call(this); 54 | 55 | },{"./pathScorer":4,"./query":5,"./scorer":6}],2:[function(require,module,exports){ 56 | (function (process){ 57 | (function() { 58 | var Query, defaultPathSeparator, filter, matcher, parseOptions, pathScorer, preparedQueryCache, scorer; 59 | 60 | filter = require('./filter'); 61 | 62 | matcher = require('./matcher'); 63 | 64 | scorer = require('./scorer'); 65 | 66 | pathScorer = require('./pathScorer'); 67 | 68 | Query = require('./query'); 69 | 70 | preparedQueryCache = null; 71 | 72 | defaultPathSeparator = (typeof process !== "undefined" && process !== null ? process.platform : void 0) === "win32" ? '\\' : '/'; 73 | 74 | module.exports = { 75 | filter: function(candidates, query, options) { 76 | if (options == null) { 77 | options = {}; 78 | } 79 | if (!((query != null ? query.length : void 0) && (candidates != null ? candidates.length : void 0))) { 80 | return []; 81 | } 82 | options = parseOptions(options, query); 83 | return filter(candidates, query, options); 84 | }, 85 | score: function(string, query, options) { 86 | if (options == null) { 87 | options = {}; 88 | } 89 | if (!((string != null ? string.length : void 0) && (query != null ? 
query.length : void 0))) { 90 | return 0; 91 | } 92 | options = parseOptions(options, query); 93 | if (options.usePathScoring) { 94 | return pathScorer.score(string, query, options); 95 | } else { 96 | return scorer.score(string, query, options); 97 | } 98 | }, 99 | match: function(string, query, options) { 100 | var _i, _ref, _results; 101 | if (options == null) { 102 | options = {}; 103 | } 104 | if (!string) { 105 | return []; 106 | } 107 | if (!query) { 108 | return []; 109 | } 110 | if (string === query) { 111 | return (function() { 112 | _results = []; 113 | for (var _i = 0, _ref = string.length; 0 <= _ref ? _i < _ref : _i > _ref; 0 <= _ref ? _i++ : _i--){ _results.push(_i); } 114 | return _results; 115 | }).apply(this); 116 | } 117 | options = parseOptions(options, query); 118 | return matcher.match(string, query, options); 119 | }, 120 | wrap: function(string, query, options) { 121 | if (options == null) { 122 | options = {}; 123 | } 124 | if (!string) { 125 | return []; 126 | } 127 | if (!query) { 128 | return []; 129 | } 130 | options = parseOptions(options, query); 131 | return matcher.wrap(string, query, options); 132 | }, 133 | prepareQuery: function(query, options) { 134 | if (options == null) { 135 | options = {}; 136 | } 137 | options = parseOptions(options, query); 138 | return options.preparedQuery; 139 | } 140 | }; 141 | 142 | parseOptions = function(options, query) { 143 | if (options.allowErrors == null) { 144 | options.allowErrors = false; 145 | } 146 | if (options.usePathScoring == null) { 147 | options.usePathScoring = true; 148 | } 149 | if (options.useExtensionBonus == null) { 150 | options.useExtensionBonus = false; 151 | } 152 | if (options.pathSeparator == null) { 153 | options.pathSeparator = defaultPathSeparator; 154 | } 155 | if (options.optCharRegEx == null) { 156 | options.optCharRegEx = null; 157 | } 158 | if (options.wrap == null) { 159 | options.wrap = null; 160 | } 161 | if (options.preparedQuery == null) { 162 | 
options.preparedQuery = preparedQueryCache && preparedQueryCache.query === query ? preparedQueryCache : (preparedQueryCache = new Query(query, options)); 163 | } 164 | return options; 165 | }; 166 | 167 | }).call(this); 168 | 169 | }).call(this,require('_process')) 170 | },{"./filter":1,"./matcher":3,"./pathScorer":4,"./query":5,"./scorer":6,"_process":7}],3:[function(require,module,exports){ 171 | (function() { 172 | var basenameMatch, computeMatch, isMatch, isWordStart, match, mergeMatches, scoreAcronyms, scoreCharacter, scoreConsecutives, _ref; 173 | 174 | _ref = require('./scorer'), isMatch = _ref.isMatch, isWordStart = _ref.isWordStart, scoreConsecutives = _ref.scoreConsecutives, scoreCharacter = _ref.scoreCharacter, scoreAcronyms = _ref.scoreAcronyms; 175 | 176 | exports.match = match = function(string, query, options) { 177 | var allowErrors, baseMatches, matches, pathSeparator, preparedQuery, string_lw; 178 | allowErrors = options.allowErrors, preparedQuery = options.preparedQuery, pathSeparator = options.pathSeparator; 179 | if (!(allowErrors || isMatch(string, preparedQuery.core_lw, preparedQuery.core_up))) { 180 | return []; 181 | } 182 | string_lw = string.toLowerCase(); 183 | matches = computeMatch(string, string_lw, preparedQuery); 184 | if (matches.length === 0) { 185 | return matches; 186 | } 187 | if (string.indexOf(pathSeparator) > -1) { 188 | baseMatches = basenameMatch(string, string_lw, preparedQuery, pathSeparator); 189 | matches = mergeMatches(matches, baseMatches); 190 | } 191 | return matches; 192 | }; 193 | 194 | exports.wrap = function(string, query, options) { 195 | var matchIndex, matchPos, matchPositions, output, strPos, tagClass, tagClose, tagOpen, _ref1; 196 | if ((options.wrap != null)) { 197 | _ref1 = options.wrap, tagClass = _ref1.tagClass, tagOpen = _ref1.tagOpen, tagClose = _ref1.tagClose; 198 | } 199 | if (tagClass == null) { 200 | tagClass = 'highlight'; 201 | } 202 | if (tagOpen == null) { 203 | tagOpen = ''; 204 | } 205 | if 
(tagClose == null) { 206 | tagClose = ''; 207 | } 208 | if (string === query) { 209 | return tagOpen + string + tagClose; 210 | } 211 | matchPositions = match(string, query, options); 212 | if (matchPositions.length === 0) { 213 | return string; 214 | } 215 | output = ''; 216 | matchIndex = -1; 217 | strPos = 0; 218 | while (++matchIndex < matchPositions.length) { 219 | matchPos = matchPositions[matchIndex]; 220 | if (matchPos > strPos) { 221 | output += string.substring(strPos, matchPos); 222 | strPos = matchPos; 223 | } 224 | while (++matchIndex < matchPositions.length) { 225 | if (matchPositions[matchIndex] === matchPos + 1) { 226 | matchPos++; 227 | } else { 228 | matchIndex--; 229 | break; 230 | } 231 | } 232 | matchPos++; 233 | if (matchPos > strPos) { 234 | output += tagOpen; 235 | output += string.substring(strPos, matchPos); 236 | output += tagClose; 237 | strPos = matchPos; 238 | } 239 | } 240 | if (strPos <= string.length - 1) { 241 | output += string.substring(strPos); 242 | } 243 | return output; 244 | }; 245 | 246 | basenameMatch = function(subject, subject_lw, preparedQuery, pathSeparator) { 247 | var basePos, depth, end; 248 | end = subject.length - 1; 249 | while (subject[end] === pathSeparator) { 250 | end--; 251 | } 252 | basePos = subject.lastIndexOf(pathSeparator, end); 253 | if (basePos === -1) { 254 | return []; 255 | } 256 | depth = preparedQuery.depth; 257 | while (depth-- > 0) { 258 | basePos = subject.lastIndexOf(pathSeparator, basePos - 1); 259 | if (basePos === -1) { 260 | return []; 261 | } 262 | } 263 | basePos++; 264 | end++; 265 | return computeMatch(subject.slice(basePos, end), subject_lw.slice(basePos, end), preparedQuery, basePos); 266 | }; 267 | 268 | mergeMatches = function(a, b) { 269 | var ai, bj, i, j, m, n, out; 270 | m = a.length; 271 | n = b.length; 272 | if (n === 0) { 273 | return a.slice(); 274 | } 275 | if (m === 0) { 276 | return b.slice(); 277 | } 278 | i = -1; 279 | j = 0; 280 | bj = b[j]; 281 | out = []; 282 | 
while (++i < m) { 283 | ai = a[i]; 284 | while (bj <= ai && ++j < n) { 285 | if (bj < ai) { 286 | out.push(bj); 287 | } 288 | bj = b[j]; 289 | } 290 | out.push(ai); 291 | } 292 | while (j < n) { 293 | out.push(b[j++]); 294 | } 295 | return out; 296 | }; 297 | 298 | computeMatch = function(subject, subject_lw, preparedQuery, offset) { 299 | var DIAGONAL, LEFT, STOP, UP, acro_score, align, backtrack, csc_diag, csc_row, csc_score, i, j, m, matches, move, n, pos, query, query_lw, score, score_diag, score_row, score_up, si_lw, start, trace; 300 | if (offset == null) { 301 | offset = 0; 302 | } 303 | query = preparedQuery.query; 304 | query_lw = preparedQuery.query_lw; 305 | m = subject.length; 306 | n = query.length; 307 | acro_score = scoreAcronyms(subject, subject_lw, query, query_lw).score; 308 | score_row = new Array(n); 309 | csc_row = new Array(n); 310 | STOP = 0; 311 | UP = 1; 312 | LEFT = 2; 313 | DIAGONAL = 3; 314 | trace = new Array(m * n); 315 | pos = -1; 316 | j = -1; 317 | while (++j < n) { 318 | score_row[j] = 0; 319 | csc_row[j] = 0; 320 | } 321 | i = -1; 322 | while (++i < m) { 323 | score = 0; 324 | score_up = 0; 325 | csc_diag = 0; 326 | si_lw = subject_lw[i]; 327 | j = -1; 328 | while (++j < n) { 329 | csc_score = 0; 330 | align = 0; 331 | score_diag = score_up; 332 | if (query_lw[j] === si_lw) { 333 | start = isWordStart(i, subject, subject_lw); 334 | csc_score = csc_diag > 0 ? csc_diag : scoreConsecutives(subject, subject_lw, query, query_lw, i, j, start); 335 | align = score_diag + scoreCharacter(i, j, start, acro_score, csc_score); 336 | } 337 | score_up = score_row[j]; 338 | csc_diag = csc_row[j]; 339 | if (score > score_up) { 340 | move = LEFT; 341 | } else { 342 | score = score_up; 343 | move = UP; 344 | } 345 | if (align > score) { 346 | score = align; 347 | move = DIAGONAL; 348 | } else { 349 | csc_score = 0; 350 | } 351 | score_row[j] = score; 352 | csc_row[j] = csc_score; 353 | trace[++pos] = score > 0 ? 
move : STOP; 354 | } 355 | } 356 | i = m - 1; 357 | j = n - 1; 358 | pos = i * n + j; 359 | backtrack = true; 360 | matches = []; 361 | while (backtrack && i >= 0 && j >= 0) { 362 | switch (trace[pos]) { 363 | case UP: 364 | i--; 365 | pos -= n; 366 | break; 367 | case LEFT: 368 | j--; 369 | pos--; 370 | break; 371 | case DIAGONAL: 372 | matches.push(i + offset); 373 | j--; 374 | i--; 375 | pos -= n + 1; 376 | break; 377 | default: 378 | backtrack = false; 379 | } 380 | } 381 | matches.reverse(); 382 | return matches; 383 | }; 384 | 385 | }).call(this); 386 | 387 | },{"./scorer":6}],4:[function(require,module,exports){ 388 | (function() { 389 | var computeScore, countDir, file_coeff, getExtension, getExtensionScore, isMatch, scorePath, scoreSize, tau_depth, _ref; 390 | 391 | _ref = require('./scorer'), isMatch = _ref.isMatch, computeScore = _ref.computeScore, scoreSize = _ref.scoreSize; 392 | 393 | tau_depth = 13; 394 | 395 | file_coeff = 1.5; 396 | 397 | exports.score = function(string, query, options) { 398 | var allowErrors, preparedQuery, score, string_lw; 399 | preparedQuery = options.preparedQuery, allowErrors = options.allowErrors; 400 | if (!(allowErrors || isMatch(string, preparedQuery.core_lw, preparedQuery.core_up))) { 401 | return 0; 402 | } 403 | string_lw = string.toLowerCase(); 404 | score = computeScore(string, string_lw, preparedQuery); 405 | score = scorePath(string, string_lw, score, options); 406 | return Math.ceil(score); 407 | }; 408 | 409 | scorePath = function(subject, subject_lw, fullPathScore, options) { 410 | var alpha, basePathScore, basePos, depth, end, extAdjust, fileLength, pathSeparator, preparedQuery, useExtensionBonus; 411 | if (fullPathScore === 0) { 412 | return 0; 413 | } 414 | preparedQuery = options.preparedQuery, useExtensionBonus = options.useExtensionBonus, pathSeparator = options.pathSeparator; 415 | end = subject.length - 1; 416 | while (subject[end] === pathSeparator) { 417 | end--; 418 | } 419 | basePos = 
subject.lastIndexOf(pathSeparator, end); 420 | fileLength = end - basePos; 421 | extAdjust = 1.0; 422 | if (useExtensionBonus) { 423 | extAdjust += getExtensionScore(subject_lw, preparedQuery.ext, basePos, end, 2); 424 | fullPathScore *= extAdjust; 425 | } 426 | if (basePos === -1) { 427 | return fullPathScore; 428 | } 429 | depth = preparedQuery.depth; 430 | while (basePos > -1 && depth-- > 0) { 431 | basePos = subject.lastIndexOf(pathSeparator, basePos - 1); 432 | } 433 | basePathScore = basePos === -1 ? fullPathScore : extAdjust * computeScore(subject.slice(basePos + 1, end + 1), subject_lw.slice(basePos + 1, end + 1), preparedQuery); 434 | alpha = 0.5 * tau_depth / (tau_depth + countDir(subject, end + 1, pathSeparator)); 435 | return alpha * basePathScore + (1 - alpha) * fullPathScore * scoreSize(0, file_coeff * fileLength); 436 | }; 437 | 438 | exports.countDir = countDir = function(path, end, pathSeparator) { 439 | var count, i; 440 | if (end < 1) { 441 | return 0; 442 | } 443 | count = 0; 444 | i = -1; 445 | while (++i < end && path[i] === pathSeparator) { 446 | continue; 447 | } 448 | while (++i < end) { 449 | if (path[i] === pathSeparator) { 450 | count++; 451 | while (++i < end && path[i] === pathSeparator) { 452 | continue; 453 | } 454 | } 455 | } 456 | return count; 457 | }; 458 | 459 | exports.getExtension = getExtension = function(str) { 460 | var pos; 461 | pos = str.lastIndexOf("."); 462 | if (pos < 0) { 463 | return ""; 464 | } else { 465 | return str.substr(pos + 1); 466 | } 467 | }; 468 | 469 | getExtensionScore = function(candidate, ext, startPos, endPos, maxDepth) { 470 | var m, matched, n, pos; 471 | if (!ext.length) { 472 | return 0; 473 | } 474 | pos = candidate.lastIndexOf(".", endPos); 475 | if (!(pos > startPos)) { 476 | return 0; 477 | } 478 | n = ext.length; 479 | m = endPos - pos; 480 | if (m < n) { 481 | n = m; 482 | m = ext.length; 483 | } 484 | pos++; 485 | matched = -1; 486 | while (++matched < n) { 487 | if (candidate[pos + 
matched] !== ext[matched]) { 488 | break; 489 | } 490 | } 491 | if (matched === 0 && maxDepth > 0) { 492 | return 0.9 * getExtensionScore(candidate, ext, startPos, pos - 2, maxDepth - 1); 493 | } 494 | return matched / m; 495 | }; 496 | 497 | }).call(this); 498 | 499 | },{"./scorer":6}],5:[function(require,module,exports){ 500 | (function() { 501 | var Query, coreChars, countDir, getCharCodes, getExtension, opt_char_re, truncatedUpperCase, _ref; 502 | 503 | _ref = require("./pathScorer"), countDir = _ref.countDir, getExtension = _ref.getExtension; 504 | 505 | module.exports = Query = (function() { 506 | function Query(query, _arg) { 507 | var optCharRegEx, pathSeparator, _ref1; 508 | _ref1 = _arg != null ? _arg : {}, optCharRegEx = _ref1.optCharRegEx, pathSeparator = _ref1.pathSeparator; 509 | if (!(query && query.length)) { 510 | return null; 511 | } 512 | this.query = query; 513 | this.query_lw = query.toLowerCase(); 514 | this.core = coreChars(query, optCharRegEx); 515 | this.core_lw = this.core.toLowerCase(); 516 | this.core_up = truncatedUpperCase(this.core); 517 | this.depth = countDir(query, query.length, pathSeparator); 518 | this.ext = getExtension(this.query_lw); 519 | this.charCodes = getCharCodes(this.query_lw); 520 | } 521 | 522 | return Query; 523 | 524 | })(); 525 | 526 | opt_char_re = /[ _\-:\/\\]/g; 527 | 528 | coreChars = function(query, optCharRegEx) { 529 | if (optCharRegEx == null) { 530 | optCharRegEx = opt_char_re; 531 | } 532 | return query.replace(optCharRegEx, ''); 533 | }; 534 | 535 | truncatedUpperCase = function(str) { 536 | var char, upper, _i, _len; 537 | upper = ""; 538 | for (_i = 0, _len = str.length; _i < _len; _i++) { 539 | char = str[_i]; 540 | upper += char.toUpperCase()[0]; 541 | } 542 | return upper; 543 | }; 544 | 545 | getCharCodes = function(str) { 546 | var charCodes, i, len; 547 | len = str.length; 548 | i = -1; 549 | charCodes = []; 550 | while (++i < len) { 551 | charCodes[str.charCodeAt(i)] = true; 552 | } 553 | 
return charCodes; 554 | }; 555 | 556 | }).call(this); 557 | 558 | },{"./pathScorer":4}],6:[function(require,module,exports){ 559 | (function() { 560 | var AcronymResult, computeScore, emptyAcronymResult, isAcronymFullWord, isMatch, isSeparator, isWordEnd, isWordStart, miss_coeff, pos_bonus, scoreAcronyms, scoreCharacter, scoreConsecutives, scoreExact, scoreExactMatch, scorePattern, scorePosition, scoreSize, tau_size, wm; 561 | 562 | wm = 150; 563 | 564 | pos_bonus = 20; 565 | 566 | tau_size = 85; 567 | 568 | miss_coeff = 0.75; 569 | 570 | exports.score = function(string, query, options) { 571 | var allowErrors, preparedQuery, score, string_lw; 572 | preparedQuery = options.preparedQuery, allowErrors = options.allowErrors; 573 | if (!(allowErrors || isMatch(string, preparedQuery.core_lw, preparedQuery.core_up))) { 574 | return 0; 575 | } 576 | string_lw = string.toLowerCase(); 577 | score = computeScore(string, string_lw, preparedQuery); 578 | return Math.ceil(score); 579 | }; 580 | 581 | exports.isMatch = isMatch = function(subject, query_lw, query_up) { 582 | var i, j, m, n, qj_lw, qj_up, si; 583 | m = subject.length; 584 | n = query_lw.length; 585 | if (!m || n > m) { 586 | return false; 587 | } 588 | i = -1; 589 | j = -1; 590 | while (++j < n) { 591 | qj_lw = query_lw.charCodeAt(j); 592 | qj_up = query_up.charCodeAt(j); 593 | while (++i < m) { 594 | si = subject.charCodeAt(i); 595 | if (si === qj_lw || si === qj_up) { 596 | break; 597 | } 598 | } 599 | if (i === m) { 600 | return false; 601 | } 602 | } 603 | return true; 604 | }; 605 | 606 | exports.computeScore = computeScore = function(subject, subject_lw, preparedQuery) { 607 | var acro, acro_score, align, csc_diag, csc_row, csc_score, csc_should_rebuild, i, j, m, miss_budget, miss_left, n, pos, query, query_lw, record_miss, score, score_diag, score_row, score_up, si_lw, start, sz; 608 | query = preparedQuery.query; 609 | query_lw = preparedQuery.query_lw; 610 | m = subject.length; 611 | n = query.length; 612 
| acro = scoreAcronyms(subject, subject_lw, query, query_lw); 613 | acro_score = acro.score; 614 | if (acro.count === n) { 615 | return scoreExact(n, m, acro_score, acro.pos); 616 | } 617 | pos = subject_lw.indexOf(query_lw); 618 | if (pos > -1) { 619 | return scoreExactMatch(subject, subject_lw, query, query_lw, pos, n, m); 620 | } 621 | score_row = new Array(n); 622 | csc_row = new Array(n); 623 | sz = scoreSize(n, m); 624 | miss_budget = Math.ceil(miss_coeff * n) + 5; 625 | miss_left = miss_budget; 626 | csc_should_rebuild = true; 627 | j = -1; 628 | while (++j < n) { 629 | score_row[j] = 0; 630 | csc_row[j] = 0; 631 | } 632 | i = -1; 633 | while (++i < m) { 634 | si_lw = subject_lw[i]; 635 | if (!si_lw.charCodeAt(0) in preparedQuery.charCodes) { 636 | if (csc_should_rebuild) { 637 | j = -1; 638 | while (++j < n) { 639 | csc_row[j] = 0; 640 | } 641 | csc_should_rebuild = false; 642 | } 643 | continue; 644 | } 645 | score = 0; 646 | score_diag = 0; 647 | csc_diag = 0; 648 | record_miss = true; 649 | csc_should_rebuild = true; 650 | j = -1; 651 | while (++j < n) { 652 | score_up = score_row[j]; 653 | if (score_up > score) { 654 | score = score_up; 655 | } 656 | csc_score = 0; 657 | if (query_lw[j] === si_lw) { 658 | start = isWordStart(i, subject, subject_lw); 659 | csc_score = csc_diag > 0 ? 
csc_diag : scoreConsecutives(subject, subject_lw, query, query_lw, i, j, start); 660 | align = score_diag + scoreCharacter(i, j, start, acro_score, csc_score); 661 | if (align > score) { 662 | score = align; 663 | miss_left = miss_budget; 664 | } else { 665 | if (record_miss && --miss_left <= 0) { 666 | return Math.max(score, score_row[n - 1]) * sz; 667 | } 668 | record_miss = false; 669 | } 670 | } 671 | score_diag = score_up; 672 | csc_diag = csc_row[j]; 673 | csc_row[j] = csc_score; 674 | score_row[j] = score; 675 | } 676 | } 677 | score = score_row[n - 1]; 678 | return score * sz; 679 | }; 680 | 681 | exports.isWordStart = isWordStart = function(pos, subject, subject_lw) { 682 | var curr_s, prev_s; 683 | if (pos === 0) { 684 | return true; 685 | } 686 | curr_s = subject[pos]; 687 | prev_s = subject[pos - 1]; 688 | return isSeparator(prev_s) || (curr_s !== subject_lw[pos] && prev_s === subject_lw[pos - 1]); 689 | }; 690 | 691 | exports.isWordEnd = isWordEnd = function(pos, subject, subject_lw, len) { 692 | var curr_s, next_s; 693 | if (pos === len - 1) { 694 | return true; 695 | } 696 | curr_s = subject[pos]; 697 | next_s = subject[pos + 1]; 698 | return isSeparator(next_s) || (curr_s === subject_lw[pos] && next_s !== subject_lw[pos + 1]); 699 | }; 700 | 701 | isSeparator = function(c) { 702 | return c === ' ' || c === '.' 
|| c === '-' || c === '_' || c === '/' || c === '\\'; 703 | }; 704 | 705 | scorePosition = function(pos) { 706 | var sc; 707 | if (pos < pos_bonus) { 708 | sc = pos_bonus - pos; 709 | return 100 + sc * sc; 710 | } else { 711 | return Math.max(100 + pos_bonus - pos, 0); 712 | } 713 | }; 714 | 715 | exports.scoreSize = scoreSize = function(n, m) { 716 | return tau_size / (tau_size + Math.abs(m - n)); 717 | }; 718 | 719 | scoreExact = function(n, m, quality, pos) { 720 | return 2 * n * (wm * quality + scorePosition(pos)) * scoreSize(n, m); 721 | }; 722 | 723 | exports.scorePattern = scorePattern = function(count, len, sameCase, start, end) { 724 | var bonus, sz; 725 | sz = count; 726 | bonus = 6; 727 | if (sameCase === count) { 728 | bonus += 2; 729 | } 730 | if (start) { 731 | bonus += 3; 732 | } 733 | if (end) { 734 | bonus += 1; 735 | } 736 | if (count === len) { 737 | if (start) { 738 | if (sameCase === len) { 739 | sz += 2; 740 | } else { 741 | sz += 1; 742 | } 743 | } 744 | if (end) { 745 | bonus += 1; 746 | } 747 | } 748 | return sameCase + sz * (sz + bonus); 749 | }; 750 | 751 | exports.scoreCharacter = scoreCharacter = function(i, j, start, acro_score, csc_score) { 752 | var posBonus; 753 | posBonus = scorePosition(i); 754 | if (start) { 755 | return posBonus + wm * ((acro_score > csc_score ? acro_score : csc_score) + 10); 756 | } 757 | return posBonus + wm * csc_score; 758 | }; 759 | 760 | exports.scoreConsecutives = scoreConsecutives = function(subject, subject_lw, query, query_lw, i, j, startOfWord) { 761 | var k, m, mi, n, nj, sameCase, sz; 762 | m = subject.length; 763 | n = query.length; 764 | mi = m - i; 765 | nj = n - j; 766 | k = mi < nj ? 
mi : nj; 767 | sameCase = 0; 768 | sz = 0; 769 | if (query[j] === subject[i]) { 770 | sameCase++; 771 | } 772 | while (++sz < k && query_lw[++j] === subject_lw[++i]) { 773 | if (query[j] === subject[i]) { 774 | sameCase++; 775 | } 776 | } 777 | if (sz < k) { 778 | i--; 779 | } 780 | if (sz === 1) { 781 | return 1 + 2 * sameCase; 782 | } 783 | return scorePattern(sz, n, sameCase, startOfWord, isWordEnd(i, subject, subject_lw, m)); 784 | }; 785 | 786 | exports.scoreExactMatch = scoreExactMatch = function(subject, subject_lw, query, query_lw, pos, n, m) { 787 | var end, i, pos2, sameCase, start; 788 | start = isWordStart(pos, subject, subject_lw); 789 | if (!start) { 790 | pos2 = subject_lw.indexOf(query_lw, pos + 1); 791 | if (pos2 > -1) { 792 | start = isWordStart(pos2, subject, subject_lw); 793 | if (start) { 794 | pos = pos2; 795 | } 796 | } 797 | } 798 | i = -1; 799 | sameCase = 0; 800 | while (++i < n) { 801 | if (query[pos + i] === subject[i]) { 802 | sameCase++; 803 | } 804 | } 805 | end = isWordEnd(pos + n - 1, subject, subject_lw, m); 806 | return scoreExact(n, m, scorePattern(n, n, sameCase, start, end), pos); 807 | }; 808 | 809 | AcronymResult = (function() { 810 | function AcronymResult(score, pos, count) { 811 | this.score = score; 812 | this.pos = pos; 813 | this.count = count; 814 | } 815 | 816 | return AcronymResult; 817 | 818 | })(); 819 | 820 | emptyAcronymResult = new AcronymResult(0, 0.1, 0); 821 | 822 | exports.scoreAcronyms = scoreAcronyms = function(subject, subject_lw, query, query_lw) { 823 | var count, fullWord, i, j, m, n, qj_lw, sameCase, score, sepCount, sumPos; 824 | m = subject.length; 825 | n = query.length; 826 | if (!(m > 1 && n > 1)) { 827 | return emptyAcronymResult; 828 | } 829 | count = 0; 830 | sepCount = 0; 831 | sumPos = 0; 832 | sameCase = 0; 833 | i = -1; 834 | j = -1; 835 | while (++j < n) { 836 | qj_lw = query_lw[j]; 837 | if (isSeparator(qj_lw)) { 838 | i = subject_lw.indexOf(qj_lw, i + 1); 839 | if (i > -1) { 840 | 
sepCount++; 841 | continue; 842 | } else { 843 | break; 844 | } 845 | } 846 | while (++i < m) { 847 | if (qj_lw === subject_lw[i] && isWordStart(i, subject, subject_lw)) { 848 | if (query[j] === subject[i]) { 849 | sameCase++; 850 | } 851 | sumPos += i; 852 | count++; 853 | break; 854 | } 855 | } 856 | if (i === m) { 857 | break; 858 | } 859 | } 860 | if (count < 2) { 861 | return emptyAcronymResult; 862 | } 863 | fullWord = count === n ? isAcronymFullWord(subject, subject_lw, query, count) : false; 864 | score = scorePattern(count, n, sameCase, true, fullWord); 865 | return new AcronymResult(score, sumPos / count, count + sepCount); 866 | }; 867 | 868 | isAcronymFullWord = function(subject, subject_lw, query, nbAcronymInQuery) { 869 | var count, i, m, n; 870 | m = subject.length; 871 | n = query.length; 872 | count = 0; 873 | if (m > 12 * n) { 874 | return false; 875 | } 876 | i = -1; 877 | while (++i < m) { 878 | if (isWordStart(i, subject, subject_lw) && ++count > nbAcronymInQuery) { 879 | return false; 880 | } 881 | } 882 | return true; 883 | }; 884 | 885 | }).call(this); 886 | 887 | },{}],7:[function(require,module,exports){ 888 | // shim for using process in browser 889 | var process = module.exports = {}; 890 | 891 | // cached from whatever global is present so that test runners that stub it 892 | // don't break things. But we need to wrap it in a try catch in case it is 893 | // wrapped in strict mode code which doesn't define any globals. It's inside a 894 | // function because try/catches deoptimize in certain engines. 
895 | 896 | var cachedSetTimeout; 897 | var cachedClearTimeout; 898 | 899 | function defaultSetTimout() { 900 | throw new Error('setTimeout has not been defined'); 901 | } 902 | function defaultClearTimeout () { 903 | throw new Error('clearTimeout has not been defined'); 904 | } 905 | (function () { 906 | try { 907 | if (typeof setTimeout === 'function') { 908 | cachedSetTimeout = setTimeout; 909 | } else { 910 | cachedSetTimeout = defaultSetTimout; 911 | } 912 | } catch (e) { 913 | cachedSetTimeout = defaultSetTimout; 914 | } 915 | try { 916 | if (typeof clearTimeout === 'function') { 917 | cachedClearTimeout = clearTimeout; 918 | } else { 919 | cachedClearTimeout = defaultClearTimeout; 920 | } 921 | } catch (e) { 922 | cachedClearTimeout = defaultClearTimeout; 923 | } 924 | } ()) 925 | function runTimeout(fun) { 926 | if (cachedSetTimeout === setTimeout) { 927 | //normal enviroments in sane situations 928 | return setTimeout(fun, 0); 929 | } 930 | // if setTimeout wasn't available but was latter defined 931 | if ((cachedSetTimeout === defaultSetTimout || !cachedSetTimeout) && setTimeout) { 932 | cachedSetTimeout = setTimeout; 933 | return setTimeout(fun, 0); 934 | } 935 | try { 936 | // when when somebody has screwed with setTimeout but no I.E. maddness 937 | return cachedSetTimeout(fun, 0); 938 | } catch(e){ 939 | try { 940 | // When we are in I.E. but the script has been evaled so I.E. doesn't trust the global object when called normally 941 | return cachedSetTimeout.call(null, fun, 0); 942 | } catch(e){ 943 | // same as above but when it's a version of I.E. 
that must have the global object for 'this', hopfully our context correct otherwise it will throw a global error 944 | return cachedSetTimeout.call(this, fun, 0); 945 | } 946 | } 947 | 948 | 949 | } 950 | function runClearTimeout(marker) { 951 | if (cachedClearTimeout === clearTimeout) { 952 | //normal enviroments in sane situations 953 | return clearTimeout(marker); 954 | } 955 | // if clearTimeout wasn't available but was latter defined 956 | if ((cachedClearTimeout === defaultClearTimeout || !cachedClearTimeout) && clearTimeout) { 957 | cachedClearTimeout = clearTimeout; 958 | return clearTimeout(marker); 959 | } 960 | try { 961 | // when when somebody has screwed with setTimeout but no I.E. maddness 962 | return cachedClearTimeout(marker); 963 | } catch (e){ 964 | try { 965 | // When we are in I.E. but the script has been evaled so I.E. doesn't trust the global object when called normally 966 | return cachedClearTimeout.call(null, marker); 967 | } catch (e){ 968 | // same as above but when it's a version of I.E. that must have the global object for 'this', hopfully our context correct otherwise it will throw a global error. 969 | // Some versions of I.E. 
have different rules for clearTimeout vs setTimeout 970 | return cachedClearTimeout.call(this, marker); 971 | } 972 | } 973 | 974 | 975 | 976 | } 977 | var queue = []; 978 | var draining = false; 979 | var currentQueue; 980 | var queueIndex = -1; 981 | 982 | function cleanUpNextTick() { 983 | if (!draining || !currentQueue) { 984 | return; 985 | } 986 | draining = false; 987 | if (currentQueue.length) { 988 | queue = currentQueue.concat(queue); 989 | } else { 990 | queueIndex = -1; 991 | } 992 | if (queue.length) { 993 | drainQueue(); 994 | } 995 | } 996 | 997 | function drainQueue() { 998 | if (draining) { 999 | return; 1000 | } 1001 | var timeout = runTimeout(cleanUpNextTick); 1002 | draining = true; 1003 | 1004 | var len = queue.length; 1005 | while(len) { 1006 | currentQueue = queue; 1007 | queue = []; 1008 | while (++queueIndex < len) { 1009 | if (currentQueue) { 1010 | currentQueue[queueIndex].run(); 1011 | } 1012 | } 1013 | queueIndex = -1; 1014 | len = queue.length; 1015 | } 1016 | currentQueue = null; 1017 | draining = false; 1018 | runClearTimeout(timeout); 1019 | } 1020 | 1021 | process.nextTick = function (fun) { 1022 | var args = new Array(arguments.length - 1); 1023 | if (arguments.length > 1) { 1024 | for (var i = 1; i < arguments.length; i++) { 1025 | args[i - 1] = arguments[i]; 1026 | } 1027 | } 1028 | queue.push(new Item(fun, args)); 1029 | if (queue.length === 1 && !draining) { 1030 | runTimeout(drainQueue); 1031 | } 1032 | }; 1033 | 1034 | // v8 likes predictible objects 1035 | function Item(fun, array) { 1036 | this.fun = fun; 1037 | this.array = array; 1038 | } 1039 | Item.prototype.run = function () { 1040 | this.fun.apply(null, this.array); 1041 | }; 1042 | process.title = 'browser'; 1043 | process.browser = true; 1044 | process.env = {}; 1045 | process.argv = []; 1046 | process.version = ''; // empty string to avoid regexp issues 1047 | process.versions = {}; 1048 | 1049 | function noop() {} 1050 | 1051 | process.on = noop; 1052 | 
process.addListener = noop; 1053 | process.once = noop; 1054 | process.off = noop; 1055 | process.removeListener = noop; 1056 | process.removeAllListeners = noop; 1057 | process.emit = noop; 1058 | process.prependListener = noop; 1059 | process.prependOnceListener = noop; 1060 | 1061 | process.listeners = function (name) { return [] } 1062 | 1063 | process.binding = function (name) { 1064 | throw new Error('process.binding is not supported'); 1065 | }; 1066 | 1067 | process.cwd = function () { return '/' }; 1068 | process.chdir = function (dir) { 1069 | throw new Error('process.chdir is not supported'); 1070 | }; 1071 | process.umask = function() { return 0; }; 1072 | 1073 | },{}]},{},[2])(2) 1074 | }); --------------------------------------------------------------------------------