├── .istanbul.yml ├── .gitignore ├── spec ├── support │ └── jasmine.json └── string-similarity.spec.js ├── .eslintrc.json ├── wallaby.config.js ├── .travis.yml ├── tsconfig.json ├── tslint.json ├── dist ├── string-similarity.d.ts ├── string-similarity.js.map └── string-similarity.js ├── LICENSE.md ├── package.json ├── src └── string-similarity.ts └── README.md /.istanbul.yml: -------------------------------------------------------------------------------- 1 | instrumentation: 2 | excludes: ['wallaby.config.js'] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules/ 2 | .DS_Store 3 | /coverage/ 4 | package-lock.json -------------------------------------------------------------------------------- /spec/support/jasmine.json: -------------------------------------------------------------------------------- 1 | { 2 | "spec_dir": "spec", 3 | "spec_files": [ 4 | "**/*[sS]pec.js" 5 | ], 6 | "stopSpecOnExpectationFailure": false 7 | } 8 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "eslint:recommended", 3 | "parser": "babel-eslint", 4 | "rules": { 5 | "semi": ["error", "always"] 6 | } 7 | } -------------------------------------------------------------------------------- /wallaby.config.js: -------------------------------------------------------------------------------- 1 | /* globals module */ 2 | module.exports = function () { 3 | return { 4 | files: [ 5 | 'dist/**/*.js' 6 | ], 7 | 8 | tests: [ 9 | 'spec/**/*spec.js' 10 | ], 11 | 12 | env: {type: 'node'} 13 | }; 14 | }; -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - 
'10.4' 4 | before_install: 5 | - pip install --user codecov 6 | - npm install -g typescript 7 | script: 8 | - npm test 9 | after_success: 10 | - codecov --file coverage/lcov.info --disable search -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "target": "es5", 5 | "sourceMap": true, 6 | "rootDir": "src", 7 | "strict": true, 8 | "outDir": "dist", 9 | "declaration": true, 10 | "lib": [ 11 | "es5","es2015.collection" 12 | ] 13 | } 14 | } -------------------------------------------------------------------------------- /tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "defaultSeverity": "error", 3 | "extends": [ 4 | "tslint:recommended" 5 | ], 6 | "jsRules": {}, 7 | "rules": { 8 | "indent": [true, "tabs"], 9 | "curly": [true, "as-needed"], 10 | "max-line-length": [true, 150] 11 | }, 12 | "rulesDirectory": [] 13 | } -------------------------------------------------------------------------------- /dist/string-similarity.d.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Calculate similarity between two strings 3 | * @param {string} str1 First string to match 4 | * @param {string} str2 Second string to match 5 | * @param {number} [substringLength=2] Optional. Length of substring to be used in calculating similarity. Default 2. 6 | * @param {boolean} [caseSensitive=false] Optional. Whether you want to consider case in string matching. Default false; 7 | * @returns Number between 0 and 1, with 0 being a low match score. 
8 | */ 9 | export declare const stringSimilarity: (str1: string, str2: string, substringLength?: number, caseSensitive?: boolean) => number; 10 | export default stringSimilarity; 11 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2018 Stephen Brown 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "string-similarity-js", 3 | "version": "2.1.4", 4 | "description": "Calculates similarity between two strings", 5 | "keywords": [ 6 | "fuzzy search", 7 | "string similarity", 8 | "dice coefficient", 9 | "dice's", 10 | "sorenson-dice", 11 | "fuzzy", 12 | "string match", 13 | "string", 14 | "typos", 15 | "misspell", 16 | "misspelling", 17 | "compare strings" 18 | ], 19 | "main": "dist/string-similarity.js", 20 | "scripts": { 21 | "test": "istanbul cover --include-all-sources jasmine-node spec" 22 | }, 23 | "repository": { 24 | "type": "git", 25 | "url": "git+https://github.com/stephenjjbrown/string-similarity-js.git" 26 | }, 27 | "author": "Stephen Brown", 28 | "license": "MIT", 29 | "bugs": { 30 | "url": "https://github.com/stephenjjbrown/string-similarity-js/issues" 31 | }, 32 | "homepage": "https://github.com/stephenjjbrown/string-similarity-js#readme", 33 | "devDependencies": { 34 | "@types/chai": "^4.2.21", 35 | "@types/jasmine": "^3.8.2", 36 | "babel-eslint": "^10.0.1", 37 | "chai": "^4.3.4", 38 | "eslint": "^7.32.0", 39 | "istanbul": "^0.4.5", 40 | "jasmine-node": "^3.0.0", 41 | "tslint": "^5.20.1", 42 | "typescript": "^4.3.5" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /spec/string-similarity.spec.js: -------------------------------------------------------------------------------- 1 | /*global require describe it*/ 2 | var { expect } = require("chai"); 3 | var { stringSimilarity } = require("../dist/string-similarity"); 4 | 5 | describe("stringSimilarity", () => { 6 | 7 | it("Should return 1 for exact strings", () => { 8 | expect(stringSimilarity("String", "String")).to.equal(1); 9 | expect(stringSimilarity("So", "So", 2)).to.equal(1); 10 | }); 11 | 12 | it("Should return 0 if strings are exact but substring 
length is larger", () => { 13 | expect(stringSimilarity("So", "So", 3)).to.equal(0); 14 | }); 15 | 16 | it("Should return 0 if either string is empty", () => { 17 | expect(stringSimilarity("String", "")).to.equal(0); 18 | expect(stringSimilarity("", "String")).to.equal(0); 19 | expect(stringSimilarity("", "")).to.equal(0); 20 | }); 21 | 22 | it("Should be case insensitive by default", () => { 23 | expect(stringSimilarity("TEST", "test")).to.equal(1); 24 | }); 25 | 26 | it("Should be case sensitive if set", () => { 27 | expect(stringSimilarity("TEST", "test", undefined, true)).to.equal(0); 28 | }); 29 | 30 | it("Should return strong match for rearranged words", () => { 31 | expect(stringSimilarity("Lorem ipsum dolor", "Dolor lorem ipsum")).to.be.above(0.75); 32 | }); 33 | 34 | it("Should return strong match for misspellings", () => { 35 | expect(stringSimilarity("Lorem ipsum dolor", "Lorem ipsum dlr")).to.be.above(0.75); 36 | }); 37 | }); -------------------------------------------------------------------------------- /dist/string-similarity.js.map: -------------------------------------------------------------------------------- 1 | 
{"version":3,"file":"string-similarity.js","sourceRoot":"","sources":["../src/string-similarity.ts"],"names":[],"mappings":";;;AAAA,yBAAyB;AACzB;;;;;;;GAOG;AACI,IAAM,gBAAgB,GAAG,UAAC,IAAY,EAAE,IAAY,EAAE,eAA2B,EAAE,aAA8B;IAA3D,gCAAA,EAAA,mBAA2B;IAAE,8BAAA,EAAA,qBAA8B;IACvH,IAAI,CAAC,aAAa,EAAE;QACnB,IAAI,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAC1B,IAAI,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;KAC1B;IAED,IAAI,IAAI,CAAC,MAAM,GAAG,eAAe,IAAI,IAAI,CAAC,MAAM,GAAG,eAAe;QACjE,OAAO,CAAC,CAAC;IAEV,IAAM,GAAG,GAAG,IAAI,GAAG,EAAE,CAAC;IACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,eAAe,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC7D,IAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,eAAe,CAAC,CAAC;QAChD,GAAG,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KAC9D;IAED,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,eAAe,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC7D,IAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,eAAe,CAAC,CAAC;QAChD,IAAM,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACtD,IAAI,KAAK,GAAG,CAAC,EAAE;YACd,GAAG,CAAC,GAAG,CAAC,OAAO,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;YAC5B,KAAK,EAAE,CAAC;SACR;KACD;IAED,OAAO,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,eAAe,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;AAChF,CAAC,CAAC;AA1BW,QAAA,gBAAgB,oBA0B3B;AACF,kBAAe,wBAAgB,CAAC"} -------------------------------------------------------------------------------- /src/string-similarity.ts: -------------------------------------------------------------------------------- 1 | /* global exports, Map */ 2 | /** 3 | * Calculate similarity between two strings 4 | * @param {string} str1 First string to match 5 | * @param {string} str2 Second string to match 6 | * @param {number} [substringLength=2] Optional. Length of substring to be used in calculating similarity. Default 2. 
/**
 * Calculate similarity between two strings.
 *
 * Both strings are split into every overlapping substring of
 * `substringLength` characters. The score is twice the number of substrings
 * the two strings share (each occurrence can be matched only once), divided
 * by the total number of substrings in both strings.
 *
 * @param {string} str1 First string to match
 * @param {string} str2 Second string to match
 * @param {number} [substringLength=2] Optional. Length of substring to be used in calculating similarity. Default 2.
 * Fractional values are rounded down. Values that do not round down to at least 1 (zero, negatives, NaN) give a score of 0.
 * @param {boolean} [caseSensitive=false] Optional. Whether you want to consider case in string matching. Default false.
 * @returns Number between 0 and 1, with 0 being a low match score. Also 0 when either string is shorter than the substring length.
 */
export const stringSimilarity = (str1: string, str2: string, substringLength: number = 2, caseSensitive: boolean = false): number => {
	// A substring length below 1 has no meaning: every substring would be the
	// empty string and unrelated inputs would score as a perfect match.
	// `!(size >= 1)` is also true for NaN.
	const size = Math.floor(substringLength);
	if (!(size >= 1))
		return 0;

	const first = caseSensitive ? str1 : str1.toLowerCase();
	const second = caseSensitive ? str2 : str2.toLowerCase();

	if (first.length < size || second.length < size)
		return 0;

	// Number of overlapping substrings of `size` characters in each string.
	const firstTotal = first.length - size + 1;
	const secondTotal = second.length - size + 1;

	// How many times each substring of the first string is still unmatched.
	const unmatched = new Map<string, number>();
	for (let i = 0; i < firstTotal; i++) {
		const piece = first.slice(i, i + size);
		unmatched.set(piece, (unmatched.get(piece) ?? 0) + 1);
	}

	let match = 0;
	for (let j = 0; j < secondTotal; j++) {
		const piece = second.slice(j, j + size);
		const available = unmatched.get(piece) ?? 0;
		if (available > 0) {
			// Consume one occurrence so repeated substrings are not over-counted.
			unmatched.set(piece, available - 1);
			match++;
		}
	}

	return (match * 2) / (firstTotal + secondTotal);
};
export default stringSimilarity;
Default false; 11 | * @returns Number between 0 and 1, with 0 being a low match score. 12 | */ 13 | var stringSimilarity = function (str1, str2, substringLength, caseSensitive) { 14 | if (substringLength === void 0) { substringLength = 2; } 15 | if (caseSensitive === void 0) { caseSensitive = false; } 16 | if (!caseSensitive) { 17 | str1 = str1.toLowerCase(); 18 | str2 = str2.toLowerCase(); 19 | } 20 | if (str1.length < substringLength || str2.length < substringLength) 21 | return 0; 22 | var map = new Map(); 23 | for (var i = 0; i < str1.length - (substringLength - 1); i++) { 24 | var substr1 = str1.substr(i, substringLength); 25 | map.set(substr1, map.has(substr1) ? map.get(substr1) + 1 : 1); 26 | } 27 | var match = 0; 28 | for (var j = 0; j < str2.length - (substringLength - 1); j++) { 29 | var substr2 = str2.substr(j, substringLength); 30 | var count = map.has(substr2) ? map.get(substr2) : 0; 31 | if (count > 0) { 32 | map.set(substr2, count - 1); 33 | match++; 34 | } 35 | } 36 | return (match * 2) / (str1.length + str2.length - ((substringLength - 1) * 2)); 37 | }; 38 | exports.stringSimilarity = stringSimilarity; 39 | exports.default = exports.stringSimilarity; 40 | //# sourceMappingURL=string-similarity.js.map -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/stephenjjbrown/string-similarity-js.svg?branch=master)](https://travis-ci.org/stephenjjbrown/string-similarity-js) 2 | [![codecov](https://codecov.io/gh/stephenjjbrown/string-similarity-js/branch/master/graph/badge.svg)](https://codecov.io/gh/stephenjjbrown/string-similarity-js) 3 | [![Wallaby.js](https://img.shields.io/badge/wallaby.js-configured-green.svg)](https://wallabyjs.com) 4 | [![Codacy 
Badge](https://app.codacy.com/project/badge/Grade/1f8dbc1fcb584d818c21869f4742f936)](https://www.codacy.com/gh/stephenjjbrown/string-similarity-js/dashboard?utm_source=github.com&utm_medium=referral&utm_content=stephenjjbrown/string-similarity-js&utm_campaign=Badge_Grade) 5 | 6 | # String Similarity 7 | 8 | A simple, lightweight (~700 bytes minified) string similarity function based on comparing the number of bigrams in common between any two strings. Returns a score between 0 and 1 indicating the strength of the match. 9 | 10 | Based on the [Sørensen–Dice coefficient](https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient), this algorithm is most effective at detecting rearranged words or misspellings. It tends to be less effective with very short strings, unless perhaps you switch to comparing individual characters in common instead of bigrams. 11 | 12 | It is case insensitive unless you specify otherwise. It does not ignore punctuation or spaces. In some cases, removing punctuation beforehand may improve accuracy. 13 | 14 | ### Update 15 | Version 2.0 optimizes the algorithm from O(n²) time complexity to O(n), and switches from using an array for bigrams to a Map, which was found to be substantially faster in performance tests. 16 | 17 | ## Usage 18 | 19 | ### Requirements 20 | This library uses the built-in [Map](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map) data structure for optimal performance. Therefore, it requires at least IE11 or a polyfill for Map. 
21 | 22 | ### Examples 23 | 24 | ```typescript 25 | import { stringSimilarity } from "string-similarity-js"; 26 | 27 | // Rearranged words 28 | stringSimilarity("Lorem ipsum", "Ipsum lorem") 29 | // Returns a score of 0.9 30 | 31 | // Typos 32 | stringSimilarity("The quick brown fox jumps over the lazy dog", "The quck brown fx jumps over the lazy dog") 33 | // 0.92 34 | 35 | // Even more different 36 | stringSimilarity("The quick brown fox jumps over the lazy dog", "The quack brain fax jomps odor the lady frog") 37 | // 0.65 38 | 39 | // Completely different strings 40 | stringSimilarity("The quick brown fox jumps over the lazy dog", "Lorem ipsum") 41 | // 0.07 42 | 43 | // Tiny strings are less effective with default settings 44 | stringSimilarity("DMV", "DNV") 45 | // Returns 0, because technically there are no bigrams in common between the two 46 | 47 | // Passing in a substring length of 1 may improve accuracy on tiny strings 48 | stringSimilarity("DMV", "DNV", 1) 49 | // Returns 0.67, the percentage of letters in common between the two 50 | ``` 51 | 52 | ## License 53 | 54 | This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details --------------------------------------------------------------------------------