├── .editorconfig ├── .eslintignore ├── .eslintrc.json ├── .gitignore ├── .mocharc.json ├── .prettierignore ├── .prettierrc ├── .vscode └── settings.json ├── CHANGELOG.md ├── LICENSE ├── README.md ├── package.json ├── src ├── core │ ├── interface │ │ └── Similarity.ts │ └── packages │ │ ├── Cosine.ts │ │ ├── DiceCoefficient.ts │ │ ├── JaccardIndex.ts │ │ ├── JaroWinkler.ts │ │ ├── Levenshtein.ts │ │ ├── LongestCommonSubsequence.ts │ │ └── MetricLCS.ts └── index.ts ├── test ├── Cosine.test.ts ├── DiceCoefficient.test.ts ├── JaccardIndex.test.ts ├── JaroWinkler.test.ts ├── Levenshtein.test.ts ├── LongestCommonSubsequence.test.ts └── MetricLCS.test.ts ├── tsconfig.json └── yaml ├── Cosine.yaml ├── DiceCoefficient.yaml ├── JaccardIndex.yaml ├── LongestCommonSubsequence.yaml └── case.yaml /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | charset = utf-8 7 | trim_trailing_whitespace = false 8 | insert_final_newline = false -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | **/node_modules/** 2 | dist 3 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "@typescript-eslint/parser", 3 | "extends": ["alloy", "alloy/typescript"], 4 | "plugins": ["@typescript-eslint"], 5 | "env": { 6 | "browser": true, 7 | "node": true, 8 | "commonjs": true, 9 | "es6": true 10 | }, 11 | "parserOptions": { 12 | "sourceType": "module", 13 | "ecmaVersion": 2022 14 | }, 15 | "root": true, 16 | "rules": { 17 | "no-duplicate-imports": 0, 18 | "for-direction": "error", 19 | "getter-return": [ 20 | "error", 21 | { 22 | "allowImplicit": false 23 | } 24 | ], 25 | "no-await-in-loop": "off", 26 | 
"no-compare-neg-zero": "error", 27 | "no-cond-assign": ["error", "except-parens"], 28 | "no-console": "off", 29 | "no-constant-condition": [ 30 | "error", 31 | { 32 | "checkLoops": false 33 | } 34 | ], 35 | "semi": [2, "never"], 36 | "no-control-regex": "error", 37 | "no-debugger": "error", 38 | "no-dupe-args": "error", 39 | "no-dupe-keys": "error", 40 | "max-nested-callbacks": ["error", 5], 41 | "@typescript-eslint/consistent-type-definitions": ["error", "interface"] 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | 3 | package-lock.json 4 | yarn.lock 5 | pnpm-lock.yaml 6 | 7 | lib 8 | 9 | dist 10 | *.log 11 | .cache -------------------------------------------------------------------------------- /.mocharc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extension": ["ts"], 3 | "spec": "test/*.test.ts", 4 | "require": "ts-node/register" 5 | } 6 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | dist 2 | .vscode 3 | yaml 4 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true, 3 | "semi": false 4 | } 5 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[typescript]": { 3 | "editor.formatOnSave": true 4 | }, 5 | "typescript.tsdk": "node_modules/typescript/lib", 6 | } -------------------------------------------------------------------------------- /CHANGELOG.md: 
-------------------------------------------------------------------------------- 1 | ## Release Notes 2 | 3 | ### 1.3.0 4 | * Feature: Support JaroWinkler 5 | 6 | ### 1.2.0 7 | * Feature: longestCommonSubsequence = lcs 8 | * Feature: metricLcs = mlcs 9 | 10 | 11 | ### 1.1.0 12 | * Feature: Support import&require 13 | * Optimize: JS to Ts 14 | * Optimize: babel to tsup 15 | 16 | 17 | ### 1.0.9 18 | * Feature: Basic building 19 | * Feature: Cosine 20 | * Feature: DiceCoefficient 21 | * Feature: JaccardIndex 22 | * Feature: Levenshtein 23 | * Feature: LongestCommonSubsequence 24 | * Feature: MetricLCS 25 | * Feature: Add function sortMatch() 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2019-present Rabbitzzc 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # string-comparison 3 | ![npm bundle size](https://img.shields.io/bundlephobia/minzip/string-comparison) 4 | ![npm](https://img.shields.io/npm/dm/string-comparison) 5 | [![GitHub stars](https://img.shields.io/github/stars/Rabbitzzc/js-string-comparison)](https://github.com/Rabbitzzc/js-string-comparison/stargazers) 6 | [![GitHub license](https://img.shields.io/github/license/Rabbitzzc/js-string-comparison)](https://github.com/Rabbitzzc/js-string-comparison/blob/master/LICENSE) 7 | 8 | **JavaScript implementation of [tdebatty/java-string-similarity](https://github.com/tdebatty/java-string-similarity)** 9 | 10 | A library implementing different string similarity, distance and sortMatch measures. Several algorithms (including Levenshtein edit distance and siblings, Longest Common Subsequence, cosine similarity etc.) are currently implemented. Check the summary table below for the complete list... 
11 | 12 | - [string-comparison](#string-comparison) 13 | - [Download \& Usage](#download--usage) 14 | - [OverView](#overview) 15 | - [Normalized, metric, similarity and distance](#normalized-metric-similarity-and-distance) 16 | - [(Normalized) similarity and distance](#normalized-similarity-and-distance) 17 | - [Levenshtein](#levenshtein) 18 | - [Longest Common Subsequence](#longest-common-subsequence) 19 | - [Metric Longest Common Subsequence](#metric-longest-common-subsequence) 20 | - [Cosine similarity](#cosine-similarity) 21 | - [Sorensen-Dice coefficient](#sorensen-dice-coefficient) 22 | - [Jaro-Winkler similarity](#jaro-winkler-similarity) 23 | - [API](#api) 24 | - [Methods](#methods) 25 | - [similarity](#similarity) 26 | - [params](#params) 27 | - [return](#return) 28 | - [distance](#distance) 29 | - [params](#params-1) 30 | - [return](#return-1) 31 | - [sortMatch](#sortmatch) 32 | - [params](#params-2) 33 | - [return](#return-2) 34 | - [CHANGELOG](#changelog) 35 | - [MIT](#mit) 36 | 37 | 38 | ## Download & Usage 39 | 40 | download 41 | 42 | ```shell 43 | npm install string-comparison --save 44 | yarn add string-comparison 45 | pnpm add string-comparison 46 | ``` 47 | usage 48 | 49 | ```js 50 | let stringComparison = require('string-comparison') 51 | // or import stringComparison from 'string-comparison' 52 | 53 | const Thanos = 'healed' 54 | const Rival = 'sealed' 55 | const Avengers = ['edward', 'sealed', 'theatre'] 56 | 57 | // use by cosine 58 | let cos = stringComparison.cosine 59 | 60 | console.log(cos.similarity(Thanos, Rival)) 61 | console.log(cos.distance(Thanos, Rival)) 62 | console.log(cos.sortMatch(Thanos, Avengers)) 63 | 64 | ``` 65 | 66 | ## OverView 67 | 68 | The main characteristics of each implemented algorithm are presented below. The "cost" column gives an estimation of the computational cost to compute the similarity between two strings of length m and n respectively. 69 | 70 | | | Measure(s) | Normalized? | Metric? 
| Type | Cost | Typical usage | 71 | | ------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------- | ----------- | ------- | ------- | ------ | --------------- | 72 | | [Jaccard index](https://github.com/luozhouyang/python-string-similarity/blob/master/README.md#jaccard-index) | similarity
distance
sortMatch | Yes | Yes | Set | O(m+n) | | 73 | | [Cosine similarity](https://github.com/luozhouyang/python-string-similarity/blob/master/README.md#cosine-similarity) | similarity
distance
sortMatch | Yes | No | Profile | O(m+n) | | 74 | | [Sorensen-Dice coefficient](https://github.com/luozhouyang/python-string-similarity/blob/master/README.md#sorensen-dice-coefficient) | similarity
distance
sortMatch | Yes | No | Set | O(m+n) | | 75 | | [Levenshtein](https://github.com/luozhouyang/python-string-similarity/blob/master/README.md#levenshtein) | similarity
distance
sortMatch | No | Yes | | O(m*n) | | 76 | | [Jaro-Winkler](https://github.com/luozhouyang/python-string-similarity/blob/master/README.md#jaro-winkler) | similarity distance
sortMatch | Yes | No | | O(m*n) | typo correction | 77 | 78 | ## Normalized, metric, similarity and distance 79 | 80 | Although the topic might seem simple, a lot of different algorithms exist to measure text similarity or distance. Therefore the library defines some interfaces to categorize them. 81 | 82 | ### (Normalized) similarity and distance 83 | 84 | - StringSimilarity : Implementing algorithms define a similarity between strings (0 means strings are completely different). 85 | - NormalizedStringSimilarity : Implementing algorithms define a similarity between 0.0 and 1.0, like Jaro-Winkler for example. 86 | - StringDistance : Implementing algorithms define a distance between strings (0 means strings are identical), like Levenshtein for example. The maximum distance value depends on the algorithm. 87 | - NormalizedStringDistance : This interface extends StringDistance. For implementing classes, the computed distance value is between 0.0 and 1.0. NormalizedLevenshtein is an example of NormalizedStringDistance. 88 | 89 | ## Levenshtein 90 | 91 | The Levenshtein distance between two words is the minimum number of single-character edits (insertions, deletions or substitutions) required to change one word into the other. 92 | 93 | It is a metric string distance. This implementation uses dynamic programming (Wagner–Fischer algorithm) over a full (m+1) x (n+1) table. The space requirement is thus O(m.n) and the algorithm runs in O(m.n). 
94 | 95 | ```js 96 | import { levenshtein } from "string-comparison" 97 | import type {SortMatchResultType} from "string-comparison" 98 | 99 | const Thanos = 'healed' 100 | const Rival = 'sealed' 101 | const Avengers = ['edward', 'sealed', 'theatre'] 102 | 103 | console.log(levenshtein.similarity(Thanos, Rival)) 104 | console.log(levenshtein.distance(Thanos, Rival)) 105 | console.log(levenshtein.sortMatch(Thanos, Avengers) as SortMatchResultType) 106 | 107 | // output 108 | 0.8333333333333334 109 | 1 110 | [ 111 | { member: 'edward', index: 0, rating: 0.16666666666666663 }, 112 | { member: 'theatre', index: 2, rating: 0.4285714285714286 }, 113 | { member: 'sealed', index: 1, rating: 0.8333333333333334 } 114 | ] 115 | ``` 116 | 117 | 118 | ## Longest Common Subsequence 119 | 120 | The longest common subsequence (LCS) problem consists in finding the longest subsequence common to two (or more) sequences. It differs from problems of finding common substrings: unlike substrings, subsequences are not required to occupy consecutive positions within the original sequences. 121 | 122 | It is used by the diff utility, by Git for reconciling multiple changes, etc. 123 | 124 | The LCS distance between strings X (of length n) and Y (of length m) is n + m - 2 |LCS(X, Y)| 125 | min = 0 126 | max = n + m 127 | 128 | LCS distance is equivalent to Levenshtein distance when only insertion and deletion is allowed (no substitution), or when the cost of the substitution is the double of the cost of an insertion or deletion. 129 | 130 | This class implements the dynamic programming approach, which has a space requirement O(m.n), and computation cost O(m.n). 131 | 132 | In "Length of Maximal Common Subsequences", K.S. Larsen proposed an algorithm that computes the length of LCS in time O(log(m).log(n)). But the algorithm has a memory requirement O(m.n²) and was thus not implemented here. 
133 | 134 | ```js 135 | import { longestCommonSubsequence } from "string-comparison" 136 | or 137 | import { lcs } from "string-comparison" 138 | 139 | 140 | const Thanos = 'healed' 141 | const Rival = 'sealed' 142 | const Avengers = ['edward', 'sealed', 'theatre'] 143 | 144 | console.log(lcs.similarity(Thanos, Rival)) 145 | console.log(lcs.distance(Thanos, Rival)) 146 | console.log(lcs.sortMatch(Thanos, Avengers)) 147 | 148 | // output 149 | 0.8333333333333334 150 | 2 151 | [ 152 | { member: 'edward', index: 0, rating: 0.5 }, 153 | { member: 'theatre', index: 2, rating: 0.6153846153846154 }, 154 | { member: 'sealed', index: 1, rating: 0.8333333333333334 } 155 | ] 156 | ``` 157 | 158 | ## Metric Longest Common Subsequence 159 | 160 | Distance metric based on Longest Common Subsequence, from the notes "An LCS-based string metric" by Daniel Bakkelund. 161 | http://heim.ifi.uio.no/~danielry/StringMetric.pdf 162 | 163 | The distance is computed as 1 - |LCS(s1, s2)| / max(|s1|, |s2|) 164 | 165 | ```js 166 | import { metricLcs } from "string-comparison" 167 | or 168 | import { mlcs } from "string-comparison" 169 | 170 | const Thanos = 'healed' 171 | const Rival = 'sealed' 172 | const Avengers = ['edward', 'sealed', 'theatre'] 173 | 174 | console.log(metricLcs.similarity(Thanos, Rival)) 175 | console.log(metricLcs.distance(Thanos, Rival)) 176 | console.log(metricLcs.sortMatch(Thanos, Avengers)) 177 | 178 | // output 179 | 0.8333333333333334 180 | 0.16666666666666663 181 | [ 182 | { member: 'edward', index: 0, rating: 0.5 }, 183 | { member: 'theatre', index: 2, rating: 0.5714285714285714 }, 184 | { member: 'sealed', index: 1, rating: 0.8333333333333334 } 185 | ] 186 | ``` 187 | 188 | ## Cosine similarity 189 | 190 | Like Q-Gram distance, the input strings are first converted into sets of n-grams (sequences of n characters, also called k-shingles), but this time the cardinality of each n-gram is not taken into account. Each input string is simply a set of n-grams. 
The cosine similarity is then computed as |V1 inter V2| / sqrt(|V1| * |V2|). 191 | 192 | Distance is computed as 1 - similarity. 193 | Unlike the Jaccard index, cosine distance is not a metric distance. 194 | 195 | ```js 196 | import { cosine } from "string-comparison" 197 | ``` 198 | 199 | ## Sorensen-Dice coefficient 200 | 201 | Similar to Jaccard index, but this time the similarity is computed as 2 * |V1 inter V2| / (|V1| + |V2|). 202 | 203 | Distance is computed as 1 - similarity. 204 | 205 | ```js 206 | import { diceCoefficient } from "string-comparison" 207 | ``` 208 | 209 | ## Jaro-Winkler similarity 210 | 211 | The Jaro-Winkler similarity is a string metric measuring edit distance between two strings. Jaro-Winkler similarity is very similar to Jaro similarity; the two differ only when the two strings share a common prefix. Jaro-Winkler similarity uses a prefix scale ‘p’ which gives a more accurate answer when the strings have a common prefix up to a defined maximum length l. 212 | 213 | ```js 214 | import { jaroWinkler } from "string-comparison" 215 | ``` 216 | 217 | ## API 218 | 219 | * `cosine` 220 | * `diceCoefficient` 221 | * `jaccardIndex` 222 | * `levenshtein` 223 | * `lcs` = `longestCommonSubsequence` 224 | * `mlcs` = `metricLcs` 225 | * `jaroWinkler` 226 | 227 | 228 | ## Methods 229 | * `similarity`. 230 | * `distance`. 231 | * `sortMatch` 232 | 233 | ### similarity 234 | 235 | Implementing algorithms define a similarity between strings 236 | 237 | #### params 238 | 239 | 1. thanos [String] 240 | 2. rival [String] 241 | 242 | #### return 243 | 244 | Return a similarity between 0.0 and 1.0 245 | 246 | ### distance 247 | 248 | Implementing algorithms define a distance between strings (0 means strings are identical) 249 | 250 | #### params 251 | 252 | 1. `thanos` [String] 253 | 2. `rival` [String] 254 | 255 | #### return 256 | 257 | Return a number 258 | 259 | ### sortMatch 260 | 261 | #### params 262 | 263 | 1. thanos [String] 264 | 2. 
avengers [...String] 265 | 266 | #### return 267 | 268 | Return an array of objects - `SortMatchResultType` ex: 269 | ```js 270 | [ 271 | { member: 'edward', rating: 0.16666666666666663 }, 272 | { member: 'theatre', rating: 0.4285714285714286 }, 273 | { member: 'mailed', rating: 0.5 }, 274 | { member: 'sealed', rating: 0.8333333333333334 } 275 | ] 276 | ``` 277 | 278 | ## CHANGELOG 279 | [CHANGELOG](./CHANGELOG.md) 280 | 281 | 282 | ## MIT 283 | [MIT](./LICENSE) 284 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "string-comparison", 3 | "version": "1.3.0", 4 | "description": "A library implementing different string similarity", 5 | "main": "dist/index.js", 6 | "module": "dist/index.mjs", 7 | "types": "dist/index.d.ts", 8 | "engines": { 9 | "node": "^16.0.0 || >=18.0.0" 10 | }, 11 | "exports": { 12 | ".": { 13 | "import": "./dist/index.mjs", 14 | "require": "./dist/index.js", 15 | "types": "./dist/index.d.ts" 16 | } 17 | }, 18 | "scripts": { 19 | "test": "mocha", 20 | "lint": "eslint -c .eslintrc.json \"src/**/*.ts\"", 21 | "lint:fix": "eslint -c .eslintrc.json \"src/**/*.ts\" --fix", 22 | "build": "tsup src/index.ts --dts --format cjs,esm --minify", 23 | "watch": "tsup src/index.ts --watch" 24 | }, 25 | "repository": { 26 | "type": "git", 27 | "url": "https://github.com/Rabbitzzc/js-string-comparision" 28 | }, 29 | "files": [ 30 | "dist" 31 | ], 32 | "keywords": [ 33 | "strings", 34 | "compare similarity", 35 | "similarity", 36 | "Dice's Coefficient", 37 | "Cosine", 38 | "Jaccard Index", 39 | "Levenshtein", 40 | "Longest Common Subsequence", 41 | "Metric Longest Common Subsequence", 42 | "difference", 43 | "compare", 44 | "comparision", 45 | "similar", 46 | "distance", 47 | "match", 48 | "sort match" 49 | ], 50 | "author": { 51 | "name": "Rabbitzzc", 52 | "email": "zzclovelcs@gmail.com" 53 | }, 54 | "license": "MIT", 
55 | "devDependencies": { 56 | "@swc/core": "^1.3.76", 57 | "@types/mocha": "^10.0.1", 58 | "@types/node": "^20.5.0", 59 | "@typescript-eslint/eslint-plugin": "^6.3.0", 60 | "@typescript-eslint/parser": "^6.3.0", 61 | "async": "^3.2.4", 62 | "eslint": "^8.47.0", 63 | "eslint-config-alloy": "^5.1.1", 64 | "eslint-config-prettier": "^9.0.0", 65 | "eslint-plugin-prettier": "^5.0.0", 66 | "mocha": "^10.2.0", 67 | "npm-run-all": "^4.1.5", 68 | "prettier": "^3.0.1", 69 | "ts-node": "^10.9.1", 70 | "tsup": "^7.2.0", 71 | "typescript": "^5.1.6" 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/core/interface/Similarity.ts: --------------------------------------------------------------------------------
1 | /** One entry of the array returned by `sortMatch`. */
2 | export interface SortMatchResultType {
3 |   /** The candidate string, exactly as it was passed in. */
4 |   member: string
5 |   /** Position of the candidate in the input array. */
6 |   index: number
7 |   /** Value of `similarity(thanos, member)`. */
8 |   rating: number
9 | }
10 | 
11 | /**
12 |  * Shared base of every algorithm: argument validation, input normalization
13 |  * and `sortMatch`. Subclasses supply `similarity` and `distance`.
14 |  */
15 | export default abstract class Similarity {
16 |   /**
17 |    * Normalizes both inputs: strips every whitespace run and lower-cases.
18 |    * @returns the normalized `[thanos, rival]` pair, in that order
19 |    */
20 |   public static initParams(thanos: string, rival: string) {
21 |     return [
22 |       thanos.replace(/\s+/g, '').toLowerCase(),
23 |       rival.replace(/\s+/g, '').toLowerCase(),
24 |     ]
25 |   }
26 | 
27 |   /** @throws Error when `thanos` is not a string */
28 |   protected static checkThanosType(thanos: string) {
29 |     if (typeof thanos !== 'string')
30 |       throw new Error('first argument should be a string')
31 |   }
32 | 
33 |   /** @throws Error when `rival` is not a string */
34 |   protected static checkRivalType(rival: string) {
35 |     if (typeof rival !== 'string')
36 |       throw new Error('second argument should be a string')
37 |   }
38 | 
39 |   /** @throws Error when `avengers` is not an array made up only of strings */
40 |   protected static checkAvengersType(avengers: string[]) {
41 |     if (!Array.isArray(avengers))
42 |       throw new Error('second argument should be an array of strings')
43 |     // `some`, not `find`: `find` returns the offending element itself, so a
44 |     // falsy non-string (0, null, undefined, false, NaN) made the test falsy
45 |     // and slipped through.
46 |     if (avengers.some((s) => typeof s !== 'string'))
47 |       throw new Error('second argument should be an array of strings')
48 |   }
49 | 
50 |   /**
51 |    * Rates every candidate against `thanos`.
52 |    * @param thanos reference string
53 |    * @param avengers candidate strings
54 |    * @returns one entry per candidate, sorted by ascending rating (best match last)
55 |    * @throws Error when an argument has the wrong type
56 |    */
57 |   public sortMatch(thanos: string, avengers: string[]): SortMatchResultType[] {
58 |     Similarity.checkThanosType(thanos)
59 |     Similarity.checkAvengersType(avengers)
60 | 
61 |     return avengers
62 |       .map((str, index) => {
63 |         return {
64 |           member: str,
65 |           index,
66 |           rating: this.similarity(thanos, str),
67 |         }
68 |       })
69 |       .sort((a, b) => a.rating - b.rating)
70 |   }
71 | 
72 |   /**
73 |    * Compares two strings.
74 |    * @returns the similarity of `thanos` and `rival`
75 |    */
76 |   public abstract similarity(thanos: string, rival: string): number
77 | 
78 |   /** @returns the distance between `thanos` and `rival` */
79 |   public abstract distance(thanos: string, rival: string): number
80 | }
81 | 
-------------------------------------------------------------------------------- /src/core/packages/Cosine.ts: -------------------------------------------------------------------------------- 1 | import Similarity from '../interface/Similarity' 2 | 3 | export default class Cosine extends Similarity { 4 | public similarity(pThanos: string, pRival: string) { 5 | Similarity.checkThanosType(pThanos) 6 | Similarity.checkRivalType(pRival) 7 | 8 | // clear white space characters & to low 9 | const [thanos, rival] = Similarity.initParams(pThanos, pRival) 10 | 11 | if (!thanos.length && !rival.length) return 1 12 | if (!thanos.length || !rival.length) return 0 13 | if (thanos === rival) return 1 14 | 15 | // string vectorization 16 | let common = Array.from(new Set(thanos.split('').concat(rival.split('')))) 17 | 18 | let vectorThanos = this.stringVectorization(thanos.split(''), common) 19 | let vectorRival = this.stringVectorization(rival.split(''), common) 20 | let [dotproduct, mThanos, mRival] = [0, 0, 0] 21 | 22 | for (let i = 0; i < vectorThanos.length; ++i) { 23 | dotproduct += vectorThanos[i] * vectorRival[i] 24 | mThanos += vectorThanos[i] * vectorThanos[i] 25 | mRival += vectorRival[i] * vectorRival[i] 26 | } 27 | mThanos = Math.sqrt(mThanos) 28 | mRival = Math.sqrt(mRival) 29 | return Number(dotproduct) / (mThanos * mRival) 30 | } 31 | 32 | public distance(thanos: string, rival: string) { 33 | return 1.0 - this.similarity(thanos, rival) 34 | } 35 | 36 | // string vectorization 37 | private stringVectorization(strArr: string | any[], common: any[]) { 38 | return common.map((v: any) => 
(strArr.includes(v) ? 1 : 0)) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/core/packages/DiceCoefficient.ts: -------------------------------------------------------------------------------- 1 | import Similarity from '../interface/Similarity' 2 | 3 | export default class DiceCoefficient extends Similarity { 4 | public similarity(pThanos: string, pRival: string): number { 5 | Similarity.checkThanosType(pThanos) 6 | Similarity.checkRivalType(pRival) 7 | 8 | const [thanos, rival] = Similarity.initParams(pThanos, pRival) 9 | 10 | let [length1, length2] = [thanos.length, rival.length] 11 | 12 | if ((!thanos.length && !rival.length) || thanos === rival) return 1 13 | 14 | if (length1 < 2 || length2 < 2) return 0 15 | 16 | let thanosBigrams = new Map() 17 | 18 | // get the intersecting character, two strings as a group 19 | for (let i = 0; i < length1 - 1; i++) { 20 | const bigram = thanos.slice(i, i + 2) 21 | const count = thanosBigrams.has(bigram) 22 | ? thanosBigrams.get(bigram) + 1 23 | : 1 24 | 25 | thanosBigrams.set(bigram, count) 26 | } 27 | let intersectionSize = 0 28 | for (let i = 0; i < length2 - 1; i++) { 29 | const bigram = rival.slice(i, i + 2) 30 | const count = thanosBigrams.has(bigram) ? 
thanosBigrams.get(bigram) : 0 31 | 32 | if (count > 0) { 33 | thanosBigrams.set(bigram, count - 1) 34 | ++intersectionSize 35 | } 36 | } 37 | 38 | return (2.0 * intersectionSize) / (length1 + length2 - 2) 39 | } 40 | 41 | public distance(thanos: string, rival: string): number { 42 | return 1.0 - this.similarity(thanos, rival) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/core/packages/JaccardIndex.ts: --------------------------------------------------------------------------------
1 | import Similarity from '../interface/Similarity'
2 | 
3 | /** Jaccard index computed over the sets of characters of the two strings. */
4 | export default class JaccardIndex extends Similarity {
5 |   /**
6 |    * @returns |A inter B| / |A union B| of the character sets of the two
7 |    *   normalized strings; 1 when both are empty or equal
8 |    * @throws Error when an argument is not a string
9 |    */
10 |   public similarity(pThanos: string, pRival: string): number {
11 |     Similarity.checkThanosType(pThanos)
12 |     Similarity.checkRivalType(pRival)
13 | 
14 |     const [thanos, rival] = Similarity.initParams(pThanos, pRival)
15 | 
16 |     if ((!thanos.length && !rival.length) || thanos === rival) return 1
17 | 
18 |     // Build each set once. Array.from iterates by code point on both sides;
19 |     // mixing it with split('') (UTF-16 code units) meant characters outside
20 |     // the BMP could never be found in the other string's set.
21 |     const thanosChars = new Set(Array.from(thanos))
22 |     const rivalChars = new Set(Array.from(rival))
23 | 
24 |     let intersectionSize = 0
25 |     thanosChars.forEach((ch) => {
26 |       if (rivalChars.has(ch)) ++intersectionSize
27 |     })
28 | 
29 |     // |A union B| = |A| + |B| - |A inter B|; never 0 here because the
30 |     // both-empty case returned above.
31 |     const unionSize = thanosChars.size + rivalChars.size - intersectionSize
32 | 
33 |     return intersectionSize / unionSize
34 |   }
35 | 
36 |   /** @returns 1 - similarity */
37 |   public distance(thanos: string, rival: string): number {
38 |     return 1.0 - this.similarity(thanos, rival)
39 |   }
40 | }
41 | 
-------------------------------------------------------------------------------- /src/core/packages/JaroWinkler.ts: --------------------------------------------------------------------------------
1 | import Similarity from '../interface/Similarity'
2 | 
3 | // https://www.geeksforgeeks.org/jaro-and-jaro-winkler-similarity/
4 | export default class JaroWinkler extends Similarity {
5 |   /**
6 |    * Jaro-Winkler distance.
7 |    * @returns 1 - similarity, so 0 for strings that are equal after
8 |    *   normalization and 1 when no character matches
9 |    * @throws Error when an argument is not a string
10 |    */
11 |   public distance(pThanos: string, pRival: string): number {
12 |     return 1.0 - this.similarity(pThanos, pRival)
13 |   }
14 | 
15 |   /**
16 |    * Jaro-Winkler similarity: the Jaro similarity, raised for strings that
17 |    * share a prefix.
18 |    * @returns a value between 0 and 1; 1 when both normalized strings are
19 |    *   empty or equal
20 |    * @throws Error when an argument is not a string
21 |    */
22 |   public similarity(thanos: string, rival: string): number {
23 |     Similarity.checkThanosType(thanos)
24 |     Similarity.checkRivalType(rival)
25 | 
26 |     // clear white space characters & lower-case; the prefix below must be
27 |     // measured on the same normalized strings the Jaro score was computed on
28 |     const [s1, s2] = Similarity.initParams(thanos, rival)
29 | 
30 |     let score = this.jaro(s1, s2)
31 | 
32 |     // Winkler boost: only above 0.7, scaling factor 0.1, prefix capped at 4
33 |     if (score > 0.7) {
34 |       const limit = Math.min(4, s1.length, s2.length)
35 |       let prefix = 0
36 | 
37 |       while (prefix < limit && s1[prefix] === s2[prefix]) {
38 |         prefix++
39 |       }
40 | 
41 |       score += 0.1 * prefix * (1 - score)
42 |     }
43 | 
44 |     return score
45 |   }
46 | 
47 |   /** Jaro similarity of two already-normalized strings. */
48 |   private jaro(s1: string, s2: string): number {
49 |     const len1 = s1.length
50 |     const len2 = s2.length
51 | 
52 |     if (!len1 && !len2) return 1
53 |     if (!len1 || !len2) return 0
54 |     if (s1 === s2) return 1
55 | 
56 |     // characters match only if they are at most this far apart
57 |     const maxDist = Math.floor(Math.max(len1, len2) / 2) - 1
58 |     let match = 0
59 |     const hashS1 = new Array(len1).fill(0)
60 |     const hashS2 = new Array(len2).fill(0)
61 | 
62 |     for (let i = 0; i < len1; i++) {
63 |       for (
64 |         let j = Math.max(0, i - maxDist);
65 |         j < Math.min(len2, i + maxDist + 1);
66 |         j++
67 |       ) {
68 |         if (s1[i] === s2[j] && hashS2[j] === 0) {
69 |           hashS1[i] = 1
70 |           hashS2[j] = 1
71 |           match++
72 |           break
73 |         }
74 |       }
75 |     }
76 | 
77 |     if (match === 0) {
78 |       return 0
79 |     }
80 | 
81 |     // count matched characters that appear in a different order
82 |     let t = 0
83 |     let point = 0
84 | 
85 |     for (let i = 0; i < len1; i++) {
86 |       if (hashS1[i] === 1) {
87 |         while (hashS2[point] === 0) {
88 |           point++
89 |         }
90 |         if (s1[i] !== s2[point++]) {
91 |           t++
92 |         }
93 |       }
94 |     }
95 |     // each transposition was counted from both sides
96 |     t /= 2
97 | 
98 |     return (match / len1 + match / len2 + (match - t) / match) / 3.0
99 |   }
100 | }
101 | 
-------------------------------------------------------------------------------- /src/core/packages/Levenshtein.ts: -------------------------------------------------------------------------------- 1 | import Similarity from '../interface/Similarity' 2 | 3 | export default class Levenshtein extends Similarity { 4 | public similarity(pThanos: string, pRival: 
string): number { 5 | Similarity.checkThanosType(pThanos) 6 | Similarity.checkRivalType(pRival) 7 | 8 | const [thanos, rival] = Similarity.initParams(pThanos, pRival) 9 | return !thanos.length && !rival.length 10 | ? 1 11 | : 1 - 12 | Number(this.distance(thanos, rival)) / 13 | Math.max(thanos.length, rival.length) 14 | } 15 | 16 | public distance(pThanos: string, pRival: string) { 17 | Similarity.checkThanosType(pThanos) 18 | Similarity.checkRivalType(pRival) 19 | 20 | const [thanos, rival] = Similarity.initParams(pThanos, pRival) 21 | 22 | if (thanos === rival) return 0 23 | 24 | let [len1, len2] = [thanos.length, rival.length] 25 | if (!len1) return len2 26 | if (!len2) return len1 27 | 28 | // init array 29 | let dynamicArray = [...Array(len1 + 1)].map(() => Array(len2 + 1).fill(0)) 30 | 31 | for (let i = 0; i <= len1; ++i) { 32 | dynamicArray[i][0] = i 33 | } 34 | for (let j = 0; j <= len2; ++j) { 35 | dynamicArray[0][j] = j 36 | } 37 | let temp 38 | for (let i = 1; i <= len1; ++i) { 39 | for (let j = 1; j <= len2; ++j) { 40 | temp = thanos[i - 1] === rival[j - 1] ? 
0 : 1 41 | 42 | // delete insert replace 43 | dynamicArray[i][j] = Math.min( 44 | dynamicArray[i - 1][j] + 1, 45 | dynamicArray[i][j - 1] + 1, 46 | dynamicArray[i - 1][j - 1] + temp, 47 | ) 48 | } 49 | } 50 | 51 | return dynamicArray[len1][len2] 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/core/packages/LongestCommonSubsequence.ts: --------------------------------------------------------------------------------
1 | import Similarity from '../interface/Similarity'
2 | 
3 | export default class LongestCommonSubsequence extends Similarity {
4 |   /**
5 |    * Length of the Longest Common Subsequence (LCS) of the two strings.
6 |    * Inputs are normalized (whitespace removed, lower-cased) before comparing.
7 |    * @throws Error when an argument is not a string
8 |    */
9 |   public static lcsLength(pThanos: string, pRival: string): number {
10 |     Similarity.checkThanosType(pThanos)
11 |     Similarity.checkRivalType(pRival)
12 | 
13 |     const [thanos, rival] = Similarity.initParams(pThanos, pRival)
14 |     // (len1 + 1) x (len2 + 1) table, all cells start at 0
15 |     let [len1, len2] = [thanos.length, rival.length]
16 |     let dynamicArray = [...Array(len1 + 1)].map(() => Array(len2 + 1).fill(0))
17 | 
18 |     for (let i = 1; i <= len1; ++i) {
19 |       for (let j = 1; j <= len2; ++j) {
20 |         dynamicArray[i][j] =
21 |           thanos[i - 1] === rival[j - 1]
22 |             ? dynamicArray[i - 1][j - 1] + 1
23 |             : Math.max(dynamicArray[i][j - 1], dynamicArray[i - 1][j])
24 |       }
25 |     }
26 |     return dynamicArray[len1][len2]
27 |   }
28 | 
29 |   /**
30 |    * @returns 2 * |LCS| / (|thanos| + |rival|) on the normalized strings;
31 |    *   1 when both are empty or equal
32 |    * @throws Error when an argument is not a string
33 |    */
34 |   public similarity(pThanos: string, pRival: string): number {
35 |     Similarity.checkThanosType(pThanos)
36 |     Similarity.checkRivalType(pRival)
37 | 
38 |     // clear white space characters & lower-case
39 |     const [thanos, rival] = Similarity.initParams(pThanos, pRival)
40 | 
41 |     if ((!thanos.length && !rival.length) || thanos === rival) return 1
42 | 
43 |     return (
44 |       (2.0 * LongestCommonSubsequence.lcsLength(thanos, rival)) /
45 |       (thanos.length + rival.length)
46 |     )
47 |   }
48 | 
49 |   /**
50 |    * @returns |thanos| + |rival| - 2 * |LCS| on the normalized strings
51 |    * @throws Error when an argument is not a string
52 |    */
53 |   public distance(pThanos: string, pRival: string) {
54 |     // Validate before normalizing: initParams calls .replace on its inputs,
55 |     // so a non-string would otherwise fail with a raw TypeError instead of
56 |     // the validation error every other method raises.
57 |     Similarity.checkThanosType(pThanos)
58 |     Similarity.checkRivalType(pRival)
59 | 
60 |     const [thanos, rival] = Similarity.initParams(pThanos, pRival)
61 |     return (
62 |       thanos.length +
63 |       rival.length -
64 |       2 * LongestCommonSubsequence.lcsLength(thanos, rival)
65 |     )
66 |   }
67 | }
68 | 
-------------------------------------------------------------------------------- /src/core/packages/MetricLCS.ts: -------------------------------------------------------------------------------- 1 | import Similarity from '../interface/Similarity' 2 | 3 | export default class MetricLCS extends Similarity { 4 | public static lcsLength(thanos: string, rival: string) { 5 | Similarity.checkThanosType(thanos) 6 | Similarity.checkRivalType(rival) 7 | 8 | // init array elements=0 9 | let [len1, len2] = [thanos.length, rival.length] 10 | let dynamicArray = [...Array(len1 + 1)].map(() => Array(len2 + 1).fill(0)) 11 | 12 | for (let i = 1; i <= len1; ++i) { 13 | for (let j = 1; j <= len2; ++j) { 14 | dynamicArray[i][j] = 15 | thanos[i - 1] === rival[j - 1] 16 | ? 
import type { SortMatchResultType } from './core/interface/Similarity'
import Cosine from './core/packages/Cosine'
import DiceCoefficient from './core/packages/DiceCoefficient'
import JaccardIndex from './core/packages/JaccardIndex'
import Levenshtein from './core/packages/Levenshtein'
import LongestCommonSubsequence from './core/packages/LongestCommonSubsequence'
import MetricLCS from './core/packages/MetricLCS'
import JaroWinkler from './core/packages/JaroWinkler'

export type { SortMatchResultType }

// One shared instance per algorithm, created in the same order as before.
const cosineInstance = new Cosine()
const diceInstance = new DiceCoefficient()
const jaccardInstance = new JaccardIndex()
const levenshteinInstance = new Levenshtein()
const lcsInstance = new LongestCommonSubsequence()
const metricLcsInstance = new MetricLCS()
const jaroWinklerInstance = new JaroWinkler()

/**
 * Default export of the package: a map from algorithm name to its instance.
 * `lcs` / `longestCommonSubsequence` and `mlcs` / `metricLcs` are aliases
 * that point at the very same object.
 */
const output = {
  cosine: cosineInstance,
  diceCoefficient: diceInstance,
  jaccardIndex: jaccardInstance,
  levenshtein: levenshteinInstance,
  lcs: lcsInstance,
  longestCommonSubsequence: lcsInstance,
  mlcs: metricLcsInstance,
  metricLcs: metricLcsInstance,
  jaroWinkler: jaroWinklerInstance,
}

export default output
import * as assert from 'assert'
import DiceCoefficient from '../src/core/packages/DiceCoefficient'
const diceCoefficient = new DiceCoefficient()

// This file previously was a verbatim copy of the Cosine test (it imported
// and exercised Cosine), so DiceCoefficient had no coverage at all.
// Expected values are the DiceCoefficient results recorded in
// yaml/DiceCoefficient.yaml for the same input pairs.
describe('test DiceCoefficient Similarity', () => {
  describe('similarity()', () => {
    const testData = [
      {
        first: 'french',
        second: 'quebec',
        expected: 0,
      },
      {
        first: 'france',
        second: 'france',
        expected: 1,
      },
      {
        first: 'healed',
        second: 'sealed',
        expected: 0.8,
      },
      {
        first: 'web applications',
        second: 'applications of the web',
        expected: 0.7878787878787878,
      },
      {
        first: 'a',
        second: 'a',
        expected: 1,
      },
      {
        first: 'a',
        second: 'b',
        expected: 0,
      },
      {
        first: '',
        second: '',
        expected: 1,
      },
      {
        first: 'a',
        second: '',
        expected: 0,
      },
      {
        first: '',
        second: 'a',
        expected: 0,
      },
      {
        first: 'apple event',
        second: 'apple event',
        expected: 1,
      },
      {
        first: 'ab',
        second: 'ba',
        expected: 0,
      },
    ]
    testData.forEach((td) => {
      // Include the inputs in the title so a failing case can be identified.
      it(`'${td.first}' vs '${td.second}' should be ${td.expected}`, () => {
        assert.equal(
          diceCoefficient.similarity(td.first, td.second),
          td.expected,
        )
      })
    })
  })
  describe('sortMatch()', () => {})
})
import * as assert from 'assert'
import JaroWinkler from '../src/core/packages/JaroWinkler'
const jaroWinkler = new JaroWinkler()

describe('test JaroWinkler Similarity', () => {
  describe('similarity()', () => {
    // Each row: [first, second, expected similarity].
    const cases: Array<[string, string, number]> = [
      ['TRATE', 'TRACE', 0.9066666666666667],
      ['DwAyNE', 'DuANE', 0.8400000000000001],
      ['a', 'a', 1],
      ['a', 'b', 0],
      ['', '', 1],
      ['a', '', 0],
      ['', 'a', 0],
      ['apple event', 'apple event', 1],
      ['ab', 'ba', 0],
    ]

    // Register one mocha test per row, in table order.
    for (const [first, second, expected] of cases) {
      it(`should be ${expected}`, () => {
        const actual = jaroWinkler.similarity(first, second)
        assert.equal(actual, expected)
      })
    }
  })

  // Placeholder suite: no sortMatch() cases yet.
  describe('sortMatch()', () => {})
})
import * as assert from 'assert'
import LongestCommonSubsequence from '../src/core/packages/LongestCommonSubsequence'
const lcs = new LongestCommonSubsequence()

describe('test LongestCommonSubsequence', () => {
  describe('similarity()', () => {
    // Each row: [first, second, expected similarity].
    const similarityCases: Array<[string, string, number]> = [
      ['french', 'quebec', 0.3333333333333333],
      ['france', 'france', 1],
      ['healed', 'sealed', 0.8333333333333334],
      ['web applications', 'applications of the web', 0.6857142857142857],
      ['a', 'a', 1],
      ['a', 'b', 0],
      ['', '', 1],
      ['a', '', 0],
      ['', 'a', 0],
      ['apple event', 'apple event', 1],
      ['ab', 'ba', 0.5],
    ]

    for (const [first, second, expected] of similarityCases) {
      it(`should be ${expected}`, () => {
        const actual = lcs.similarity(first, second)
        assert.equal(actual, expected)
      })
    }
  })

  // Placeholder suite: no sortMatch() cases yet.
  describe('sortMatch()', () => {})

  describe('lcsLength()', () => {
    // Each row: [first, second, expected LCS length].
    const lengthCases: Array<[string, string, number]> = [
      ['french', 'quebec', 2],
      ['france', 'france', 6],
      ['healed', 'sealed', 5],
      ['web applications', 'applications of the web', 12],
      ['a', 'a', 1],
      ['a', 'b', 0],
      ['', '', 0],
      ['a', '', 0],
      ['', 'a', 0],
      ['apple event', 'apple event', 10],
      ['ab', 'ba', 1],
    ]

    for (const [first, second, expected] of lengthCases) {
      it(`should be ${expected}`, () => {
        const actual = LongestCommonSubsequence.lcsLength(first, second)
        assert.equal(actual, expected)
      })
    }
  })
})
${td.expected}`, () => { 66 | assert.equal(metricLCS.similarity(td.first, td.second), td.expected) 67 | }) 68 | }) 69 | }) 70 | describe('sortMatch()', () => {}) 71 | }) 72 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "module": "umd", 5 | "moduleResolution": "Node", 6 | "declaration": true, 7 | "declarationDir": "dist/types", 8 | "outDir": "./dist", 9 | "lib": ["es6", "es7", "DOM", "ESNext"] 10 | }, 11 | "include": ["src/**/*"], 12 | "exclude": ["node_modules", "dist", "lib"] 13 | } 14 | -------------------------------------------------------------------------------- /yaml/Cosine.yaml: -------------------------------------------------------------------------------- 1 | testData: 2 | - expected: 0.3651483716701107 3 | - expected: 1 4 | - expected: 1 5 | - expected: 0.7999999999999998 6 | - expected: 0.9258200997725515 7 | - expected: 0.9660917830792959 8 | - expected: 0.8783100656536799 9 | - expected: 0.6343350474165466 10 | - expected: 0.6666666666666666 11 | - expected: 0.9574271077563381 12 | - expected: 1 13 | - expected: 0 14 | - expected: 1 15 | - expected: 0 16 | - expected: 0 17 | - expected: 1 18 | - expected: 0.9258200997725514 19 | - expected: 0.9999999999999998 20 | -------------------------------------------------------------------------------- /yaml/DiceCoefficient.yaml: -------------------------------------------------------------------------------- 1 | testData: 2 | - expected: 0 3 | - expected: 1 4 | - expected: 1 5 | - expected: 0.8 6 | - expected: 0.7878787878787878 7 | - expected: 0.92 8 | - expected: 0.6464646464646465 9 | - expected: 0.27906976744186046 10 | - expected: 0.1411764705882353 11 | - expected: 0.7741935483870968 12 | - expected: 1 13 | - expected: 0 14 | - expected: 1 15 | - expected: 0 16 | - expected: 0 17 | - expected: 1 18 | - expected: 
0.9090909090909091 19 | - expected: 0 20 | -------------------------------------------------------------------------------- /yaml/JaccardIndex.yaml: -------------------------------------------------------------------------------- 1 | testData: 2 | - expected: 0.2222222222222222 3 | - expected: 1 4 | - expected: 1 5 | - expected: 0.6666666666666666 6 | - expected: 0.8571428571428571 7 | - expected: 0.9333333333333333 8 | - expected: 0.782608695652174 9 | - expected: 0.4642857142857143 10 | - expected: 0.5 11 | - expected: 0.9166666666666666 12 | - expected: 1 13 | - expected: 0 14 | - expected: 1 15 | - expected: 0 16 | - expected: 0 17 | - expected: 1 18 | - expected: 0.8571428571428571 19 | - expected: 1 20 | -------------------------------------------------------------------------------- /yaml/LongestCommonSubsequence.yaml: -------------------------------------------------------------------------------- 1 | testData: 2 | - expected: 0.3333333333333333 3 | - expected: 1 4 | - expected: 1 5 | - expected: 0.8333333333333334 6 | - expected: 0.6857142857142857 7 | - expected: 0.9615384615384616 8 | - expected: 0.5346534653465347 9 | - expected: 0.3409090909090909 10 | - expected: 0.27586206896551724 11 | - expected: 0.8484848484848485 12 | - expected: 1 13 | - expected: 0 14 | - expected: 1 15 | - expected: 0 16 | - expected: 0 17 | - expected: 1 18 | - expected: 0.9230769230769231 19 | - expected: 0.5 20 | -------------------------------------------------------------------------------- /yaml/case.yaml: -------------------------------------------------------------------------------- 1 | testData: 2 | - 3 | first: 'french' 4 | second: 'quebec' 5 | - 6 | first: 'france' 7 | second: 'france' 8 | - 9 | first: 'fRaNce' 10 | second: 'france' 11 | - 12 | first: 'healed' 13 | second: 'sealed' 14 | - 15 | first: 'web applications' 16 | second: 'applications of the web' 17 | - 18 | first: 'this will have a typo somewhere' 19 | second: 'this will huve a typo somewhere' 20 | - 21 
| first: 'Olive-green table for sale, in extremely good condition.' 22 | second: 'For sale: table in very good condition, olive green in colour.' 23 | - 24 | first: 'Olive-green table for sale, in extremely good condition.' 25 | second: 'For sale: green Subaru Impreza, 210,000 miles' 26 | - 27 | first: 'Olive-green table for sale, in extremely good condition.' 28 | second: 'Wanted: mountain bike with at least 21 gears.' 29 | - 30 | first: 'this has one extra word' 31 | second: 'this has one word' 32 | - 33 | first: 'a' 34 | second: 'a' 35 | - 36 | first: 'a' 37 | second: 'b' 38 | - 39 | first: '' 40 | second: '' 41 | - 42 | first: '' 43 | second: 'a' 44 | - 45 | first: 'apple event' 46 | second: 'apple event' 47 | - 48 | first: 'iphone' 49 | second: 'iphone x' 50 | - 51 | first: 'ab' 52 | second: 'ba' --------------------------------------------------------------------------------