├── .editorconfig
├── .eslintignore
├── .eslintrc.json
├── .gitignore
├── .mocharc.json
├── .prettierignore
├── .prettierrc
├── .vscode
└── settings.json
├── CHANGELOG.md
├── LICENSE
├── README.md
├── package.json
├── src
├── core
│ ├── interface
│ │ └── Similarity.ts
│ └── packages
│ │ ├── Cosine.ts
│ │ ├── DiceCoefficient.ts
│ │ ├── JaccardIndex.ts
│ │ ├── JaroWinkler.ts
│ │ ├── Levenshtein.ts
│ │ ├── LongestCommonSubsequence.ts
│ │ └── MetricLCS.ts
└── index.ts
├── test
├── Cosine.test.ts
├── DiceCoefficient.test.ts
├── JaccardIndex.test.ts
├── JaroWinkler.test.ts
├── Levenshtein.test.ts
├── LongestCommonSubsequence.test.ts
└── MetricLCS.test.ts
├── tsconfig.json
└── yaml
├── Cosine.yaml
├── DiceCoefficient.yaml
├── JaccardIndex.yaml
├── LongestCommonSubsequence.yaml
└── case.yaml
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | indent_style = space
5 | indent_size = 2
6 | charset = utf-8
7 | trim_trailing_whitespace = false
8 | insert_final_newline = false
--------------------------------------------------------------------------------
/.eslintignore:
--------------------------------------------------------------------------------
1 | **/node_modules/**
2 | dist
3 |
--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "parser": "@typescript-eslint/parser",
3 | "extends": ["alloy", "alloy/typescript"],
4 | "plugins": ["@typescript-eslint"],
5 | "env": {
6 | "browser": true,
7 | "node": true,
8 | "commonjs": true,
9 | "es6": true
10 | },
11 | "parserOptions": {
12 | "sourceType": "module",
13 | "ecmaVersion": 2022
14 | },
15 | "root": true,
16 | "rules": {
17 | "no-duplicate-imports": 0,
18 | "for-direction": "error",
19 | "getter-return": [
20 | "error",
21 | {
22 | "allowImplicit": false
23 | }
24 | ],
25 | "no-await-in-loop": "off",
26 | "no-compare-neg-zero": "error",
27 | "no-cond-assign": ["error", "except-parens"],
28 | "no-console": "off",
29 | "no-constant-condition": [
30 | "error",
31 | {
32 | "checkLoops": false
33 | }
34 | ],
35 | "semi": [2, "never"],
36 | "no-control-regex": "error",
37 | "no-debugger": "error",
38 | "no-dupe-args": "error",
39 | "no-dupe-keys": "error",
40 | "max-nested-callbacks": ["error", 5],
41 | "@typescript-eslint/consistent-type-definitions": ["error", "interface"]
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 |
3 | package-lock.json
4 | yarn.lock
5 | pnpm-lock.yaml
6 |
7 | lib
8 |
9 | dist
10 | *.log
11 | .cache
--------------------------------------------------------------------------------
/.mocharc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extension": ["ts"],
3 | "spec": "test/*.test.ts",
4 | "require": "ts-node/register"
5 | }
6 |
--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | dist
2 | .vscode
3 | yaml
4 |
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "singleQuote": true,
3 | "semi": false
4 | }
5 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "[typescript]": {
3 | "editor.formatOnSave": true
4 | },
5 | "typescript.tsdk": "node_modules/typescript/lib",
6 | }
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## Release Notes
2 |
3 | ### 1.3.0
4 | * Feature: Support JaroWinkler
5 |
6 | ### 1.2.0
7 | * Feature: longestCommonSubsequence = lcs
8 | * Feature: metricLcs = mlcs
9 |
10 |
11 | ### 1.1.0
12 | * Feature: Support import&require
13 | * Optimize: JS to Ts
14 | * Optimize: babel to tsup
15 |
16 |
17 | ### 1.0.9
18 | * Feature: Basic building
19 | * Feature: Cosine
20 | * Feature: DiceCoefficient
21 | * Feature: JaccardIndex
22 | * Feature: Levenshtein
23 | * Feature: LongestCommonSubsequence
24 | * Feature: MetricLCS
25 | * Feature: Add function sortMatch()
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2019-present Rabbitzzc
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # string-comparison
3 | 
4 | 
5 | [](https://github.com/Rabbitzzc/js-string-comparison/stargazers)
6 | [](https://github.com/Rabbitzzc/js-string-comparison/blob/master/LICENSE)
7 |
8 | **JavaScript implementation of [tdebatty/java-string-similarity](https://github.com/tdebatty/java-string-similarity)**
9 |
10 | A library implementing different string similarity, distance and sortMatch measures. A number of algorithms (including Levenshtein edit distance and its siblings, Longest Common Subsequence, cosine similarity, etc.) are currently implemented. Check the summary table below for the complete list.
11 |
12 | - [string-comparison](#string-comparison)
13 | - [Download \& Usage](#download--usage)
14 | - [OverView](#overview)
15 | - [Normalized, metric, similarity and distance](#normalized-metric-similarity-and-distance)
16 | - [(Normalized) similarity and distance](#normalized-similarity-and-distance)
17 | - [Levenshtein](#levenshtein)
18 | - [Longest Common Subsequence](#longest-common-subsequence)
19 | - [Metric Longest Common Subsequence](#metric-longest-common-subsequence)
20 | - [Cosine similarity](#cosine-similarity)
21 | - [Sorensen-Dice coefficient](#sorensen-dice-coefficient)
22 | - [Jaro-Winkler similarity](#jaro-winkler-similarity)
23 | - [API](#api)
24 | - [Methods](#methods)
25 | - [similarity](#similarity)
26 | - [params](#params)
27 | - [return](#return)
28 | - [distance](#distance)
29 | - [params](#params-1)
30 | - [return](#return-1)
31 | - [sortMatch](#sortmatch)
32 | - [params](#params-2)
33 | - [return](#return-2)
34 | - [CHANGELOG](#changelog)
35 | - [MIT](#mit)
36 |
37 |
38 | ## Download & Usage
39 |
40 | download
41 |
42 | ```shell
43 | npm install string-comparison --save
44 | yarn add string-comparison
45 | pnpm add string-comparison
46 | ```
47 | usage
48 |
49 | ```js
50 | let stringComparison = require('string-comparison')
51 | // or import stringComparison from 'string-comparison'
52 |
53 | const Thanos = 'healed'
54 | const Rival = 'sealed'
55 | const Avengers = ['edward', 'sealed', 'theatre']
56 |
57 | // use by cosine
58 | let cos = stringComparison.cosine
59 |
60 | console.log(cos.similarity(Thanos, Rival))
61 | console.log(cos.distance(Thanos, Rival))
62 | console.log(cos.sortMatch(Thanos, Avengers))
63 |
64 | ```
65 |
66 | ## OverView
67 |
68 | The main characteristics of each implemented algorithm are presented below. The "cost" column gives an estimation of the computational cost to compute the similarity between two strings of length m and n respectively.
69 |
70 | | | Measure(s) | Normalized? | Metric? | Type | Cost | Typical usage |
71 | | ------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------- | ----------- | ------- | ------- | ------ | --------------- |
72 | | [Jaccard index](https://github.com/luozhouyang/python-string-similarity/blob/master/README.md#jaccard-index) | similarity<br />distance<br />sortMatch | Yes | Yes | Set | O(m+n) | |
73 | | [Cosine similarity](https://github.com/luozhouyang/python-string-similarity/blob/master/README.md#cosine-similarity) | similarity<br />distance<br />sortMatch | Yes | No | Profile | O(m+n) | |
74 | | [Sorensen-Dice coefficient](https://github.com/luozhouyang/python-string-similarity/blob/master/README.md#sorensen-dice-coefficient) | similarity<br />distance<br />sortMatch | Yes | No | Set | O(m+n) | |
75 | | [Levenshtein](https://github.com/luozhouyang/python-string-similarity/blob/master/README.md#levenshtein) | similarity<br />distance<br />sortMatch | No | Yes | | O(m*n) | |
76 | | [Jaro-Winkler](https://github.com/luozhouyang/python-string-similarity/blob/master/README.md#jaro-winkler) | similarity<br />distance<br />sortMatch | Yes | No | | O(m*n) | typo correction |
77 |
78 | ## Normalized, metric, similarity and distance
79 |
80 | Although the topic might seem simple, a lot of different algorithms exist to measure text similarity or distance. Therefore the library defines some interfaces to categorize them.
81 |
82 | ### (Normalized) similarity and distance
83 |
84 | - StringSimilarity : Implementing algorithms define a similarity between strings (0 means strings are completely different).
85 | - NormalizedStringSimilarity : Implementing algorithms define a similarity between 0.0 and 1.0, like Jaro-Winkler for example.
86 | - StringDistance : Implementing algorithms define a distance between strings (0 means strings are identical), like Levenshtein for example. The maximum distance value depends on the algorithm.
87 | - NormalizedStringDistance : This interface extends StringDistance. For implementing classes, the computed distance value is between 0.0 and 1.0. NormalizedLevenshtein is an example of NormalizedStringDistance.
88 |
89 | ## Levenshtein
90 |
91 | The Levenshtein distance between two words is the minimum number of single-character edits (insertions, deletions or substitutions) required to change one word into the other.
92 |
93 | It is a metric string distance. This implementation uses dynamic programming (Wagner–Fischer algorithm), with only 2 rows of data. The space requirement is thus O(m) and the algorithm runs in O(m.n).
94 |
95 | ```js
96 | import { levenshtein } from "string-comparison"
97 | import type {SortMatchResultType} from "string-comparison"
98 |
99 | const Thanos = 'healed'
100 | const Rival = 'sealed'
101 | const Avengers = ['edward', 'sealed', 'theatre']
102 |
103 | console.log(levenshtein.similarity(Thanos, Rival))
104 | console.log(levenshtein.distance(Thanos, Rival))
105 | console.log(levenshtein.sortMatch(Thanos, Avengers) as SortMatchResultType)
106 |
107 | // output
108 | 0.8333333333333334
109 | 1
110 | [
111 | { member: 'edward', index: 0, rating: 0.16666666666666663 },
112 | { member: 'theatre', index: 2, rating: 0.4285714285714286 },
113 | { member: 'sealed', index: 1, rating: 0.8333333333333334 }
114 | ]
115 | ```
116 |
117 |
118 | ## Longest Common Subsequence
119 |
120 | The longest common subsequence (LCS) problem consists in finding the longest subsequence common to two (or more) sequences. It differs from problems of finding common substrings: unlike substrings, subsequences are not required to occupy consecutive positions within the original sequences.
121 |
122 | It is used by the diff utility, by Git for reconciling multiple changes, etc.
123 |
124 | The LCS distance between strings X (of length n) and Y (of length m) is n + m - 2 |LCS(X, Y)|
125 | min = 0
126 | max = n + m
127 |
128 | LCS distance is equivalent to Levenshtein distance when only insertions and deletions are allowed (no substitution), or when the cost of a substitution is twice the cost of an insertion or deletion.
129 |
130 | This class implements the dynamic programming approach, which has a space requirement O(m.n), and computation cost O(m.n).
131 |
132 | In "Length of Maximal Common Subsequences", K.S. Larsen proposed an algorithm that computes the length of LCS in time O(log(m).log(n)). But the algorithm has a memory requirement O(m.n²) and was thus not implemented here.
133 |
134 | ```js
135 | import { longestCommonSubsequence } from "string-comparison"
136 | // or
137 | import { lcs } from "string-comparison"
138 |
139 |
140 | const Thanos = 'healed'
141 | const Rival = 'sealed'
142 | const Avengers = ['edward', 'sealed', 'theatre']
143 |
144 | console.log(lcs.similarity(Thanos, Rival))
145 | console.log(lcs.distance(Thanos, Rival))
146 | console.log(lcs.sortMatch(Thanos, Avengers))
147 |
148 | // output
149 | 0.8333333333333334
150 | 2
151 | [
152 | { member: 'edward', index: 0, rating: 0.5 },
153 | { member: 'theatre', index: 2, rating: 0.6153846153846154 },
154 | { member: 'sealed', index: 1, rating: 0.8333333333333334 }
155 | ]
156 | ```
157 |
158 | ## Metric Longest Common Subsequence
159 |
160 | Distance metric based on Longest Common Subsequence, from the notes "An LCS-based string metric" by Daniel Bakkelund.
161 | http://heim.ifi.uio.no/~danielry/StringMetric.pdf
162 |
163 | The distance is computed as 1 - |LCS(s1, s2)| / max(|s1|, |s2|)
164 |
165 | ```js
166 | import { metricLcs } from "string-comparison"
167 | // or
168 | import { mlcs } from "string-comparison"
169 |
170 | const Thanos = 'healed'
171 | const Rival = 'sealed'
172 | const Avengers = ['edward', 'sealed', 'theatre']
173 |
174 | console.log(metricLcs.similarity(Thanos, Rival))
175 | console.log(metricLcs.distance(Thanos, Rival))
176 | console.log(metricLcs.sortMatch(Thanos, Avengers))
177 |
178 | // output
179 | 0.8333333333333334
180 | 0.16666666666666663
181 | [
182 | { member: 'edward', index: 0, rating: 0.5 },
183 | { member: 'theatre', index: 2, rating: 0.5714285714285714 },
184 | { member: 'sealed', index: 1, rating: 0.8333333333333334 }
185 | ]
186 | ```
187 |
188 | ## Cosine similarity
189 |
190 | The input strings are first converted into vectors over the set of characters that occur in either string (a component is 1 if the string contains that character and 0 otherwise). The similarity between the two strings is the cosine of the angle between these two vectors, computed as V1 . V2 / (|V1| * |V2|). For comparison, the Jaccard index treats each input string simply as a set and is computed as |V1 inter V2| / |V1 union V2|; the Jaccard index is a metric distance, while cosine is not.
191 |
192 | Distance is computed as 1 - similarity.
194 |
195 | ```js
196 | import { cosine } from "string-comparison"
197 | ```
198 |
199 | ## Sorensen-Dice coefficient
200 |
201 | Similar to Jaccard index, but this time the similarity is computed as 2 * |V1 inter V2| / (|V1| + |V2|).
202 |
203 | Distance is computed as 1 - similarity.
204 |
205 | ```js
206 | import { diceCoefficient } from "string-comparison"
207 | ```
208 |
209 | ## Jaro-Winkler similarity
210 |
211 | The Jaro-Winkler similarity is a string metric measuring the edit distance between two strings. It is very similar to the Jaro similarity: the two differ only when the strings share a common prefix. Jaro-Winkler uses a prefix scale ‘p’, which gives a higher score to strings that have a common prefix, up to a defined maximum prefix length l.
212 |
213 | ```js
214 | import { jaroWinkler } from "string-comparison"
215 | ```
216 |
217 | ## API
218 |
219 | * `cosine`
220 | * `diceCoefficient`
221 | * `jaccardIndex`
222 | * `levenshtein`
223 | * `lcs` = `longestCommonSubsequence`
224 | * `mlcs` = `metricLcs`
225 | * `jaroWinkler`
226 |
227 |
228 | ## Methods
229 | * `similarity`.
230 | * `distance`.
231 | * `sortMatch`
232 |
233 | ### similarity
234 |
235 | Implementing algorithms define a similarity between strings
236 |
237 | #### params
238 |
239 | 1. thanos [String]
240 | 2. rival [String]
241 |
242 | #### return
243 |
244 | Return a similarity between 0.0 and 1.0
245 |
246 | ### distance
247 |
248 | Implementing algorithms define a distance between strings (0 means strings are identical)
249 |
250 | #### params
251 |
252 | 1. `thanos` [String]
253 | 2. `rival` [String]
254 |
255 | #### return
256 |
257 | Return a number
258 |
259 | ### sortMatch
260 |
261 | #### params
262 |
263 | 1. thanos [String]
264 | 2. avengers [...String]
265 |
266 | #### return
267 |
268 | Return an array of objects - `SortMatchResultType` ex:
269 | ```js
270 | [
271 | { member: 'edward', rating: 0.16666666666666663 },
272 | { member: 'theatre', rating: 0.4285714285714286 },
273 | { member: 'mailed', rating: 0.5 },
274 | { member: 'sealed', rating: 0.8333333333333334 }
275 | ]
276 | ```
277 |
278 | ## CHANGELOG
279 | [CHANGELOG](./CHANGELOG.md)
280 |
281 |
282 | ## MIT
283 | [MIT](./LICENSE)
284 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "string-comparison",
3 | "version": "1.3.0",
4 | "description": "A library implementing different string similarity",
5 | "main": "dist/index.js",
6 | "module": "dist/index.mjs",
7 | "types": "dist/index.d.ts",
8 | "engines": {
9 | "node": "^16.0.0 || >=18.0.0"
10 | },
11 | "exports": {
12 | ".": {
13 | "import": "./dist/index.mjs",
14 | "require": "./dist/index.js",
15 | "types": "./dist/index.d.ts"
16 | }
17 | },
18 | "scripts": {
19 | "test": "mocha",
20 | "lint": "eslint -c .eslintrc.json \"src/**/*.ts\"",
21 | "lint:fix": "eslint -c .eslintrc.json \"src/**/*.ts\" --fix",
22 | "build": "tsup src/index.ts --dts --format cjs,esm --minify",
23 | "watch": "tsup src/index.ts --watch"
24 | },
25 | "repository": {
26 | "type": "git",
27 | "url": "https://github.com/Rabbitzzc/js-string-comparision"
28 | },
29 | "files": [
30 | "dist"
31 | ],
32 | "keywords": [
33 | "strings",
34 | "compare similarity",
35 | "similarity",
36 | "Dice's Coefficient",
37 | "Cosine",
38 | "Jaccard Index",
39 | "Levenshtein",
40 | "Longest Common Subsequence",
41 | "Metric Longest Common Subsequence",
42 | "difference",
43 | "compare",
44 | "comparision",
45 | "similar",
46 | "distance",
47 | "match",
48 | "sort match"
49 | ],
50 | "author": {
51 | "name": "Rabbitzzc",
52 | "email": "zzclovelcs@gmail.com"
53 | },
54 | "license": "MIT",
55 | "devDependencies": {
56 | "@swc/core": "^1.3.76",
57 | "@types/mocha": "^10.0.1",
58 | "@types/node": "^20.5.0",
59 | "@typescript-eslint/eslint-plugin": "^6.3.0",
60 | "@typescript-eslint/parser": "^6.3.0",
61 | "async": "^3.2.4",
62 | "eslint": "^8.47.0",
63 | "eslint-config-alloy": "^5.1.1",
64 | "eslint-config-prettier": "^9.0.0",
65 | "eslint-plugin-prettier": "^5.0.0",
66 | "mocha": "^10.2.0",
67 | "npm-run-all": "^4.1.5",
68 | "prettier": "^3.0.1",
69 | "ts-node": "^10.9.1",
70 | "tsup": "^7.2.0",
71 | "typescript": "^5.1.6"
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/src/core/interface/Similarity.ts:
--------------------------------------------------------------------------------
/**
 * One entry of the array produced by `Similarity#sortMatch`.
 */
export interface SortMatchResultType {
  /** The candidate string, exactly as it was supplied by the caller. */
  member: string
  /** Position of `member` in the array passed to `sortMatch` (before sorting). */
  index: number
  /** Value returned by `similarity(thanos, member)` for this candidate. */
  rating: number
}
6 |
/**
 * Base class shared by the string-comparison algorithms.
 *
 * Subclasses provide `similarity` and `distance`; `sortMatch` is implemented
 * here on top of `similarity`. The static helpers centralize input
 * normalization and argument validation.
 */
export default abstract class Similarity {
  /**
   * Normalize both inputs by removing every run of whitespace and lowercasing.
   *
   * @param thanos first string
   * @param rival second string
   * @returns a two-element array: normalized `thanos`, then normalized `rival`
   */
  public static initParams(thanos: string, rival: string) {
    return [
      thanos.replace(/\s+/g, '').toLowerCase(),
      rival.replace(/\s+/g, '').toLowerCase(),
    ]
  }

  /**
   * @throws Error when `thanos` is not a string
   */
  protected static checkThanosType(thanos: string) {
    if (typeof thanos !== 'string')
      throw new Error('first argument should be a string')
  }

  /**
   * @throws Error when `rival` is not a string
   */
  protected static checkRivalType(rival: string) {
    if (typeof rival !== 'string')
      throw new Error('second argument should be a string')
  }

  /**
   * @throws Error when `avengers` is not an array, or contains any non-string
   */
  protected static checkAvengersType(avengers: string[]) {
    if (!Array.isArray(avengers))
      throw new Error('second argument should be an array of strings')
    // `some`, not `find`: `find` returns the offending element itself, so a
    // falsy non-string (0, null, undefined, false, NaN) would pass the check.
    if (avengers.some((s) => typeof s !== 'string'))
      throw new Error('second argument should be an array of strings')
  }

  /**
   * Rate every candidate against `thanos` and return the candidates sorted
   * by ascending rating (the best match is the last element).
   *
   * @param thanos the reference string
   * @param avengers candidate strings
   * @returns one `{ member, index, rating }` entry per candidate, where
   *   `index` is the candidate's position in `avengers`
   * @throws Error when `thanos` is not a string or `avengers` is not an
   *   array of strings
   */
  public sortMatch(thanos: string, avengers: string[]): SortMatchResultType[] {
    Similarity.checkThanosType(thanos)
    Similarity.checkAvengersType(avengers)

    return avengers
      .map((str, index) => {
        return {
          member: str,
          index,
          rating: this.similarity(thanos, str),
        }
      })
      .sort((a, b) => a.rating - b.rating)
  }

  /**
   * Compare two strings and return their similarity score.
   */
  public abstract similarity(thanos: string, rival: string): number

  /**
   * Compare two strings and return their distance.
   */
  public abstract distance(thanos: string, rival: string): number
}
58 |
--------------------------------------------------------------------------------
/src/core/packages/Cosine.ts:
--------------------------------------------------------------------------------
1 | import Similarity from '../interface/Similarity'
2 |
/**
 * Cosine similarity between two strings, each represented as a 0/1 vector
 * over the distinct characters that appear in either string.
 */
export default class Cosine extends Similarity {
  /**
   * @param pThanos first string
   * @param pRival second string
   * @returns 1 when the normalized strings are equal (including both empty),
   *   0 when exactly one is empty, otherwise the cosine of the two
   *   character-presence vectors
   * @throws Error when either argument is not a string
   */
  public similarity(pThanos: string, pRival: string) {
    Similarity.checkThanosType(pThanos)
    Similarity.checkRivalType(pRival)

    // Whitespace removed and lowercased.
    const [thanos, rival] = Similarity.initParams(pThanos, pRival)

    const thanosEmpty = thanos.length === 0
    const rivalEmpty = rival.length === 0
    if (thanosEmpty && rivalEmpty) return 1
    if (thanosEmpty || rivalEmpty) return 0
    if (thanos === rival) return 1

    const thanosChars = thanos.split('')
    const rivalChars = rival.split('')

    // Every distinct character of either string is one vector dimension.
    const alphabet = Array.from(new Set(thanosChars.concat(rivalChars)))

    const left = this.stringVectorization(thanosChars, alphabet)
    const right = this.stringVectorization(rivalChars, alphabet)

    let dot = 0
    let leftSquares = 0
    let rightSquares = 0
    left.forEach((component, dim) => {
      dot += component * right[dim]
      leftSquares += component * component
      rightSquares += right[dim] * right[dim]
    })

    const leftNorm = Math.sqrt(leftSquares)
    const rightNorm = Math.sqrt(rightSquares)
    return Number(dot) / (leftNorm * rightNorm)
  }

  /**
   * @returns `1 - similarity(thanos, rival)`
   */
  public distance(thanos: string, rival: string) {
    const score = this.similarity(thanos, rival)
    return 1.0 - score
  }

  /**
   * Map each entry of `common` to 1 when `strArr` contains it, else 0.
   */
  private stringVectorization(strArr: string | any[], common: any[]) {
    return common.map((symbol: any) => {
      if (strArr.includes(symbol)) return 1
      return 0
    })
  }
}
41 |
--------------------------------------------------------------------------------
/src/core/packages/DiceCoefficient.ts:
--------------------------------------------------------------------------------
1 | import Similarity from '../interface/Similarity'
2 |
/**
 * Sorensen-Dice coefficient computed over character bigrams.
 */
export default class DiceCoefficient extends Similarity {
  /**
   * @param pThanos first string
   * @param pRival second string
   * @returns 1 when the normalized strings are equal, 0 when either has
   *   fewer than two characters, otherwise
   *   `2 * sharedBigrams / (bigrams(thanos) + bigrams(rival))`
   * @throws Error when either argument is not a string
   */
  public similarity(pThanos: string, pRival: string): number {
    Similarity.checkThanosType(pThanos)
    Similarity.checkRivalType(pRival)

    const [thanos, rival] = Similarity.initParams(pThanos, pRival)

    // Equality also covers the case where both strings are empty.
    if (thanos === rival) return 1

    // A string shorter than two characters has no bigram at all.
    if (thanos.length < 2 || rival.length < 2) return 0

    // Multiset of the bigrams of `thanos`: bigram -> remaining occurrences.
    const remaining = new Map<string, number>()
    for (let start = 0; start + 1 < thanos.length; start++) {
      const pair = thanos.slice(start, start + 2)
      remaining.set(pair, (remaining.get(pair) ?? 0) + 1)
    }

    // Each bigram of `rival` consumes at most one matching bigram of `thanos`.
    let shared = 0
    for (let start = 0; start + 1 < rival.length; start++) {
      const pair = rival.slice(start, start + 2)
      const available = remaining.get(pair) ?? 0

      if (available > 0) {
        remaining.set(pair, available - 1)
        shared += 1
      }
    }

    return (2.0 * shared) / (thanos.length + rival.length - 2)
  }

  /**
   * @returns `1 - similarity(thanos, rival)`
   */
  public distance(thanos: string, rival: string): number {
    const score = this.similarity(thanos, rival)
    return 1.0 - score
  }
}
45 |
--------------------------------------------------------------------------------
/src/core/packages/JaccardIndex.ts:
--------------------------------------------------------------------------------
1 | import Similarity from '../interface/Similarity'
2 |
/**
 * Jaccard index over the sets of characters of the two strings.
 */
export default class JaccardIndex extends Similarity {
  /**
   * @param pThanos first string
   * @param pRival second string
   * @returns 1 when the normalized strings are equal (including both empty),
   *   otherwise `|A ∩ B| / |A ∪ B|` where A and B are the character sets of
   *   the normalized strings
   * @throws Error when either argument is not a string
   */
  public similarity(pThanos: string, pRival: string): number {
    Similarity.checkThanosType(pThanos)
    Similarity.checkRivalType(pRival)

    const [thanos, rival] = Similarity.initParams(pThanos, pRival)

    // Equality also covers the case where both strings are empty.
    if (thanos === rival) return 1

    // Build both sets once and the same way. Constructing a Set from a string
    // iterates by code point, so characters outside the BMP are treated as
    // single symbols on both sides (previously the union was split by UTF-16
    // code unit while the membership test used code points, so such
    // characters could never intersect).
    const thanosSet = new Set(thanos)
    const rivalSet = new Set(rival)

    let shared = 0
    thanosSet.forEach((symbol) => {
      if (rivalSet.has(symbol)) shared += 1
    })

    // |A ∪ B| = |A| + |B| - |A ∩ B|; non-zero because the strings differ,
    // so at least one of them is non-empty.
    const unionSize = thanosSet.size + rivalSet.size - shared

    return shared / unionSize
  }

  /**
   * @returns `1 - similarity(thanos, rival)`
   */
  public distance(thanos: string, rival: string): number {
    return 1.0 - this.similarity(thanos, rival)
  }
}
25 |
--------------------------------------------------------------------------------
/src/core/packages/JaroWinkler.ts:
--------------------------------------------------------------------------------
1 | import Similarity from '../interface/Similarity'
2 |
3 | // https://www.geeksforgeeks.org/jaro-and-jaro-winkler-similarity/
/**
 * Jaro and Jaro-Winkler string similarity.
 */
export default class JaroWinkler extends Similarity {
  /**
   * Jaro score of the two normalized strings.
   *
   * NOTE: despite the method name, the returned value is a similarity score
   * in [0, 1] where 1 means the normalized strings are identical and 0 means
   * no matching characters. This is kept as-is for backward compatibility.
   *
   * @param pThanos first string
   * @param pRival second string
   * @returns 1 when the normalized strings are equal (including both empty),
   *   0 when exactly one is empty or nothing matches, otherwise the Jaro score
   * @throws Error when either argument is not a string
   */
  public distance(pThanos: string, pRival: string): number {
    Similarity.checkThanosType(pThanos)
    Similarity.checkRivalType(pRival)

    // Whitespace removed and lowercased.
    const [thanos, rival] = Similarity.initParams(pThanos, pRival)

    const len1 = thanos.length
    const len2 = rival.length

    if (!len1 && !len2) return 1
    if (!len1 || !len2) return 0
    if (thanos === rival) return 1

    // Two characters match only if they are equal and at most this far apart.
    const maxDist = Math.floor(Math.max(len1, len2) / 2) - 1

    const matchedThanos: boolean[] = new Array(len1).fill(false)
    const matchedRival: boolean[] = new Array(len2).fill(false)
    let match = 0

    for (let i = 0; i < len1; i++) {
      const from = Math.max(0, i - maxDist)
      const to = Math.min(len2, i + maxDist + 1)

      for (let j = from; j < to; j++) {
        // Each character of `rival` can be matched only once.
        if (!matchedRival[j] && thanos[i] === rival[j]) {
          matchedThanos[i] = true
          matchedRival[j] = true
          match++
          break
        }
      }
    }

    if (match === 0) return 0

    // Walk the matched characters of both strings in order; every position
    // where they disagree is half a transposition.
    let transpositions = 0
    let point = 0

    for (let i = 0; i < len1; i++) {
      if (!matchedThanos[i]) continue

      while (!matchedRival[point]) {
        point++
      }
      if (thanos[i] !== rival[point++]) {
        transpositions++
      }
    }
    transpositions /= 2

    return (
      (match / len1 + match / len2 + (match - transpositions) / match) / 3.0
    )
  }

  /**
   * Jaro-Winkler similarity: the Jaro score, boosted for a common prefix of
   * up to four characters when the Jaro score exceeds 0.7.
   *
   * @param thanos first string
   * @param rival second string
   * @returns a score in [0, 1]; 1 means the normalized strings are identical
   * @throws Error when either argument is not a string
   */
  public similarity(thanos: string, rival: string): number {
    // `distance` validates the argument types before anything else runs.
    let score = this.distance(thanos, rival)

    if (score > 0.7) {
      // The prefix must be counted on the same normalized strings the Jaro
      // score was computed on; comparing the raw inputs loses the bonus
      // whenever they differ only by case or whitespace.
      const [normThanos, normRival] = Similarity.initParams(thanos, rival)
      const limit = Math.min(4, normThanos.length, normRival.length)

      let prefix = 0
      while (prefix < limit && normThanos[prefix] === normRival[prefix]) {
        prefix++
      }

      score += 0.1 * prefix * (1 - score)
    }

    return score
  }
}
88 |
--------------------------------------------------------------------------------
/src/core/packages/Levenshtein.ts:
--------------------------------------------------------------------------------
1 | import Similarity from '../interface/Similarity'
2 |
/**
 * Levenshtein edit distance and the similarity derived from it.
 */
export default class Levenshtein extends Similarity {
  /**
   * @param pThanos first string
   * @param pRival second string
   * @returns 1 when both normalized strings are empty, otherwise
   *   `1 - distance / max(length)` computed on the normalized strings
   * @throws Error when either argument is not a string
   */
  public similarity(pThanos: string, pRival: string): number {
    Similarity.checkThanosType(pThanos)
    Similarity.checkRivalType(pRival)

    const [thanos, rival] = Similarity.initParams(pThanos, pRival)
    const longest = Math.max(thanos.length, rival.length)

    // Two empty strings are identical; this also avoids dividing 0 by 0.
    if (longest === 0) return 1

    return 1 - Number(this.distance(thanos, rival)) / longest
  }

  /**
   * Minimum number of single-character insertions, deletions and
   * substitutions turning one normalized string into the other.
   *
   * Uses two rolling rows of the dynamic-programming table, so the extra
   * space is proportional to the length of `pRival` instead of the product
   * of both lengths.
   *
   * @param pThanos first string
   * @param pRival second string
   * @returns the edit distance (0 when the normalized strings are equal)
   * @throws Error when either argument is not a string
   */
  public distance(pThanos: string, pRival: string): number {
    Similarity.checkThanosType(pThanos)
    Similarity.checkRivalType(pRival)

    const [thanos, rival] = Similarity.initParams(pThanos, pRival)

    if (thanos === rival) return 0

    const len1 = thanos.length
    const len2 = rival.length
    if (!len1) return len2
    if (!len2) return len1

    // previous[j]: distance between the first i-1 chars of thanos and the
    // first j chars of rival. Row 0 is "insert j characters".
    let previous: number[] = []
    for (let j = 0; j <= len2; ++j) {
      previous.push(j)
    }
    let current: number[] = new Array(len2 + 1).fill(0)

    for (let i = 1; i <= len1; ++i) {
      // Column 0 is "delete i characters".
      current[0] = i

      for (let j = 1; j <= len2; ++j) {
        const substitution = thanos[i - 1] === rival[j - 1] ? 0 : 1

        // delete, insert, replace
        current[j] = Math.min(
          previous[j] + 1,
          current[j - 1] + 1,
          previous[j - 1] + substitution,
        )
      }

      // The finished row becomes the previous row; reuse the old buffer.
      const recycled = previous
      previous = current
      current = recycled
    }

    return previous[len2]
  }
}
54 |
--------------------------------------------------------------------------------
/src/core/packages/LongestCommonSubsequence.ts:
--------------------------------------------------------------------------------
1 | import Similarity from '../interface/Similarity'
2 |
/**
 * Similarity and distance based on the Longest Common Subsequence (LCS).
 */
export default class LongestCommonSubsequence extends Similarity {
  /**
   * Length of the longest common subsequence of the two normalized strings.
   *
   * @param pThanos first string
   * @param pRival second string
   * @returns the LCS length
   * @throws Error when either argument is not a string
   */
  public static lcsLength(pThanos: string, pRival: string): number {
    Similarity.checkThanosType(pThanos)
    Similarity.checkRivalType(pRival)

    const [thanos, rival] = Similarity.initParams(pThanos, pRival)
    const [len1, len2] = [thanos.length, rival.length]

    // (len1 + 1) x (len2 + 1) table, every cell initialized to 0.
    const dynamicArray = [...Array(len1 + 1)].map(() =>
      Array(len2 + 1).fill(0),
    )

    for (let i = 1; i <= len1; ++i) {
      for (let j = 1; j <= len2; ++j) {
        dynamicArray[i][j] =
          thanos[i - 1] === rival[j - 1]
            ? dynamicArray[i - 1][j - 1] + 1
            : Math.max(dynamicArray[i][j - 1], dynamicArray[i - 1][j])
      }
    }
    return dynamicArray[len1][len2]
  }

  /**
   * @param pThanos first string
   * @param pRival second string
   * @returns 1 when the normalized strings are equal (including both empty),
   *   otherwise `2 * lcsLength / (length(thanos) + length(rival))`
   * @throws Error when either argument is not a string
   */
  public similarity(pThanos: string, pRival: string): number {
    Similarity.checkThanosType(pThanos)
    Similarity.checkRivalType(pRival)

    // Whitespace removed and lowercased.
    const [thanos, rival] = Similarity.initParams(pThanos, pRival)

    if ((!thanos.length && !rival.length) || thanos === rival) return 1

    return (
      (2.0 * LongestCommonSubsequence.lcsLength(thanos, rival)) /
      (thanos.length + rival.length)
    )
  }

  /**
   * @param pThanos first string
   * @param pRival second string
   * @returns `length(thanos) + length(rival) - 2 * lcsLength`, computed on
   *   the normalized strings
   * @throws Error when either argument is not a string
   */
  public distance(pThanos: string, pRival: string) {
    // Validate before normalizing, like every other method here; otherwise a
    // non-string argument is either rejected by `initParams` with an
    // unrelated TypeError or only caught later, after normalization, by
    // `lcsLength`.
    Similarity.checkThanosType(pThanos)
    Similarity.checkRivalType(pRival)

    const [thanos, rival] = Similarity.initParams(pThanos, pRival)
    return (
      thanos.length +
      rival.length -
      2 * LongestCommonSubsequence.lcsLength(thanos, rival)
    )
  }
}
49 |
--------------------------------------------------------------------------------
/src/core/packages/MetricLCS.ts:
--------------------------------------------------------------------------------
1 | import Similarity from '../interface/Similarity'
2 |
/**
 * Similarity and distance based on the LCS length divided by the length of
 * the longer string.
 */
export default class MetricLCS extends Similarity {
  /**
   * Length of the longest common subsequence of `thanos` and `rival`.
   * The inputs are used as given; no normalization is applied here.
   *
   * @throws Error when either argument is not a string
   */
  public static lcsLength(thanos: string, rival: string) {
    Similarity.checkThanosType(thanos)
    Similarity.checkRivalType(rival)

    const rows = thanos.length
    const cols = rival.length

    // table[r][c] holds the LCS length of the first r chars of thanos and
    // the first c chars of rival; row 0 and column 0 stay at 0.
    const table = Array.from({ length: rows + 1 }, () =>
      new Array(cols + 1).fill(0),
    )

    for (let r = 0; r < rows; r++) {
      for (let c = 0; c < cols; c++) {
        if (thanos[r] === rival[c]) {
          table[r + 1][c + 1] = table[r][c] + 1
        } else {
          table[r + 1][c + 1] = Math.max(table[r + 1][c], table[r][c + 1])
        }
      }
    }

    return table[rows][cols]
  }

  /**
   * @param pThanos first string
   * @param pRival second string
   * @returns 1 when the normalized strings are equal (including both empty),
   *   otherwise `lcsLength / max(length(thanos), length(rival))`
   * @throws Error when either argument is not a string
   */
  public similarity(pThanos: string, pRival: string): number {
    Similarity.checkThanosType(pThanos)
    Similarity.checkRivalType(pRival)

    // Whitespace removed and lowercased.
    const [thanos, rival] = Similarity.initParams(pThanos, pRival)

    if (thanos.length === 0 && rival.length === 0) return 1
    if (thanos === rival) return 1

    const longest = Math.max(thanos.length, rival.length)
    return Number(MetricLCS.lcsLength(thanos, rival)) / longest
  }

  /**
   * @param pThanos first string
   * @param pRival second string
   * @returns 0 when the normalized strings are equal, otherwise
   *   `1 - similarity`
   * @throws Error when either argument is not a string
   */
  public distance(pThanos: string, pRival: string) {
    Similarity.checkThanosType(pThanos)
    Similarity.checkRivalType(pRival)

    const [thanos, rival] = Similarity.initParams(pThanos, pRival)

    if (thanos === rival) return 0
    return 1.0 - this.similarity(thanos, rival)
  }
}
46 |
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | import type { SortMatchResultType } from './core/interface/Similarity'
2 | import Cosine from './core/packages/Cosine'
3 | import DiceCoefficient from './core/packages/DiceCoefficient'
4 | import JaccardIndex from './core/packages/JaccardIndex'
5 | import Levenshtein from './core/packages/Levenshtein'
6 | import LongestCommonSubsequence from './core/packages/LongestCommonSubsequence'
7 | import MetricLCS from './core/packages/MetricLCS'
8 | import JaroWinkler from './core/packages/JaroWinkler'
9 |
10 | export type { SortMatchResultType }
11 |
12 | // One shared instance per algorithm, created in a fixed order.
13 | const cosine = new Cosine()
14 | const diceCoefficient = new DiceCoefficient()
15 | const jaccardIndex = new JaccardIndex()
16 | const levenshtein = new Levenshtein()
17 | const lcs = new LongestCommonSubsequence()
18 | const mlcs = new MetricLCS()
19 | const jaroWinkler = new JaroWinkler()
20 |
21 | // Public API: the short and long names point at the same instance.
22 | export default {
23 |   cosine,
24 |   diceCoefficient,
25 |   jaccardIndex,
26 |   levenshtein,
27 |   lcs,
28 |   longestCommonSubsequence: lcs,
29 |   mlcs,
30 |   metricLcs: mlcs,
31 |   jaroWinkler,
32 | }
33 |
--------------------------------------------------------------------------------
/test/Cosine.test.ts:
--------------------------------------------------------------------------------
1 | import * as assert from 'assert'
2 | import Cosine from '../src/core/packages/Cosine'
3 | const cosine = new Cosine()
4 |
5 | describe('test Cosine Similarity', () => {
6 |   describe('similarity()', () => {
7 |     // Each row: [first, second, expected similarity]
8 |     const cases: Array<[string, string, number]> = [
9 |       ['french', 'quebec', 0.3651483716701107],
10 |       ['france', 'france', 1],
11 |       ['healed', 'sealed', 0.7999999999999998],
12 |       ['web applications', 'applications of the web', 0.9258200997725515],
13 |       ['a', 'a', 1],
14 |       ['a', 'b', 0],
15 |       ['', '', 1],
16 |       ['a', '', 0],
17 |       ['', 'a', 0],
18 |       ['apple event', 'apple event', 1],
19 |       ['ab', 'ba', 0.9999999999999998],
20 |     ]
21 |     cases.forEach(([first, second, expected]) => {
22 |       it(`should be ${expected}`, () => {
23 |         assert.equal(cosine.similarity(first, second), expected)
24 |       })
25 |     })
26 |   })
27 |   describe('sortMatch()', () => {})
28 | })
29 |
--------------------------------------------------------------------------------
/test/DiceCoefficient.test.ts:
--------------------------------------------------------------------------------
1 | import * as assert from 'assert'
2 | import DiceCoefficient from '../src/core/packages/DiceCoefficient'
3 | const diceCoefficient = new DiceCoefficient()
4 |
5 | // This suite previously imported and exercised Cosine (copy-paste of the
6 | // Cosine test), so DiceCoefficient itself was never tested.
7 | describe('test DiceCoefficient Similarity', () => {
8 |   describe('similarity()', () => {
9 |     // Expected values are the matching entries of yaml/DiceCoefficient.yaml.
10 |     const testData = [
11 |       { first: 'french', second: 'quebec', expected: 0 },
12 |       { first: 'france', second: 'france', expected: 1 },
13 |       { first: 'healed', second: 'sealed', expected: 0.8 },
14 |       {
15 |         first: 'web applications',
16 |         second: 'applications of the web',
17 |         expected: 0.7878787878787878,
18 |       },
19 |       { first: 'a', second: 'a', expected: 1 },
20 |       { first: 'a', second: 'b', expected: 0 },
21 |       { first: '', second: '', expected: 1 },
22 |       { first: 'a', second: '', expected: 0 },
23 |       { first: '', second: 'a', expected: 0 },
24 |       { first: 'apple event', second: 'apple event', expected: 1 },
25 |       { first: 'ab', second: 'ba', expected: 0 },
26 |     ]
27 |     testData.forEach((td) => {
28 |       it(`should be ${td.expected}`, () => {
29 |         assert.equal(
30 |           diceCoefficient.similarity(td.first, td.second),
31 |           td.expected,
32 |         )
33 |       })
34 |     })
35 |   })
36 |   describe('sortMatch()', () => {})
37 | })
38 |
--------------------------------------------------------------------------------
/test/JaccardIndex.test.ts:
--------------------------------------------------------------------------------
1 | import * as assert from 'assert'
2 | import JaccardIndex from '../src/core/packages/JaccardIndex'
3 | const jaccardIndex = new JaccardIndex()
4 |
5 | // This suite previously imported and exercised Cosine (copy-paste of the
6 | // Cosine test), so JaccardIndex itself was never tested.
7 | describe('test JaccardIndex Similarity', () => {
8 |   describe('similarity()', () => {
9 |     // Expected values are the matching entries of yaml/JaccardIndex.yaml.
10 |     const testData = [
11 |       { first: 'french', second: 'quebec', expected: 0.2222222222222222 },
12 |       { first: 'france', second: 'france', expected: 1 },
13 |       { first: 'healed', second: 'sealed', expected: 0.6666666666666666 },
14 |       {
15 |         first: 'web applications',
16 |         second: 'applications of the web',
17 |         expected: 0.8571428571428571,
18 |       },
19 |       { first: 'a', second: 'a', expected: 1 },
20 |       { first: 'a', second: 'b', expected: 0 },
21 |       { first: '', second: '', expected: 1 },
22 |       { first: 'a', second: '', expected: 0 },
23 |       { first: '', second: 'a', expected: 0 },
24 |       { first: 'apple event', second: 'apple event', expected: 1 },
25 |       { first: 'ab', second: 'ba', expected: 1 },
26 |     ]
27 |     testData.forEach((td) => {
28 |       it(`should be ${td.expected}`, () => {
29 |         assert.equal(jaccardIndex.similarity(td.first, td.second), td.expected)
30 |       })
31 |     })
32 |   })
33 |   describe('sortMatch()', () => {})
34 | })
35 |
--------------------------------------------------------------------------------
/test/JaroWinkler.test.ts:
--------------------------------------------------------------------------------
1 | import * as assert from 'assert'
2 | import JaroWinkler from '../src/core/packages/JaroWinkler'
3 | const jaroWinkler = new JaroWinkler()
4 |
5 | describe('test JaroWinkler Similarity', () => {
6 |   describe('similarity()', () => {
7 |     // Each row: [first, second, expected similarity]
8 |     const cases: Array<[string, string, number]> = [
9 |       ['TRATE', 'TRACE', 0.9066666666666667],
10 |       ['DwAyNE', 'DuANE', 0.8400000000000001],
11 |       ['a', 'a', 1],
12 |       ['a', 'b', 0],
13 |       ['', '', 1],
14 |       ['a', '', 0],
15 |       ['', 'a', 0],
16 |       ['apple event', 'apple event', 1],
17 |       ['ab', 'ba', 0],
18 |     ]
19 |     cases.forEach(([first, second, expected]) => {
20 |       it(`should be ${expected}`, () => {
21 |         assert.equal(jaroWinkler.similarity(first, second), expected)
22 |       })
23 |     })
24 |   })
25 |   describe('sortMatch()', () => {})
26 | })
27 |
--------------------------------------------------------------------------------
/test/Levenshtein.test.ts:
--------------------------------------------------------------------------------
1 | import * as assert from 'assert'
2 | import Levenshtein from '../src/core/packages/Levenshtein'
3 | const levenshtein = new Levenshtein()
4 |
5 | describe('test Levenshtein', () => {
6 |   describe('similarity()', () => {
7 |     // Each row: [first, second, expected similarity]
8 |     const cases: Array<[string, string, number]> = [
9 |       ['french', 'quebec', 0.16666666666666663],
10 |       ['france', 'france', 1],
11 |       ['healed', 'sealed', 0.8333333333333334],
12 |       ['web applications', 'applications of the web', 0.44999999999999996],
13 |       ['a', 'a', 1],
14 |       ['a', 'b', 0],
15 |       ['', '', 1],
16 |       ['a', '', 0],
17 |       ['', 'a', 0],
18 |       ['apple event', 'apple event', 1],
19 |       ['ab', 'ba', 0],
20 |     ]
21 |     cases.forEach(([first, second, expected]) => {
22 |       it(`should be ${expected}`, () => {
23 |         assert.equal(levenshtein.similarity(first, second), expected)
24 |       })
25 |     })
26 |   })
27 |   describe('sortMatch()', () => {})
28 | })
29 |
--------------------------------------------------------------------------------
/test/LongestCommonSubsequence.test.ts:
--------------------------------------------------------------------------------
1 | import * as assert from 'assert'
2 | import LongestCommonSubsequence from '../src/core/packages/LongestCommonSubsequence'
3 | const lcs = new LongestCommonSubsequence()
4 |
5 | describe('test LongestCommonSubsequence', () => {
6 |   describe('similarity()', () => {
7 |     // Each row: [first, second, expected similarity]
8 |     const cases: Array<[string, string, number]> = [
9 |       ['french', 'quebec', 0.3333333333333333],
10 |       ['france', 'france', 1],
11 |       ['healed', 'sealed', 0.8333333333333334],
12 |       ['web applications', 'applications of the web', 0.6857142857142857],
13 |       ['a', 'a', 1],
14 |       ['a', 'b', 0],
15 |       ['', '', 1],
16 |       ['a', '', 0],
17 |       ['', 'a', 0],
18 |       ['apple event', 'apple event', 1],
19 |       ['ab', 'ba', 0.5],
20 |     ]
21 |     cases.forEach(([first, second, expected]) => {
22 |       it(`should be ${expected}`, () => {
23 |         assert.equal(lcs.similarity(first, second), expected)
24 |       })
25 |     })
26 |   })
27 |   describe('sortMatch()', () => {})
28 |   describe('lcsLength()', () => {
29 |     // Each row: [first, second, expected LCS length]
30 |     const cases: Array<[string, string, number]> = [
31 |       ['french', 'quebec', 2],
32 |       ['france', 'france', 6],
33 |       ['healed', 'sealed', 5],
34 |       ['web applications', 'applications of the web', 12],
35 |       ['a', 'a', 1],
36 |       ['a', 'b', 0],
37 |       ['', '', 0],
38 |       ['a', '', 0],
39 |       ['', 'a', 0],
40 |       ['apple event', 'apple event', 10],
41 |       ['ab', 'ba', 1],
42 |     ]
43 |     cases.forEach(([first, second, expected]) => {
44 |       it(`should be ${expected}`, () => {
45 |         assert.equal(LongestCommonSubsequence.lcsLength(first, second), expected)
46 |       })
47 |     })
48 |   })
49 | })
50 |
--------------------------------------------------------------------------------
/test/MetricLCS.test.ts:
--------------------------------------------------------------------------------
1 | import * as assert from 'assert'
2 | import MetricLCS from '../src/core/packages/MetricLCS'
3 | const metricLCS = new MetricLCS()
4 |
5 | describe('test MetricLCS', () => {
6 |   describe('similarity()', () => {
7 |     // Each row: [first, second, expected similarity]
8 |     const cases: Array<[string, string, number]> = [
9 |       ['french', 'quebec', 0.3333333333333333],
10 |       ['france', 'france', 1],
11 |       ['healed', 'sealed', 0.8333333333333334],
12 |       ['web applications', 'applications of the web', 0.6],
13 |       ['a', 'a', 1],
14 |       ['a', 'b', 0],
15 |       ['', '', 1],
16 |       ['a', '', 0],
17 |       ['', 'a', 0],
18 |       ['apple event', 'apple event', 1],
19 |       ['ab', 'ba', 0.5],
20 |     ]
21 |     cases.forEach(([first, second, expected]) => {
22 |       it(`should be ${expected}`, () => {
23 |         assert.equal(metricLCS.similarity(first, second), expected)
24 |       })
25 |     })
26 |   })
27 |   describe('sortMatch()', () => {})
28 | })
29 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "es5",
4 | "module": "umd",
5 | "moduleResolution": "Node",
6 | "declaration": true,
7 | "declarationDir": "dist/types",
8 | "outDir": "./dist",
9 | "lib": ["es6", "es7", "DOM", "ESNext"]
10 | },
11 | "include": ["src/**/*"],
12 | "exclude": ["node_modules", "dist", "lib"]
13 | }
14 |
--------------------------------------------------------------------------------
/yaml/Cosine.yaml:
--------------------------------------------------------------------------------
1 | testData:
2 | - expected: 0.3651483716701107
3 | - expected: 1
4 | - expected: 1
5 | - expected: 0.7999999999999998
6 | - expected: 0.9258200997725515
7 | - expected: 0.9660917830792959
8 | - expected: 0.8783100656536799
9 | - expected: 0.6343350474165466
10 | - expected: 0.6666666666666666
11 | - expected: 0.9574271077563381
12 | - expected: 1
13 | - expected: 0
14 | - expected: 1
15 | - expected: 0
16 | - expected: 0
17 | - expected: 1
18 | - expected: 0.9258200997725514
19 | - expected: 0.9999999999999998
20 |
--------------------------------------------------------------------------------
/yaml/DiceCoefficient.yaml:
--------------------------------------------------------------------------------
1 | testData:
2 | - expected: 0
3 | - expected: 1
4 | - expected: 1
5 | - expected: 0.8
6 | - expected: 0.7878787878787878
7 | - expected: 0.92
8 | - expected: 0.6464646464646465
9 | - expected: 0.27906976744186046
10 | - expected: 0.1411764705882353
11 | - expected: 0.7741935483870968
12 | - expected: 1
13 | - expected: 0
14 | - expected: 1
15 | - expected: 0
16 | - expected: 0
17 | - expected: 1
18 | - expected: 0.9090909090909091
19 | - expected: 0
20 |
--------------------------------------------------------------------------------
/yaml/JaccardIndex.yaml:
--------------------------------------------------------------------------------
1 | testData:
2 | - expected: 0.2222222222222222
3 | - expected: 1
4 | - expected: 1
5 | - expected: 0.6666666666666666
6 | - expected: 0.8571428571428571
7 | - expected: 0.9333333333333333
8 | - expected: 0.782608695652174
9 | - expected: 0.4642857142857143
10 | - expected: 0.5
11 | - expected: 0.9166666666666666
12 | - expected: 1
13 | - expected: 0
14 | - expected: 1
15 | - expected: 0
16 | - expected: 0
17 | - expected: 1
18 | - expected: 0.8571428571428571
19 | - expected: 1
20 |
--------------------------------------------------------------------------------
/yaml/LongestCommonSubsequence.yaml:
--------------------------------------------------------------------------------
1 | testData:
2 | - expected: 0.3333333333333333
3 | - expected: 1
4 | - expected: 1
5 | - expected: 0.8333333333333334
6 | - expected: 0.6857142857142857
7 | - expected: 0.9615384615384616
8 | - expected: 0.5346534653465347
9 | - expected: 0.3409090909090909
10 | - expected: 0.27586206896551724
11 | - expected: 0.8484848484848485
12 | - expected: 1
13 | - expected: 0
14 | - expected: 1
15 | - expected: 0
16 | - expected: 0
17 | - expected: 1
18 | - expected: 0.9230769230769231
19 | - expected: 0.5
20 |
--------------------------------------------------------------------------------
/yaml/case.yaml:
--------------------------------------------------------------------------------
1 | testData:
2 | -
3 | first: 'french'
4 | second: 'quebec'
5 | -
6 | first: 'france'
7 | second: 'france'
8 | -
9 | first: 'fRaNce'
10 | second: 'france'
11 | -
12 | first: 'healed'
13 | second: 'sealed'
14 | -
15 | first: 'web applications'
16 | second: 'applications of the web'
17 | -
18 | first: 'this will have a typo somewhere'
19 | second: 'this will huve a typo somewhere'
20 | -
21 | first: 'Olive-green table for sale, in extremely good condition.'
22 | second: 'For sale: table in very good condition, olive green in colour.'
23 | -
24 | first: 'Olive-green table for sale, in extremely good condition.'
25 | second: 'For sale: green Subaru Impreza, 210,000 miles'
26 | -
27 | first: 'Olive-green table for sale, in extremely good condition.'
28 | second: 'Wanted: mountain bike with at least 21 gears.'
29 | -
30 | first: 'this has one extra word'
31 | second: 'this has one word'
32 | -
33 | first: 'a'
34 | second: 'a'
35 | -
36 | first: 'a'
37 | second: 'b'
38 | -
39 | first: ''
40 | second: ''
41 | -
42 | first: 'a'
43 | second: ''
44 | -
45 | first: ''
46 | second: 'a'
47 | -
48 | first: 'apple event'
49 | second: 'apple event'
50 | -
51 | first: 'iphone'
52 | second: 'iphone x'
53 | -
54 | first: 'ab'
55 | second: 'ba'
--------------------------------------------------------------------------------