├── .eslintignore ├── .gitignore ├── .eslintrc ├── test ├── exhaustive-test.ts ├── benchmark.js ├── metatesting.ts ├── randomized-test.ts ├── special-test.ts └── test-utils.ts ├── LICENSE.md ├── tsconfig.json ├── package.json ├── src └── index.ts └── README.md /.eslintignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | bin -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | node_modules 3 | .vscode -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "parser": "@typescript-eslint/parser", 4 | "parserOptions": { 5 | "ecmaVersion": 2020, 6 | "sourceType": "module" 7 | }, 8 | "extends": [ 9 | "plugin:@typescript-eslint/recommended" 10 | ], 11 | "rules": { 12 | "semi": ["error", "always"], 13 | "@typescript-eslint/explicit-function-return-type": "off", 14 | "@typescript-eslint/explicit-module-boundary-types": "off", 15 | "@typescript-eslint/no-explicit-any": "off", 16 | "@typescript-eslint/ban-types": "off", 17 | "@typescript-eslint/no-unused-vars": ["error", { "argsIgnorePattern": "^_"}] 18 | } 19 | } -------------------------------------------------------------------------------- /test/exhaustive-test.ts: -------------------------------------------------------------------------------- 1 | import 'mocha'; 2 | import {expect} from 'chai'; 3 | import {applyPatch, calcPatch, diff} from '../src'; 4 | import * as tu from "./test-utils"; 5 | 6 | 7 | describe("Exhaustive patch tests", () => { 8 | for (let N = 1; N < 5; ++N) { 9 | for (let M = 0; M < 5; ++M) { 10 | describe(`all sequences of sizes N=${N}, M=${M}`, () => { 11 | // It can be made tight 12 | const complexityBound = (N + M + 1) * (N + M + 1) * 1000; 13 | for (const [xs, ys] of 
tu.allPairs(N, M)) { 14 | const [xsw, ysw] = tu.accessWatchDog(complexityBound, [xs, ys]); 15 | it(`patch '${xs.join('')}' -> '${ys.join('')}'`, () => { 16 | const es = diff(xsw, ysw); 17 | const edited = tu.edit(xs, ys, es).join(''); 18 | expect(edited).eqls(ys.join('')); 19 | const patched = [...applyPatch(xs, calcPatch(xs, ys))].map(x => x.join('')).join(''); 20 | expect(patched).eqls(ys.join('')); 21 | }); 22 | } 23 | }); 24 | } 25 | } 26 | }); 27 | 28 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2021 Logan R. Kearsley 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | 9 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2015", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019' or 'ESNEXT'. */ 4 | "module": "commonjs", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', or 'ESNext'. */ 5 | "declaration": true, /* Generates corresponding '.d.ts' file. */ 6 | "outDir": "bin", /* Redirect output structure to the directory. */ 7 | "rootDir": "src", 8 | "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */ 9 | "strict": true, 10 | "noUnusedLocals": true, /* Report errors on unused locals. */ 11 | "noUnusedParameters": true, /* Report errors on unused parameters. */ 12 | "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ 13 | "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */ 14 | }, 15 | "include": [ 16 | "src/**/*" 17 | ], 18 | "exclude": [ 19 | "node_modules", 20 | "test/**/*" 21 | ] 22 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fast-myers-diff", 3 | "version": "3.2.0", 4 | "description": "A fast, minimal, memory-efficient diff algorithm on strings, arrays, and typed arrays.", 5 | "main": "bin/index.js", 6 | "files": [ 7 | "bin/index.js", 8 | "bin/index.d.ts" 9 | ], 10 | "scripts": { 11 | "test": "mocha -r ts-node/register test/**/*-test.ts", 12 | "build": "tsc", 13 | "lint": "eslint . 
--ext .ts --fix", 14 | "minify": "jsmin -o bin/index.min.js bin/index.js && del bin\\index.js && move bin\\index.min.js bin\\index.js", 15 | "prepare": "tsc && npm run minify", 16 | "benchmark": "tsc && node test/benchmark.js" 17 | }, 18 | "keywords": [ 19 | "myers", 20 | "diff", 21 | "lcs" 22 | ], 23 | "author": "Logan R. Kearsley", 24 | "license": "MIT", 25 | "devDependencies": { 26 | "@types/benchmark": "2.1.0", 27 | "@types/chai": "^4.2.14", 28 | "@types/mocha": "^8.0.4", 29 | "@types/seedrandom": "^2.4.28", 30 | "@typescript-eslint/eslint-plugin": "^4.10.0", 31 | "@typescript-eslint/parser": "^4.10.0", 32 | "benchmark": "^2.1.4", 33 | "benchtable": "^0.1.0", 34 | "chai": "^4.2.0", 35 | "eslint": "^7.15.0", 36 | "fast-array-diff": "1.0.1", 37 | "fast-diff": "1.2.0", 38 | "fast-myers-diff": "2.0.0", 39 | "jsmin": "^1.0.1", 40 | "microtime": "^3.0.0", 41 | "mocha": "^10.1.0", 42 | "myers-diff": "^2.0.1", 43 | "seedrandom": "^3.0.5", 44 | "ts-node": "^9.1.1", 45 | "typescript": "^3.9.7" 46 | }, 47 | "repository": { 48 | "type": "git", 49 | "url": "git+https://github.com/gliese1337/fast-myers-diff.git" 50 | }, 51 | "bugs": { 52 | "url": "https://github.com/gliese1337/fast-myers-diff/issues" 53 | }, 54 | "homepage": "https://github.com/gliese1337/fast-myers-diff#readme" 55 | } 56 | -------------------------------------------------------------------------------- /test/benchmark.js: -------------------------------------------------------------------------------- 1 | const Benchtable = require('benchtable'); 2 | const suite = new Benchtable('diffs', {isTransposed : true}); 3 | const fastMyersDiff = require('../bin'); 4 | const myersDiff = require('myers-diff'); 5 | const prev_version = require('fast-myers-diff'); 6 | const fastDiff = require('fast-diff'); 7 | const fastArray = require('fast-array-diff').diff; 8 | const seedRandom = require('seedrandom'); 9 | seedRandom('benchmark', {global: true}); 10 | 11 | function repeat(s1, n) { 12 | let ans = ''; 13 | let ss = s1; 
14 | while(n >= 1){ 15 | if(n % 2 === 1){ 16 | ans += ss; 17 | } 18 | n >>= 1; 19 | if(n >= 1) ss = ss + ss; 20 | } 21 | return ans; 22 | } 23 | 24 | function insertions(insertion, into){ 25 | let ans = into; 26 | for(let i = 0; i < insertion.length; ++i){ 27 | const pos = Math.random() * ans.length; 28 | ans = ans.slice(0, pos) + insertion[i] + ans.slice(pos); 29 | } 30 | return ans; 31 | } 32 | 33 | suite.addFunction('myers-diff-2.0.1', (x, y, lcs) => { 34 | let n = x.length + y.length 35 | for(const t of myersDiff.diff(x, y, {compare: 'chars'})){ 36 | n -= t.lhs.del + t.rhs.add; 37 | } 38 | if(typeof lcs === 'number' && n !== 2*lcs) throw 'Wrong result'; 39 | }); 40 | 41 | suite.addFunction('fast-myers-diff', (x, y) => { 42 | let n = x.length + y.length; 43 | for(const [xs,xe,ys,ye] of fastMyersDiff.diff(x, y)){ 44 | n -= (xe - xs) + (ye - ys); 45 | } 46 | if(typeof lcs === 'number' && n !== 2*lcs) throw 'Wrong result'; 47 | }); 48 | 49 | suite.addFunction('fast-diff-1.2.0', (x, y) => { 50 | let n = 0; 51 | for(const [side, ] of fastDiff(x, y)){ 52 | n += side; 53 | } 54 | if(typeof lcs === 'number' && n !== lcs) throw 'Wrong result'; 55 | }); 56 | 57 | suite.addFunction('fast-array-diff-1.0.1', (x, y) => { 58 | let n = 0; 59 | try { 60 | const { removed: _r, added: _a } = fastArray(x.split(''), y.split('')); 61 | } catch(e) { console.log(e); } 62 | if(typeof lcs === 'number' && n !== lcs) throw 'Wrong result'; 63 | }); 64 | 65 | suite.addFunction('fast-myers-diff-2.0.0', (x, y) => { 66 | let n = x.length + y.length; 67 | if(n >= 256){ 68 | throw 'Lengths above 256 not supported' 69 | } 70 | for(const [xs,xe,ys,ye] of prev_version.diff(x, y)){ 71 | n -= (xe - xs) + (ye - ys); 72 | } 73 | if(typeof lcs === 'number' && n !== 2*lcs) throw 'Wrong result'; 74 | }); 75 | 76 | for(const [n, c1, c2] of [ 77 | [10, 100, 100], 78 | [10, 4, 200], 79 | [100, 10, 10], 80 | [100, 20, 0], 81 | [100, 0, 20], 82 | [10, 1000, 1000], 83 | [10000, 100, 100], 84 | [10000, 200, 0], 
85 | [10000, 0, 200], 86 | [10000, 10, 10], 87 | [10000, 20, 0], 88 | [10000, 0, 20] 89 | 90 | ]){ 91 | const lcs = repeat('a', n); 92 | const x = insertions(repeat('d', c1), lcs); 93 | const y = insertions(repeat('i', c2), lcs); 94 | suite.addInput(`n=${n}, +${c1}, delete=${c2}`, [x, y, n]); 95 | } 96 | 97 | suite.on("cycle", function (evt) { 98 | console.log(" - " + evt.target); 99 | }); 100 | 101 | suite.on("complete", () => { 102 | console.log('Fastest is ' + suite.filter('fastest').map('name')); 103 | console.log(suite.table.toString()); 104 | }); 105 | 106 | console.log("Is it really fast?"); 107 | console.log(new Array(30).join("-")); 108 | suite.run(); -------------------------------------------------------------------------------- /test/metatesting.ts: -------------------------------------------------------------------------------- 1 | import 'mocha'; 2 | import {expect} from 'chai'; 3 | import * as tu from './test-utils'; 4 | 5 | describe('Meta testing', () => { 6 | describe('test generation', () => { 7 | it('the distribution of x', () => { 8 | // http://oeis.org/A000110 9 | // The number of equivalence relations that can be defined on a set of n elements. 
10 | const N = 7; 11 | const counts = [...Array(N)].map((_, n) => [...tu.equivalencyClasses(n)].length); 12 | expect(counts).eql([1, 1, 2, 5, 15, 52, 203, 877, 4140, 21147, 115975, 678570, 4213597, 27644437, 190899322].slice(0, N)); 13 | }); 14 | it('the sequences with just one class', () => { 15 | expect([...tu.equivalencyClasses(4, 1, 1)]).eql([[1, ['a', 'a', 'a', 'a']]]); 16 | expect([...tu.equivalencyClasses(4, 10, 1)]).eql([[1, ['a', 'a', 'a', 'a']]]); 17 | }); 18 | describe('the distribution of y', () => { 19 | for (let c = 1; c < 5; ++c) { 20 | for (let n = 0; n < 4; ++n) { 21 | it(`n=${n}, c=${c}, k=${c + 1}, ${Math.pow(c + 1, n)} elements`, () => { 22 | expect([...tu.equivalencyClasses(n, c, c + 1)].length).eql(Math.pow(c + 1, n)); 23 | }); 24 | } 25 | } 26 | }); 27 | describe('saves a few checks :)', () => { 28 | for (let n1 = 0; n1 < 5; ++n1) { 29 | for (let n2 = 0; n2 < 5; ++n2) { 30 | it(`${n1}, ${n2}`, () => { 31 | expect([...tu.allPairs(n2, n1)].length - 1).lessThan([...tu.equivalencyClasses(n1 + n2)].length); 32 | }); 33 | } 34 | } 35 | }); 36 | describe('inputs x and y are symmetric', () => { 37 | for (let n1 = 0; n1 < 5; ++n1) { 38 | for (let n2 = 0; n2 < 5; ++n2) { 39 | it(`${n1}, ${n2} vs ${n2}, ${n1}`, () => { 40 | expect([...tu.allPairs(n2, n1)].length - 1).lessThan([...tu.allPairsCore(n1, n2)].length); 41 | }); 42 | } 43 | } 44 | }); 45 | }); 46 | 47 | describe('CharArray', () => { 48 | it('must be an UInt16Array', () => { 49 | expect(new tu.CharArray([]) instanceof Uint16Array) 50 | .eqls(true, 'CharArray must be instance of Uint16Array'); 51 | }); 52 | it('must be mutable', () => { 53 | const v = new tu.CharArray([48, 49, 50, 51]); 54 | expect(v.toString()) 55 | .eqls('0123', 'Must be initialized accordingly to constructor argument'); 56 | v[1] += 5; 57 | expect(v.toString()).eqls('0623', 'Must be changed after mutation'); 58 | expect(v.array().join('')).eqls('0623', 'Must provide an array of single char strings'); 59 | }); 60 | }); 61 | 62 
| describe("Randomized tests", () => { 63 | it('expect consistent sizes and types', () => { 64 | const [x, y] = tu.subsequences(100, 14, 20); 65 | expect(x.length).eqls(86, 'incorrect size for x'); 66 | expect(y.length).eqls(80, 'incorrect size for y'); 67 | expect(typeof x.toString()).eqls('string'); 68 | expect(typeof y.toString()).eqls('string'); 69 | expect(Array.isArray(x.array())).eqls(true); 70 | }); 71 | it('size of substrings', () => { 72 | const z = new tu.CharArray(10); 73 | z.fill('0'.charCodeAt(0), 0, 10); 74 | expect(z.length).eql(10); 75 | expect(tu.substring(z, 5).toString()).eqls('00000'); 76 | }); 77 | }); 78 | 79 | describe('Predicted diffs', () => { 80 | it('Check the sequence excluding the differences', () => { 81 | for (let c1 = 1; c1 < 20; ++c1) { 82 | for (let c2 = 1; c2 < 20; ++c2) { 83 | for(const {s1, s2, x, y, diffs} of [ 84 | tu.sparseBinaryPredictable((c1+c2)*(c1+c2+1)+1000, c1, c2), 85 | tu.densePredictable(2*(c1+c2)+2, c1, c2) 86 | ]){ 87 | const n = x.length - c1; 88 | const [xc, yc] = tu.excludeDiff([...x], [...y], diffs); 89 | expect(xc.length).eqls(n); 90 | expect(yc.length).eqls(n); 91 | expect(s1.length).eqls(c1); 92 | expect(s2.length).eqls(c2); 93 | expect(x.length).eqls(n + c1); 94 | expect(y.length).eqls(n + c2); 95 | expect(xc.map((_, i) => xc[i] === yc[i]).reduce((a, b) => a && b)) 96 | .eqls(true, 'Excluding the differences the both sides must converge to a common string'); 97 | } 98 | } 99 | } 100 | }); 101 | }); 102 | }); -------------------------------------------------------------------------------- /test/randomized-test.ts: -------------------------------------------------------------------------------- 1 | import seedRandom from "seedrandom"; 2 | import * as tu from "./test-utils"; 3 | import {expect} from "chai"; 4 | import {diff} from "../src"; 5 | 6 | seedRandom('diff', {global: true}); 7 | 8 | describe("Randomized edits in small strings", () => { 9 | for (let n = 15; n < 25; ++n) { 10 | for (let d1 = 0; d1 < 
10; ++d1) { 11 | for (let d2 = 0; d2 < 10; ++d2) { 12 | // It can be made tight 13 | const complexityBound = 2 * (2 * n + d1 + d2) * (d1 + d2 + 1); 14 | const [xs, ys] = tu.subsequences(n, d1, d2); 15 | const [xst, yst] = [xs.toString(), ys.toString()]; 16 | const [xsw, ysw] = tu.accessWatchDog(complexityBound, [xs.array(), ys.array()]); 17 | it(`patch (${n}, ${d1}, ${d2}) '${xst}' -> '${yst}'`, () => { 18 | // this will throw an error if the number of accesses exceeds 19 | // the complexity bound 20 | expect(xs.length).eqls(n - d1); 21 | expect(ys.length).eqls(n - d2); 22 | let es: number[][] = []; 23 | try { 24 | es = [...diff(xsw, ysw)]; 25 | expect(tu.diffSize(es)).lessThan(d1 + d2 + 1); 26 | } catch(e) { 27 | if(e.message.indexOf('Too many operations')){ 28 | expect.fail({xst, yst}.toString() + '\nToo many operations'); 29 | }else{ 30 | throw e; 31 | } 32 | } 33 | const edited = tu.edit(xs.array(), ys.array(), es as any).join(''); 34 | expect(edited).eqls(ys.toString()); 35 | }); 36 | } 37 | } 38 | } 39 | }); 40 | 41 | 42 | describe('Diff pieces', () => { 43 | 44 | describe('sparse inputs with predictable results', () => { 45 | for (let c1 = 2; c1 <= 100; c1 += 5) { 46 | for (let c2 = 2; c2 <= 100; c2 += 5) { 47 | for (let n = (c1 + c2 + 1) * (c1 + c2 + 1); n <= 1000; n += 100) { 48 | it(JSON.stringify({c1, c2, n}), () => { 49 | const {x, y, s1, s2, diffs} = tu.sparseBinaryPredictable(n, c1, c2); 50 | let seen: number[][] = []; 51 | try { 52 | seen = tu.checkDiffComputation(x, y, 400 * n * (c1 + c2)); 53 | } catch (e) { 54 | if (e.message.indexOf('Too many operations')) { 55 | throw new Error(JSON.stringify({n, s1:[...s1], s2: [...s2]})); 56 | } else { 57 | throw e; 58 | } 59 | } 60 | expect(seen).eqls(diffs); 61 | }); 62 | } 63 | } 64 | } 65 | }); 66 | describe('dense inputs with predictable results', () => { 67 | for (let c1 = 1; c1 <= 10; c1 += 1) { 68 | for (let c2 = 1; c2 <= 10; c2 += 1) { 69 | for (let n = 2*(c1 + c2 + 1); n <= 30; n += 1) { 70 | 
it(JSON.stringify({c1, c2, n}), () => { 71 | const {x, y, s1, s2, diffs} = tu.densePredictable(n, c1, c2); 72 | let seen: number[][] = []; 73 | try { 74 | seen = tu.checkDiffComputation(x, y, 4 * n * (c1 + c2)); 75 | } catch (e) { 76 | if (e.message.indexOf('Too many operations')) { 77 | throw new Error(JSON.stringify({n, s1:[...s1], s2: [...s2]})); 78 | } else { 79 | throw e; 80 | } 81 | } 82 | expect(seen).eqls(diffs); 83 | 84 | }); 85 | } 86 | } 87 | } 88 | }); 89 | }); 90 | 91 | 92 | describe("Search good examples", () => { 93 | 94 | for (let d1 = 5; d1 <= 6; d1 += 3) { 95 | for (let d2 = 5; d2 <= 6; d2 += 3) { 96 | for (let n = 200; n < 2100; n += 100) { 97 | // It can be made tight 98 | const complexityBound = 100 * n * (d1 + d2 + 1); 99 | it(`patch (${n}, ${d1}, ${d2}) `, () => { 100 | for (let k = 0; k * n < 1000; ++k) { 101 | const [xs, ys] = tu.subsequences(n, d1, d2); 102 | tu.checkDiffComputation(xs, ys, complexityBound); 103 | } 104 | }); 105 | } 106 | } 107 | } 108 | }); 109 | 110 | describe("Randomized edits in big strings", () => { 111 | for (let n = 5000; n < 10000; n += 500) { 112 | for (let d1 = 0; d1 < 50; d1 += 10) { 113 | for (let d2 = 0; d2 < 50; d2 += 10) { 114 | // It can be made tight 115 | const complexityBound = 40 * n * (d1 + d2 + 1); 116 | it(`patch (${n}, ${d1}, ${d2})`, () => { 117 | const [xs, ys] = tu.subsequences(n, d1, d2); 118 | tu.checkDiffComputation(xs, ys, complexityBound); 119 | }); 120 | } 121 | } 122 | } 123 | }); 124 | -------------------------------------------------------------------------------- /test/special-test.ts: -------------------------------------------------------------------------------- 1 | import 'mocha'; 2 | import { expect } from 'chai'; 3 | import { calcPatch, applyPatch, diff, lcs, Sliceable, calcSlices } from '../src'; 4 | 5 | function extract(ys: Sliceable, indices: [number, number, number][]) { 6 | return indices.map(([, s, l]) => ys.slice(s, s + l)).join(''); 7 | } 8 | 9 | const tests: [string, 
string, string[]][] = [ 10 | ['', '', ['']], 11 | ['a', '', ['']], 12 | ['', 'b', ['']], 13 | ['a', 'b', ['']], 14 | ['a', 'bb', ['']], 15 | ['a', 'bc', ['']], 16 | ['a', 'bac', ['a']], 17 | ['a', 'baa', ['a']], 18 | ['a', 'bab', ['a']], 19 | ['a', 'bbb', [''] ], 20 | ['aa', 'ba', ['a']], 21 | ['aa', 'bba', ['a']], 22 | ['aa','aaaa', ['aa']], 23 | ['ab', 'bb', ['b']], 24 | ['ab', 'cb', ['b']], 25 | ['ab', 'baa', ['b', 'a']], 26 | ['ab', 'bbb', ['b']], 27 | ['ab', 'bbc', ['b']], 28 | ['ab', 'bcb', ['b']], 29 | ['ab', 'caa', ['a']], 30 | ['ab', 'cbb', ['b']], 31 | ['ab', 'ccb', ['b']], 32 | ['bab', 'a', ['a']], 33 | ['bbb', 'a', ['']], 34 | ['bab', 'aa', ['a']], 35 | ['bba', 'aa', ['a']], 36 | ['abb', 'b', ['b']], 37 | ['bb', 'a', [''] ], 38 | ['abc', 'abc', ['abc']], 39 | ['abcd', 'obce', ['bc']], 40 | ['abc', 'ab', ['ab']], 41 | ['abc', 'bc', ['bc']], 42 | ['abcde', 'zbodf', ['bd']], 43 | ['preabmcdpost', 'prezxmywpost', ['prempost']], 44 | ['abcfboopqxyz', 'abcgbooprxyz', ['abcboopxyz']], 45 | ['GTCGTTCGGAATGCCGTTGCTCTGTAAA', 'ACCGGTCGAGTGCGCGGAAGCCGGCCGAA', ['GTCGTCGGAAGCCGGCCGAA']], 46 | ]; 47 | 48 | describe('Special tests', () => { 49 | it('should detect a cyclic rotation', () => { 50 | for (let l = 2; l < 1024; l += l) { 51 | const x = new Uint8Array(l); 52 | const y = new Uint8Array(l); 53 | x.fill(0, 1, l); 54 | y.fill(0, 0, l - 1); 55 | x[0] = y[l - 1] = 1; 56 | const es = [...diff(x, y)]; 57 | expect(es).eqls([[0, 1, 0, 0], [l, l, l - 1, l]]); 58 | } 59 | }); 60 | 61 | it('binary sparse vector', () => { 62 | const s1 = [59, 495, 567]; 63 | const s2 = [176, 746, 861]; 64 | const diffs = [ 65 | [59, 60, 59, 59], 66 | [177, 177, 176, 177], 67 | [495, 496, 495, 495], 68 | [567, 568, 566, 566], 69 | [748, 748, 746, 747], 70 | [862, 862, 861, 862] 71 | ]; 72 | const x = new Int8Array(1000); 73 | const y = new Int8Array(1000); 74 | for (const i of s1) x[i] = 1; 75 | for (const i of s2) y[i] = 1; 76 | const seen = [...diff(x, y)]; 77 | expect(seen).eqls(diffs); 
78 | }); 79 | 80 | it('works with a custom comparator', () => { 81 | const a = "ABab"; 82 | const b = "abAB"; 83 | const case_ignore_eq = (i: number, j: number) => a[i].toLocaleLowerCase() === b[j].toLocaleLowerCase(); 84 | const diffs = diff(a, b, case_ignore_eq); 85 | expect(diffs).to.be.empty; 86 | }); 87 | }); 88 | 89 | describe('handcrafted examples', () => { 90 | it('avoid fragmentation (1 char)', () => { 91 | // Other valid solutions are 92 | // [[0,1,0,0], [1,1,0,1]], [[0,0,0,1],[0,1,1,1]] 93 | // But this increases the size and makes it more difficult to 94 | // interpret the results 95 | expect([...diff('a', 'b')]).eqls([[0,1,0,1]]); 96 | }); 97 | 98 | it('random tests', () => { 99 | const x = new Array(10000).fill('a'); 100 | const y = x.slice(); 101 | let j = 0; 102 | for(let i = 0; i < 50; ++i){ 103 | let t; 104 | do{ 105 | do{ 106 | t = 1 + ~~((y.length - 2) * Math.random()); 107 | }while(y[t] === 'b'); 108 | y[t] = 'b'; 109 | if(++j === y.length){ 110 | // Prevent an infinite loop. 
111 | return; 112 | } 113 | }while(y[t-1] === 'b' || y[t+1] === 'b'); 114 | expect([...diff(x, y)].length).eqls(i+1); 115 | } 116 | expect([...diff('a', 'b')]).eqls([[0,1,0,1]]); 117 | }); 118 | 119 | for (const {n, s1, s2, diffs} of [ 120 | { 121 | n: 8, 122 | s1: [0], 123 | s2: [2, 5], 124 | diffs: [[0, 1, 0, 0], [3, 3, 2, 3], [5, 5, 5, 6]] 125 | }, 126 | { 127 | n: 12, 128 | s1: [0, 5, 8], 129 | s2: [2, 6], 130 | diffs: [ 131 | [0, 1, 0, 0], 132 | [3, 3, 2, 3], 133 | [5, 6, 5, 5], 134 | [7, 7, 6, 7], 135 | [8, 9, 8, 8] 136 | ] 137 | } 138 | ]) { 139 | const x = new Array(n+s1.length); 140 | const y = new Array(n+s2.length); 141 | x.fill('a', 0, n + s1.length); 142 | y.fill('a', 0, n + s2.length); 143 | for (const i of s1) x[i] = 'b'; 144 | for (const i of s2) y[i] = 'c'; 145 | const yt = y.join(''); 146 | const xt = x.join(''); 147 | it(`Diff for ${xt} ${yt}`, () => { 148 | const seen = [...diff(x, y)]; 149 | console.log(x.join('')); 150 | console.log(y.join('')); 151 | expect(seen).eqls(diffs); 152 | }); 153 | } 154 | }); 155 | 156 | describe("LCS", () => { 157 | for (const [xs, ys, ans] of tests) { 158 | it(`should calculate lcs for '${xs}', '${ys}'`, () => { 159 | const seq = [...lcs(xs, ys)]; 160 | const common = extract(ys, seq); 161 | expect(ans).to.include(common); 162 | }); 163 | } 164 | }); 165 | 166 | describe('patch', () => { 167 | for (const [xs, ys] of tests) { 168 | it(`should calculate patch for ${ xs }, ${ ys }`, () => { 169 | const edit = [...applyPatch(xs, calcPatch(xs, ys))].join(''); 170 | expect(edit).to.eql(ys); 171 | }); 172 | } 173 | }); 174 | 175 | describe('slices', () => { 176 | it('should calculate slices for handcrafted cases', () => { 177 | for (const [xs, ys, diffs] of [ 178 | [ 179 | ['a', 'b', 'c', 'd'], 180 | ['a', 'e', 'x', 'b', 'c', 'f'], 181 | [[0, ['a']], [1, ['e', 'x']], [0, ['b', 'c']], [-1, ['d']], [1, ['f']]] 182 | ], 183 | [ 184 | 'ab cdeee', 185 | 'x ab cdee', 186 | [[1, 'x '], [0, 'ab cd'], [-1, 'e'], [0, 'ee']] 187 | 
] 188 | ]) { 189 | const result = [...calcSlices(xs, ys)]; 190 | expect(result).to.eql(diffs); 191 | } 192 | }); 193 | 194 | for (const [xs, ys] of tests) { 195 | it(`should calculate slices for ${ xs }, ${ ys }`, () => { 196 | const nx: string[] = []; 197 | const ny: string[] = []; 198 | for (const [type, str] of calcSlices(xs, ys)) { 199 | switch (type) { 200 | case 0: 201 | ny.push(str); 202 | case -1: 203 | nx.push(str); 204 | break; 205 | case 1: 206 | ny.push(str); 207 | break; 208 | } 209 | } 210 | expect(nx.join('')).to.eql(xs); 211 | expect(ny.join('')).to.eql(ys); 212 | }); 213 | } 214 | }); -------------------------------------------------------------------------------- /test/test-utils.ts: -------------------------------------------------------------------------------- 1 | import {diff} from "../src"; 2 | import {expect} from "chai"; 3 | export class CharArray extends Uint16Array { 4 | constructor(v: any) { 5 | super(typeof v === 'string' ? v.split('').map(x => x.charCodeAt(0)) : v); 6 | Object.defineProperties(this, {length: {writable: false, value: super.length}}); 7 | } 8 | 9 | toString() { 10 | return String.fromCharCode(...this.codeArray()); 11 | } 12 | 13 | slice(start?: number, end?: number): CharArray { 14 | return new CharArray(this.subarray(start, end)); 15 | } 16 | 17 | array(): string[] { 18 | return this.toString().split(''); 19 | } 20 | 21 | codeArray(): number[] { 22 | return Array.from(this); 23 | } 24 | } 25 | 26 | export function string(n: number) { 27 | const u = new CharArray(n); 28 | for (let i = 0; i < n; ++i) { 29 | u[i] = 65 + 20 * Math.random(); 30 | } 31 | return u; 32 | } 33 | 34 | /** 35 | * Produces an array with nSamples values between 0 <= v[i] < end 36 | * @param nSamples 37 | * @param end 38 | */ 39 | export function sample(nSamples: number, end: number): Int32Array { 40 | const _result = new Int32Array(nSamples); 41 | if (2 * nSamples > end) { 42 | const skip = sample(end - nSamples, end); 43 | let skipped = 0; 44 | for 
(let i = 0; i < nSamples; ++i) { 45 | if (i + skipped === skip[skipped]) { 46 | ++skipped; 47 | } 48 | _result[i] = i + skipped; 49 | } 50 | } else { 51 | for (let i = 0; i < nSamples; ++i) { 52 | _result[i] = ~~(Math.random() * (end - nSamples)); 53 | } 54 | _result.sort(); 55 | for (let i = 0; i < nSamples; ++i) { 56 | _result[i] += i; 57 | } 58 | } 59 | return _result; 60 | } 61 | 62 | export function substring(input: CharArray, totalLength: number): CharArray { 63 | const out = new CharArray(totalLength); 64 | const pos = sample(totalLength, input.length); 65 | for (let i = 0; i < totalLength; ++i) { 66 | out[i] = input[pos[i]]; 67 | } 68 | return out; 69 | } 70 | 71 | 72 | /** 73 | * Starting from a sequence z produces two sequences 74 | * x and y by removing symbols 75 | * @param n: length of the initial string, or the initial string itself 76 | * @param d1: number of characters deleted to produce x 77 | * @param d2: number of characters deleted to produce y 78 | * @returns [CharArray, CharArray]: the pair [x, y] 79 | */ 80 | export function subsequences(n: number | CharArray, d1: number, d2: number): [CharArray, CharArray] { 81 | const z = typeof n === 'number' ? 
string(n) : n; 82 | const x = substring(z, z.length - d1); 83 | const y = substring(z, z.length - d2); 84 | return [x, y]; 85 | } 86 | 87 | type DiffPredictionInfo = { x: CharArray, y: CharArray, s1: Int32Array, s2: Int32Array, diffs: number[][] } 88 | 89 | function direcDiffPrediction(n: number, c1: number, c2: number, margin: number, v1: number, v2: number) { 90 | const x = new CharArray(n + c1); 91 | const y = new CharArray(n + c2); 92 | const s1 = sample(c1, n + c1 - (c1 + c2) * margin); 93 | const s2 = sample(c2, n + c2 - (c1 + c2) * margin); 94 | let offset = 0; 95 | const diffs = []; 96 | let i1 = 0; 97 | let i2 = 0; 98 | while (i1 < c1 || i2 < c2) { 99 | if (i1 < c1 && (i2 >= c2 || s1[i1] - i1 <= s2[i2] - i2)) { 100 | const t = (s1[i1++] += offset + 1); 101 | diffs.push([t, t + 1, t + 1 + (i2 - i1), t + 1 + (i2 - i1)]); 102 | x[t] = v1; 103 | } else { 104 | const t = (s2[i2++] += offset + 1); 105 | diffs.push([t + 1 - (i2 - i1), t + 1 - (i2 - i1), t, t + 1]); 106 | y[t] = v2; 107 | } 108 | offset += margin; 109 | } 110 | return {x, y, s1, s2, diffs}; 111 | } 112 | 113 | /** 114 | * Generates two sparse sequences with a few ones each separated by a 115 | * number of zeros that makes only one alignment reasonable. 116 | * @param n 117 | * @param c1 118 | * @param c2 119 | */ 120 | export function sparseBinaryPredictable(n: number, c1: number, c2: number): DiffPredictionInfo { 121 | if ((c1 + c2) * (c1 + c2 + 1) > n) { 122 | throw new Error('The changes must be sparser'); 123 | } 124 | const margin = c1 + c2 + 1; 125 | const v1 = 1, v2 = 1; 126 | return direcDiffPrediction(n, c1, c2, margin, v1, v2); 127 | } 128 | 129 | /** 130 | * Generates two sequences with a few values set to distinct values 131 | * so that there is no match except for the common subsequence. 132 | * Gives a margin of 1, ensuring that x and y are not changed 133 | * at the same position, to prevent ambiguity on the order of 134 | * the operations. 
135 | */ 136 | export function densePredictable(n: number, c1: number, c2: number): DiffPredictionInfo { 137 | if ((c1 + c2) * 2 > n) { 138 | throw new Error('More changes than the vector length'); 139 | } 140 | const v1 = 1, v2 = 2; 141 | return direcDiffPrediction(n, c1, c2, 1, v1, v2); 142 | } 143 | 144 | 145 | const chars = 'abcdefghijklmnopqrstuvwxyz01234567890'; 146 | 147 | /** 148 | * Let E(x) = [0..n).map( i => [0..n).map(j => x[i] == y[j] )) 149 | * 150 | * Generates sequences such that for every x of length n, there is one 151 | * representative output r, such that E(r, r) equals E(x, r) 152 | * 153 | * If k is given then it will produce at most k distinct elements 154 | * If c is given then produces the representatives such that given 155 | * a sequence x with n elements and a sequence y with c distinct elements 156 | * one of the outputs r will have E([0..c), r) = E(uniq(y), x) 157 | * where uniq remove repeated elements from y. 158 | * 159 | */ 160 | export function * equivalencyClasses(n: number, c = 0, k = Infinity): 161 | Generator<[number, string[]]> { 162 | const seq: number[] = []; 163 | function * work(i: number, j: number): Generator<[number, string[]]> { 164 | if (i == n) { 165 | yield [j, seq.map(i => chars[i])]; 166 | } else { 167 | for (seq[i] = 0; seq[i] < j; ++seq[i]) { 168 | yield * work(i + 1, j); 169 | } 170 | if (j < k) { 171 | yield * work(i + 1, j + 1); 172 | } 173 | } 174 | } 175 | yield * work(0, Math.min(c, k)); 176 | } 177 | 178 | 179 | export function checkDiffComputation(xs: CharArray, ys: CharArray, B: number): number[][] { 180 | const [xsw, ysw] = accessWatchDog(B, [xs.array(), ys.array()]); 181 | let es = []; 182 | try { 183 | es = [...diff(xsw, ysw)]; 184 | } catch { 185 | throw new Error(JSON.stringify({message: 'Too many operations', x: [...xs], y: [...ys]}, null, 2)); 186 | } 187 | const edited = edit(xs.array(), ys.array(), es).join(''); 188 | expect(edited).eqls(ys.toString()); 189 | return es; 190 | } 191 | 192 | 
/**
 * Total number of elements touched by an edit script: the sum of the
 * deleted-range and inserted-range lengths over all quadruples.
 */
export function diffSize(diffs: number[][]): number {
  let s = 0;
  for (const [xs, xe, ys, ye] of diffs) {
    s += (xe - xs) + (ye - ys);
  }
  return s;
}

/**
 * Yields pairs of representative sequences of lengths `n1` and `n2`.
 * For each representative of length `n1` using `c` distinct symbols, the
 * second sequence may use those `c` symbols plus at most one new one.
 */
export function * allPairsCore(n1: number, n2: number): Generator<[string[], string[]]> {
  for (const [c, v1] of equivalencyClasses(n1)) {
    for (const [, v2] of equivalencyClasses(n2, c, c + 1)) {
      yield [v1, v2];
    }
  }
}

/**
 * Same as `allPairsCore`, but always enumerates the shorter sequence
 * first (swapping the pair back afterwards) to promote less redundancy.
 */
export function * allPairs(n1: number, n2: number): Generator<[string[], string[]]> {
  // promote less redundancy
  if (n1 > n2) {
    for (const [v2, v1] of allPairsCore(n2, n1)) {
      yield [v1, v2];
    }
  } else {
    yield * allPairsCore(n1, n2);
  }
}

/**
 * Wraps each array in a Proxy that counts reads of numeric-looking
 * properties. The counter is shared by all returned proxies; the read
 * that brings it up to `max` throws.
 *
 * @param max    access budget (the `max`-th indexed read throws)
 * @param arrays objects to wrap
 * @throws Error('Too many operations') from the proxied read
 */
export function accessWatchDog<T extends object>(max: number, arrays: T[]): T[] {
  let counter = 0;
  const handler = {
    get: function (target: object, prop: PropertyKey, receiver: any): any {
      // Only element reads count; `length`, methods and symbols are free.
      if (/^\d+$/.test(prop.toString())) {
        if (++counter >= max) {
          throw new Error('Too many operations');
        }
      }
      return Reflect.get(target, prop, receiver);
    }
  };
  return arrays.map(x => {
    return new Proxy(x, handler) as T;
  });
}


/**
 * Applies an edit script to `xs` and returns the resulting array.
 *
 * @param xs source sequence
 * @param ys sequence the inserted ranges are taken from
 * @param es quadruples `[sx, ex, sy, ey]` in increasing order: delete
 *           `xs[sx, ex)` and insert `ys[sy, ey)` at that point
 */
export function edit<T>(xs: T[], ys: T[], es: Iterable<[number, number, number, number]>) {
  let i = 0;
  const result: T[] = [];
  for (const [sx, ex, sy, ey] of es) {
    while (i < sx) result.push(xs[i++]); // copy the untouched run
    if (sx < ex) {
      i = ex; // delete
    }
    if (sy < ey) {
      result.push(...ys.slice(sy, ey)); // insert
    }
  }
  result.push(...xs.slice(i));
  return result;
}

/**
 * Computes the portions of xs and ys that are not marked as different.
 * For an actual diff the two returned arrays must both be the LCS.
 * @param xs first sequence
 * @param ys second sequence
 * @param es diff quadruples `[sx, ex, sy, ey]` in increasing order
 */
export function excludeDiff<T>(xs: T[], ys: T[], es: Iterable<number[]>): [T[], T[]] {
  let ix = 0;
  let iy = 0;
  const rx: T[] = [];
  const ry: T[] = [];
  for (const [sx, ex, sy, ey] of es) {
    while (ix < sx) rx.push(xs[ix++]);
    while (iy < sy) ry.push(ys[iy++]);
    [ix, iy] = [ex, ey];
  }
  for (const c of xs.slice(ix)) rx.push(c);
  for (const c of ys.slice(iy)) ry.push(c);
  return [rx, ry];
}

// ---- /src/index.ts ----

/** Anything with numeric keys and a `length`. */
export type GenericIndexable<T> = { [key: number]: T, readonly length: number };
type TypedArray =
  Int8Array
  | Int16Array
  | Int32Array
  | Uint8Array
  | Uint16Array
  | Uint32Array
  | Float32Array
  | Float64Array;
/** Input types accepted by `diff` and `lcs`. */
export type Indexable<T> = string | T[] | TypedArray | GenericIndexable<T>;

/** Indexable that can also produce sub-ranges of its own type. */
export interface Sliceable<T> extends GenericIndexable<T> {
  slice(start: number, end?: number): this;
}

type Vec4 = [number, number, number, number];
type Vec3 = [number, number, number];

/** Reports whether element `i` of the first input equals element `j` of the second. */
type Comparator = (i: number, j: number) => boolean;

/** Mutable state shared between successive resumptions of `diff_internal`. */
type DiffState = {
  // Current sub-problem: start/length in xs (i, N) and in ys (j, M).
  i: number;
  N: number;
  j: number;
  M: number;
  // Size of each of the two circular halves of `b`.
  Z: number;
  b: TypedArray;
  eq: (x: number, y: number) => boolean;
  // Pending difference, still open to being extended.
  pxs: number;
  pxe: number;
  pys: number;
  pye: number;
  // Completed difference ready to be handed to the consumer.
  oxs: number;
  oxe: number;
  oys: number;
  oye: number;
  // Explicit stack of deferred sub-problems (4 numbers per entry).
  stack_top: number;
  stack_base: number[];
}

// Find the list of differences between 2 lists by
// recursive subdivision, requiring O(min(N,M)) space
// and O(min(N,M)*D) worst-case execution time where
// D is the number of differences.
/**
 * Resumable core of the diff: a hand-rolled state machine standing in for
 * a recursive generator.
 *
 * @param state shared state; the current sub-problem, the pending (`p*`)
 *              and completed (`o*`) differences and the explicit stack
 *              live here between calls
 * @param c     entry point: 0 to start on the sub-problem in `state`,
 *              1 to resume after a previous return of 1
 * @returns 1 when a completed difference has been stored in
 *          `state.oxs/oxe/oys/oye` (call again with c = 1 to continue), or
 *          2 when the work stack is empty; the last pending difference,
 *          if any, is then in `state.pxs/pxe/pys/pye`
 */
function diff_internal(state: DiffState, c: number): number {
  const { b, eq, stack_base } = state;
  let { i, N, j, M, Z, stack_top } = state;
  for (;;) {
    switch(c) {
      case 0: {
        Z_block: while (N > 0 && M > 0) {
          b.fill(0, 0, 2 * Z);

          const W = N - M;
          const L = N + M;
          const parity = L & 1;
          const offsetx = i + N - 1;
          const offsety = j + M - 1;
          const hmax = (L + parity) / 2;
          let z: number;
          h_loop: for (let h = 0; h <= hmax; h++) {
            const kmin = 2 * Math.max(0, h - M) - h;
            const kmax = h - 2 * Math.max(0, h - N);

            // Forward pass
            // (b[0, Z) is a circular buffer indexed by k mod Z.)
            for (let k = kmin; k <= kmax; k += 2) {
              const gkm = b[k - 1 - Z * Math.floor((k - 1)/Z)];
              const gkp = b[k + 1 - Z * Math.floor((k + 1)/Z)];
              const u = (k === -h || (k !== h && gkm < gkp)) ? gkp : gkm + 1;
              const v = u - k;
              let x = u;
              let y = v;
              while (x < N && y < M && eq(i + x, j + y)) x++, y++;
              b[k - Z * Math.floor(k/Z)] = x;
              if (parity === 1 && (z = W - k) >= 1 - h && z < h && x + b[Z + z - Z * Math.floor(z/Z)] >= N) {
                if (h > 1 || x !== u) {
                  // Defer the right-hand part, continue with the left-hand part.
                  stack_base[stack_top++] = i + x;
                  stack_base[stack_top++] = N - x;
                  stack_base[stack_top++] = j + y;
                  stack_base[stack_top++] = M - y;
                  N = u;
                  M = v;
                  Z = 2 * (Math.min(N, M) + 1);
                  continue Z_block;
                } else break h_loop;
              }
            }

            // Reverse pass
            // (b[Z, 2Z) is the second circular buffer; comparisons run from the end.)
            for (let k = kmin; k <= kmax; k += 2) {
              const pkm = b[Z + k - 1 - Z * Math.floor((k - 1)/Z)];
              const pkp = b[Z + k + 1 - Z * Math.floor((k + 1)/Z)];
              const u = (k === -h || (k !== h && pkm < pkp)) ? pkp : pkm + 1;
              const v = u - k;
              let x = u;
              let y = v;
              while (x < N && y < M && eq(offsetx - x, offsety - y)) x++, y++;
              b[Z + k - Z * Math.floor(k/Z)] = x;
              if (parity === 0 && (z = W - k) >= -h && z <= h && x + b[z - Z * Math.floor(z/Z)] >= N) {
                if (h > 0 || x !== u) {
                  // Defer the right-hand part, continue with the left-hand part.
                  stack_base[stack_top++] = i + N - u;
                  stack_base[stack_top++] = u;
                  stack_base[stack_top++] = j + M - v;
                  stack_base[stack_top++] = v;
                  N = N - x;
                  M = M - y;
                  Z = 2 * (Math.min(N, M) + 1);
                  continue Z_block;
                } else break h_loop;
              }
            }
          }

          if (N === M) continue;
          if (M > N) {
            i += N;
            j += N;
            M -= N;
            N = 0;
          } else {
            i += M;
            j += M;
            N -= M;
            M = 0;
          }

          // We already know either N or M is zero, so we can
          // skip the extra check at the top of the loop.
          break;
        }

        // yield delete_start, delete_end, insert_start, insert_end
        // At this point, at least one of N & M is zero, or we
        // wouldn't have gotten out of the preceding loop yet.
        if (N + M !== 0) {
          if (state.pxe === i || state.pye === j) {
            // it is a contiguous difference extend the existing one
            state.pxe = i + N;
            state.pye = j + M;
          } else {
            const sx = state.pxs;
            state.oxs = state.pxs;
            state.oxe = state.pxe;
            state.oys = state.pys;
            state.oye = state.pye;

            // Defer this one until we can check the next one
            state.pxs = i;
            state.pxe = i + N;
            state.pys = j;
            state.pye = j + M;

            // sx < 0 means there was no earlier pending difference to emit.
            if(sx >= 0) {
              state.i = i;
              state.N = N;
              state.j = j;
              state.M = M;
              state.Z = Z;
              state.stack_top = stack_top;
              return 1;
            }
          }
        }
      }
      // Intentional fall-through: pick up the next deferred sub-problem.
      case 1: {
        if (stack_top === 0) return 2;

        M = stack_base[--stack_top];
        j = stack_base[--stack_top];
        N = stack_base[--stack_top];
        i = stack_base[--stack_top];
        Z = 2 * (Math.min(N, M) + 1);
        c = 0;
      }
    }
  }
}

/**
 * Iterator facade over `diff_internal`. `c` holds the last return code:
 * 0 before the first step, 1 while differences are being produced and
 * 2 once the search has finished. The same result object is re-used for
 * every step; each value is a fresh quadruple.
 */
class DiffGen implements IterableIterator<Vec4> {
  private c = 0;
  private result: IteratorResult<Vec4> =
    { value: null as any, done: false };

  constructor(private state: DiffState) {}

  [Symbol.iterator]() { return this; }

  next() {
    const { state, result } = this;
    if (this.c > 1) {
      result.done = true;
      result.value = undefined;
      return result;
    }
    const c = diff_internal(state, this.c);
    this.c = c;
    if (c === 1) {
      result.value = [state.oxs, state.oxe, state.oys, state.oye];
      return result;
    }
    // Finished: flush the last pending difference, if there is one.
    if (state.pxs >= 0) {
      result.value = [state.pxs, state.pxe, state.pys, state.pye];
      return result;
    }
    result.done = true;
    result.value = undefined;
    return result;
  }
}

/**
 * Diffs the ranges `[i, i + N)` of the first input and `[j, j + M)` of
 * the second, using only `eq` to compare elements.
 *
 * @returns an iterator of `[sx, ex, sy, ey]`: delete `[sx, ex)` from the
 *          first input and insert `[sy, ey)` of the second in its place
 */
export function diff_core(
  i: number, N: number, j: number, M: number,
  eq: Comparator,
): IterableIterator<Vec4> {
  const Z = (Math.min(N, M) + 1) * 2;
  const L = N + M;
  // Smallest unsigned element type able to hold every index value.
  const b = new (L < 256 ? Uint8Array : L < 65536 ? Uint16Array : Uint32Array)(2 * Z);

  return new DiffGen({
    i, N, j, M, Z, b, eq,
    pxs: -1, pxe: -1, pys: -1, pye: -1,
    oxs: -1, oxe: -1, oys: -1, oye: -1,
    stack_top: 0, stack_base: [],
  });
}

/**
 * Diffs two indexable sequences after trimming their common prefix and
 * suffix.
 *
 * `eq` is only ever called with indices inside both inputs; in particular
 * it is never called for an empty input, so comparators that dereference
 * the elements are safe. The suffix trim stops before index `i`, exactly
 * as far as the previous implementation trimmed.
 *
 * @param xs first sequence
 * @param ys second sequence
 * @param eq optional comparator on indices; defaults to `xs[i] === ys[j]`
 * @returns an iterator of `[sx, ex, sy, ey]` quadruples (empty when the
 *          inputs are equal)
 */
export function diff<T extends Indexable<unknown>>(xs: T, ys: T, eq?: Comparator): IterableIterator<Vec4> {
  let i = 0;
  // N and M are exclusive end indices of the untrimmed middle sections.
  let N = xs.length;
  let M = ys.length;

  if (typeof eq === 'function') {
    // eliminate common prefix
    while (i < N && i < M && eq(i, i)) i++;

    // check for equality
    if (i === N && i === M) return [][Symbol.iterator]();

    // eliminate common suffix (bounds are checked before eq is consulted)
    while (N - 1 > i && M - 1 > i && eq(N - 1, M - 1)) {
      N--;
      M--;
    }
  } else {
    // eliminate common prefix
    while (i < N && i < M && xs[i] === ys[i]) i++;

    // check for equality
    if (i === N && i === M) return [][Symbol.iterator]();

    // eliminate common suffix
    while (N - 1 > i && M - 1 > i && xs[N - 1] === ys[M - 1]) {
      N--;
      M--;
    }

    eq = (a, b) => xs[a] === ys[b];
  }

  return diff_core(i, N - i, i, M - i, eq);
}

/**
 * Converts a stream of differences into the dual representation: the
 * runs shared by both inputs, as `[sx, sy, length]` triples.
 */
class LCSGen implements IterableIterator<Vec3> {
  // Position just past the last difference seen, in xs and ys.
  private i = 0;
  private j = 0;

  constructor(private diff: IterableIterator<Vec4>, private N: number) {}

  [Symbol.iterator]() { return this; }

  next() {
    // In each iteration, i and j are aligned at the beginning of a
    // shared section. That section is yielded, and i and j are
    // re-aligned at the end of the succeeding unique sections.
    for (;;) {
      const rec = this.diff.next();
      const { i, j } = this;
      if (rec.done) {
        const { N } = this;
        if (i < N) {
          // Trailing shared run after the last difference.
          rec.done = false as any;
          rec.value = [i, j, N - i] as any;
          this.i = N;
        }
        return rec as IteratorResult<Vec3>;
      }
      const v = rec.value;
      const sx = v[0];
      const ex = v[1];
      const ey = v[3];

      this.i = ex;
      this.j = ey;

      if (i !== sx) {
        v.length--; // re-use the vec4 as a vec3 to avoid allocation
        v[0] = i;
        v[1] = j;
        v[2] = sx - i;
        return rec as unknown as IteratorResult<Vec3>;
      }
      // The difference starts right at the cursor (e.g. at index 0), so
      // no shared run precedes it: move on instead of leaking the vec4.
    }
  }
}

/**
 * Yields the aligned common runs of `xs` and `ys` as `[sx, sy, length]`.
 */
export function lcs<T extends Indexable<unknown>>(xs: T, ys: T, eq?: Comparator): IterableIterator<Vec3> {
  return new LCSGen(diff(xs, ys, eq), xs.length);
}

/**
 * Yields patch entries `[sx, ex, inserted]`, where `inserted` is the
 * slice of `ys` that replaces `xs[sx, ex)`.
 */
export function * calcPatch<T, S extends Sliceable<T>>(xs: S, ys: S, eq?: Comparator): Generator<[number, number, S]> {
  // Taking subarrays is cheaper than slicing for TypedArrays.
  const slice = ArrayBuffer.isView(xs) ?
    Uint8Array.prototype.subarray as unknown as typeof xs.slice : xs.slice;
  for (const v of diff(xs, ys, eq)) {
    // Re-use the quadruple: slot 2 now carries the inserted slice.
    v[2] = slice.call(ys, v[2], v[3]) as any;
    yield v as any;
  }
}

/**
 * Replays a patch over `xs`, yielding the chunks (kept runs of `xs` and
 * inserted slices) whose concatenation is the patched sequence.
 */
export function * applyPatch<T, S extends Sliceable<T>>(xs: S, patch: Iterable<[number, number, S]>): Generator<S> {
  let i = 0; // Taking subarrays is cheaper than slicing for TypedArrays.
  const slice = ArrayBuffer.isView(xs) ?
    Uint8Array.prototype.subarray as unknown as typeof xs.slice : xs.slice;
  for (const [dels, dele, ins] of patch) {
    if (i < dels) yield slice.call(xs, i, dels);
    if (ins.length > 0) yield ins;
    i = dele;
  }
  if (i < xs.length) yield slice.call(xs, i);
}

/**
 * Yields every segment of the two inputs tagged with its role:
 * 0 for a run common to both, -1 for a run only in `xs` (deleted) and
 * 1 for a run only in `ys` (inserted).
 */
export function * calcSlices<T, S extends Sliceable<T>>(xs: S, ys: S, eq?: Comparator): Generator<[-1|0|1, S]> {
  let i = 0; // Taking subarrays is cheaper than slicing for TypedArrays.
  const slice = ArrayBuffer.isView(xs) ?
    Uint8Array.prototype.subarray as unknown as typeof xs.slice : xs.slice;
  for (const [dels, dele, inss, inse] of diff(xs, ys, eq)) {
    if (i < dels) yield [0, slice.call(xs, i, dels)];
    if (dels < dele) yield [-1, slice.call(xs, dels, dele)];
    if (inss < inse) yield [1, slice.call(ys, inss, inse)];
    i = dele;
  }
  // Use the same helper as above so typed arrays are not copied here either.
  if (i < xs.length) yield [0, slice.call(xs, i)];
}
14 | * Buffers use the smallest numeric type possible for the input length; note that this results in discontinuous bumps in memory usage at input sizes of 256 and 65536. 15 | 16 | Because the core algorithm does not slice or copy data, it depends only on being able to compare elements of the inputs at arbitrary indices. 17 | Thus, it automatically operates equally well on any indexable type--strings, basic arrays, or any flavor of typed array. 18 | Additionally, the library permits optimizing total application memory usage by producing output in the form of generators, rather than forcing you to accumulate the full output up-front. 19 | 20 | ### Comparison With Other Libraries 21 | - [myers-diff](https://www.npmjs.com/package/myers-diff/v/2.0.1) is focused on strings and does the tokenization internally, supporting `'words'`, `'chars'` or `'line'` compare modes as well as custom regular expressions. 22 | - [fast-diff](https://www.npmjs.com/package/fast-diff/v/1.2.1) is specialized on character mode, using substrings instead of comparing characters one by one. 23 | - [fast-array-diff](https://www.npmjs.com/package/fast-array-diff) is specialized for arrays. 24 | - **fast-myers-diff**: is type agnostic and uses an iterative implementation. 25 | 26 | `myers-diff`, `fast-diff`, and `fast-myers-diff` all have the ability to compute character differences between strings.
27 | 28 | ### Interface 29 | 30 | The library exports the following interface: 31 | 32 | ```ts 33 | type GenericIndexable = { 34 | [key: number]: unknown; 35 | readonly length: number; 36 | }; 37 | type Indexable = string | unknown[] | TypedArray | GenericIndexable; 38 | interface Sliceable extends GenericIndexable { 39 | slice(start: number, end?: number): this; 40 | } 41 | 42 | declare function diff_core(i: number, N: number, j: number, M: number, eq: (i: number, j: number) => boolean): Generator<[number, number, number, number]>; 43 | declare function diff<T extends Indexable>(xs: T, ys: T, eq?: (i: number, j: number) => boolean): Generator<[number, number, number, number]>; 44 | declare function lcs<T extends Indexable>(xs: T, ys: T, eq?: (i: number, j: number) => boolean): Generator<[number, number, number]>; 45 | 46 | declare function calcPatch<T extends Sliceable>(xs: T, ys: T, eq?: (i: number, j: number) => boolean): Generator<[number, number, T]>; 47 | declare function applyPatch<T extends Sliceable>(xs: T, patch: Iterable<[number, number, T]>): Generator<T>; 48 | 49 | declare function calcSlices<T, S extends Sliceable<T>>(xs: S, ys: S, eq?: Comparator): Generator<[-1 | 0 | 1, S]>; 50 | ``` 51 | 52 | `diff_core(i, N, j, M, eq)` is the core of the library; given starting indices `i` and `j`, and slice-lengths `N` and `M` (i.e., the remaining length of the relevant sequence after the starting index), it produces a sequence of quadruples `[sx, ex, sy, ey]`, where [sx, ex) indicates a range to delete from `xs` and [sy, ey) indicates a range from `ys` to replace the deleted material with. Simple deletions are indicated when `sy === ey` and simple insertions when `sx === ex`. Note that direct access to the sequences themselves is not required; instead, `diff_core` takes a callback function `eq` which is used to determine whether the relevant sequences are equal at given indices. Note that lacking access to the actual sequences being diffed *ensures* that the library cannot sacrifice efficiency by making temporary copies.
53 | 54 | By writing your own `eq` implementation, it is possible to compute diffs of sequences of types which are not normally comparable (e.g., arrays of objects where you wish to use value equality rather than reference equality), and even to get diffs of data structures which are not natively indexable. Despite the overhead of making a function call for comparisons, this diff implementation is still significantly faster than `fast-diff` when the size of the diff is significant, as the speed of `fast-diff`'s native string comparisons becomes less important. 55 | 56 | `diff(xs, ys[, eq])` is a wrapper around `diff_core` which checks for common affixes (reducing the memory consumption and time spent in the core diff algorithm) and calculates `i`, `j`, `N`, `M` and `eq` (if it is not supplied) automatically. 57 | 58 | `lcs(xs, ys[, eq])` calls `diff` internally, but pre-processes the output to produce triples of the form `[sx, sy, l]`, where `sx` and `sy` are the starting indices in `xs` and `ys` respectively of an aligned common substring, and `l` is the length of said substring. Indexing into the original input sequences can be used to retrieve the actual Longest Common Subsequence from this information, but the `lcs` function itself does not attempt to take slices of the inputs. 59 | 60 | `calcPatch(xs, ys[, eq])` is a thin wrapper over `diff` which replaces the [sy, ey) indices with the relevant slice of `ys`. This can be used to reconstitute `ys` given `xs`. Once again, pure insertions are indicated when `sx === ex`, but pure deletions are indicated by an empty slice--i.e., an empty string, a zero-length array, etc. The insert slices are of the same type as the original `ys`. If `ys` is a string or an array, they are produced with the `slice` methods of strings or arrays, which will result in a shallow copy. If `ys` is a typed array, slices will be produced with `TypedArray.prototype.subarray`, which re-uses the existing underlying memory.
61 | 62 | `applyPatch(xs, patch)` takes the output of `calcPatch(xs, ys)` and uses it to reconstitute the original elements of `ys`. The output is not, however, a single reconstituted `Indexable`, but a sequence of chunks taken alternately from `xs` and from the `patch` data. This is done for two reasons: 63 | 1. It avoids special-case code for joining each possible `Indexable` type; 64 | 2. As with all of the other library functions, it permits stream processing without deciding *for* you to allocate enough memory to hold the entire result at once. 65 | 66 | `calcSlices(xs, ys)` is a thin wrapper over `diff` which uses the calculated indices to return the complete list of segments of `xs` and `ys` coded by whether they are unique to `xs` (deletions from `xs` to `ys`), components of the longest common subsequence, or unique to `ys` (insertions from `xs` to `ys`). Replacements at the same location result in yielding the slice of `xs` first, followed by the slice of `ys`. The output elements are pairs of `[type, slice]`, where a type of -1 indicates the slice comes from `xs`, a type of 0 indicates that the slice is common, and a type of 1 indicates that the slice comes from `ys`. This is useful for displaying diffs in a UI, where you want all components shown with deletions and insertions highlighted. 67 | 68 | `diff` and `lcs` will work with custom container types, as long as your container objects have a numeric `length` property. `calcPatch`, `applyPatch`, and `calcSlices` will work with custom types provided that they also implement a suitable `slice(start[, end])` method. 69 | 70 | ### Empirical results 71 | 72 | The table below gives the number of operations per second reported by 73 | [benchmark](https://www.npmjs.com/package/benchmark/v/2.1.4) on a 74 | Windows 10 with Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz.
75 | 76 | | input | fast-myers-diff | fast-diff-1.2.0 | myers-diff-2.0.1 | fast-array-diff-1.0.1 | fast-myers-diff-2.0.0 | 77 | | ----------------- | --------------- | --------------- | ---------------- | --------------------- | --------------------- | 78 | | 10, +100, -100 | 1,139 ops/sec | 2,724 ops/sec | 768 ops/sec | 17.38 ops/sec | 1,115 ops/sec | 79 | | 10, +4, -200 | 4,217 ops/sec | 9,094 ops/sec | 875 ops/sec | 10.26 ops/sec | 4,119 ops/sec | 80 | | 100, +10, -10 | 40,825 ops/sec | 14,531 ops/sec | 1,049 ops/sec | 92.39 ops/sec | 42,327 ops/sec | 81 | | 100, +20, -0 | 43,265 ops/sec | 18,649 ops/sec | 976 ops/sec | 127 ops/sec | 44,582 ops/sec | 82 | | 100, +0, -20 | 45,387 ops/sec | 15,867 ops/sec | 988 ops/sec | 92.10 ops/sec | 48,545 ops/sec | 83 | | 10, +1000, -1000 | 12.06 ops/sec | 32.86 ops/sec | 7.23 ops/sec | 0.18 ops/sec | Not supported | 84 | | 10000, +100, -100 | 587 ops/sec | 99.70 ops/sec | 0.23 ops/sec | 0.14 ops/sec | Not supported | 85 | | 10000, +200, -0 | 685 ops/sec | 95.26 ops/sec | 0.23 ops/sec | 0.13 ops/sec | Not supported | 86 | | 10000, +0, -200 | 705 ops/sec | 106 ops/sec | 0.24 ops/sec | 0.13 ops/sec | Not supported | 87 | | 10000, +10, -10 | 2,905 ops/sec | 64.11 ops/sec | 0.28 ops/sec | 1.13 ops/sec | Not supported | 88 | | 10000, +20, -0 | 3,378 ops/sec | 68.45 ops/sec | 0.26 ops/sec | 1.19 ops/sec | Not supported | 89 | | 10000, +0, -20 | 3,730 ops/sec | 59.50 ops/sec | 0.27 ops/sec | 1.19 ops/sec | Not supported | 90 | 91 | `fast-myers-diff@2.0.0` and earlier used `Uint8Array` to save indices, so it can only correctly handle inputs with added length less than 256. 92 | 93 | `fast-diff` is faster than `fast-myers-diff` for inputs in which the longest common string is a small portion of the sequences. For differences of 20% `fast-myers-diff` is about 6x faster, for differences of 2% about 50x faster. 94 | Results for `fast-array-diff` may be depressed due to the need to convert test strings to arrays. 
95 | 96 | Another benchmarking run shows very similar results, with the latest version of fast-myers-diff being the fastest by a large margin across most inputs, and fast-diff-1.2.0 pulling slightly ahead for inputs with very long edit scripts. 97 | 98 | | input | fast-myers-diff | fast-diff-1.2.0 | myers-diff-2.0.1 | fast-array-diff-1.0.1 | fast-myers-diff-2.0.0 | 99 | | ----------------- | --------------- | --------------- | ---------------- | --------------------- | --------------------- | 100 | | 10, +100, -100 | 1,155 ops/sec | 1,415 ops/sec | 398 ops/sec | 14.35 ops/sec | 610 ops/sec | 101 | | 10, +4, -200 | 4,217 ops/sec | 4,776 ops/sec | 413 ops/sec | 9.76 ops/sec | 2,106 ops/sec | 102 | | 100, +10, -10 | 39,941 ops/sec | 4,980 ops/sec | 520 ops/sec | 80.47 ops/sec | 23,352 ops/sec | 103 | | 100, +20, -0 | 42,264 ops/sec | 9,178 ops/sec | 430 ops/sec | 95.52 ops/sec | 24,884 ops/sec | 104 | | 100, +0, -20 | 45,564 ops/sec | 4,304 ops/sec | 480 ops/sec | 53.13 ops/sec | 25,206 ops/sec | 105 | | 10, +1000, -1000 | 9.14 ops/sec | 12.51 ops/sec | 4.40 ops/sec | 0.14 ops/sec | Not Supported | 106 | | 10000, +100, -100 | 357 ops/sec | 29.94 ops/sec | 0.13 ops/sec | 0.12 ops/sec | Not Supported | 107 | | 10000, +200, -0 | 350 ops/sec | 48.94 ops/sec | 0.13 ops/sec | 0.11 ops/sec | Not Supported | 108 | | 10000, +0, -200 | 575 ops/sec | 51.99 ops/sec | 0.13 ops/sec | 0.13 ops/sec | Not Supported | 109 | | 10000, +10, -10 | 2,108 ops/sec | 33.35 ops/sec | 0.14 ops/sec | 1.17 ops/sec | Not Supported | 110 | | 10000, +20, -0 | 2,065 ops/sec | 34.75 ops/sec | 0.14 ops/sec | 1.32 ops/sec | Not Supported | 111 | | 10000, +0, -20 | 2,410 ops/sec | 26.34 ops/sec | 0.15 ops/sec | 1.24 ops/sec | Not Supported | 112 | --------------------------------------------------------------------------------