├── .eslintignore ├── .prettierignore ├── .gitignore ├── tsconfig.commonjs.json ├── tsconfig.dev.json ├── src ├── index.ts ├── util.ts ├── find_position.ts ├── cursors.ts ├── ids.ts └── position_source.ts ├── .mocharc.jsonc ├── tsconfig.json ├── test ├── ids.test.ts ├── util.ts ├── find_position.test.ts ├── fuzz.test.ts ├── cursors.test.ts └── manual.test.ts ├── LICENSE ├── .eslintrc.js ├── package.json ├── stats.md ├── algorithm.md ├── benchmarks └── main.ts └── README.md /.eslintignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | build 3 | test 4 | benchmarks 5 | *.js 6 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | /build/* 2 | /benchmark_results/* 3 | real_text_trace_edits.json 4 | LICENSE 5 | stats.md -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | build 4 | .vscode/ 5 | .idea/ 6 | *.tsbuildinfo 7 | benchmark_results -------------------------------------------------------------------------------- /tsconfig.commonjs.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "module": "commonjs", 5 | "outDir": "build/commonjs" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tsconfig.dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.commonjs.json", 3 | "compilerOptions": { "resolveJsonModule": true }, 4 | "include": ["src", "test", "benchmarks"] 5 | } 6 | -------------------------------------------------------------------------------- /src/index.ts: 
-------------------------------------------------------------------------------- 1 | export * from "./cursors"; 2 | export * from "./ids"; 3 | export * from "./position_source"; 4 | export * from "./find_position"; 5 | // utils.ts is internal only. 6 | -------------------------------------------------------------------------------- /.mocharc.jsonc: -------------------------------------------------------------------------------- 1 | { 2 | // To run tests written in TypeScript without compiling them, we need to run them in ts-node. 3 | "require": ["ts-node/register"], 4 | 5 | // Any *.test.ts file in ./test will be run as a test 6 | "spec": "test/**/*.test.ts", 7 | 8 | // A change in sources or tests should trigger test re-run 9 | "watch-files": ["test/**", "src/**"] 10 | } 11 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "rootDir": "src", 4 | "outDir": "build/esm", 5 | "target": "es2021", 6 | "module": "es2015", 7 | /* Needed with module: es2015 or else stuff breaks. */ 8 | "moduleResolution": "node", 9 | /* Enable strict type checking. */ 10 | "strict": true, 11 | /* Enable interop with dependencies using different module systems. */ 12 | "esModuleInterop": true, 13 | /* Emit .d.ts files. */ 14 | "declaration": true, 15 | /* Emit sourcemap files. */ 16 | "sourceMap": true 17 | /* Don't turn on importHelpers, so we can avoid tslib dependency. 
/**
 * Throws an `Error` unless `statement` holds.
 *
 * The error text is `message`, followed by each of `optionalParams`
 * converted with `String(...)`, all separated by single spaces.
 *
 * @param statement The condition that must be true.
 * @param message The leading text of the error.
 * @param optionalParams Extra values appended to the error text.
 * @throws Error if `statement` is false.
 */
export function precond(
  statement: boolean,
  message: string,
  ...optionalParams: unknown[]
): asserts statement is true {
  if (statement) return;

  // With no extra params this joins a single element, i.e., just `message`.
  const pieces = [message];
  for (const param of optionalParams) {
    pieces.push(String(param));
  }
  throw new Error(pieces.join(" "));
}

/**
 * Like `precond`, but for internal invariants: the error text is
 * prefixed with "Assertion failed" (plus ": " and `message`, if given).
 *
 * @param statement The condition that must be true.
 * @param message Optional description of the violated invariant.
 * @param optionalParams Extra values appended to the error text.
 * @throws Error if `statement` is false.
 */
export function assert(
  statement: boolean,
  message?: string,
  ...optionalParams: unknown[]
): asserts statement is true {
  if (statement) return;

  const fullMessage =
    message === undefined ? "Assertion failed" : "Assertion failed: " + message;
  precond(statement, fullMessage, ...optionalParams);
}

/**
 * [[PositionSource.LAST]] copy that avoids circular dependencies
 * (PositionSource <-> IDs).
 */
export const LastInternal = "~";
/**
 * Returns `{ index, isPresent }`, where:
 * - `index` is the current index of `position` in `positions`,
 * or where it would be if added.
 * - `isPresent` is true if `position` is present in `positions`.
 *
 * If this method is inconvenient (e.g., the positions are in a database
 * instead of an array), you can instead compute
 * `index` by finding the number of positions less than `position`.
 * For example, in SQL, use:
 * ```sql
 * SELECT COUNT(*) FROM table WHERE position < $position
 * ```
 *
 * See also: `Cursors.toIndex`.
 *
 * @param position The position string to look up.
 * @param positions The target list's positions, in lexicographic order.
 * There should be no duplicate positions.
 * @returns The rank of `position` (number of entries strictly less than it)
 * and whether the entry at that rank equals `position`.
 * @throws Error if the sanity check on the search result fails, which
 * indicates that `positions` is not sorted.
 */
export function findPosition(
  position: string,
  positions: ArrayLike<string>
): { index: number; isPresent: boolean } {
  // Binary search: index is the "rank" of position, computed using
  // https://en.wikipedia.org/wiki/Binary_search_algorithm#Procedure_for_finding_the_leftmost_element
  let L = 0;
  let R = positions.length;
  while (L < R) {
    const m = Math.floor((L + R) / 2);
    if (positions[m] < position) L = m + 1;
    else R = m;
  }

  // Sanity check: everything left of L is smaller, and the entry at L
  // (if any) is not smaller.
  assert(
    (L === 0 || positions[L - 1] < position) &&
      (L === positions.length || positions[L] >= position),
    "Bad binary search (positions out of order?):",
    position,
    L
  );
  // L may equal positions.length (position sorts after every entry);
  // in that case there is no entry to compare against.
  return {
    index: L,
    isPresent: L < positions.length && positions[L] === position,
  };
}
assert.deepStrictEqual(findPosition(list[i], list), { 28 | index: i, 29 | isPresent: true, 30 | }); 31 | } 32 | }); 33 | 34 | it("not present", () => { 35 | for (let i = 0; i <= list.length; i++) { 36 | const newPos = source.createBetween(list[i - 1], list[i]); 37 | // newPos would be at index i if present (between the current 38 | // i - 1 & i). 39 | assert.deepStrictEqual(findPosition(newPos, list), { 40 | index: i, 41 | isPresent: false, 42 | }); 43 | } 44 | }); 45 | }); 46 | } 47 | 48 | testLength(32); 49 | testLength(31); 50 | testLength(33); 51 | testLength(23); 52 | testLength(1); 53 | testLength(0); 54 | }); 55 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | root: true, 3 | env: { 4 | browser: true, 5 | es2021: true, 6 | node: true, 7 | }, 8 | parser: "@typescript-eslint/parser", 9 | parserOptions: { 10 | tsconfigRootDir: __dirname, 11 | project: ["./tsconfig.json"], 12 | sourceType: "module", 13 | }, 14 | plugins: ["@typescript-eslint", "import"], 15 | extends: [ 16 | "eslint:recommended", 17 | "plugin:@typescript-eslint/recommended", 18 | "plugin:@typescript-eslint/recommended-requiring-type-checking", 19 | "plugin:import/typescript", 20 | "prettier", 21 | ], 22 | rules: { 23 | // Allow inference in function return type. 24 | "@typescript-eslint/explicit-function-return-type": "off", 25 | "@typescript-eslint/explicit-module-boundary-types": "off", 26 | // I like non-null assertions. 27 | "@typescript-eslint/no-non-null-assertion": "off", 28 | // Disallow default exports; only allow named exports. 29 | "import/no-default-export": "error", 30 | // Impose alphabetically ordered imports. 31 | "import/order": "error", 32 | // Allow implicit string casts in template literals. 33 | "@typescript-eslint/restrict-template-expressions": "off", 34 | // Allow ts-ignore with justification. 
35 | "@typescript-eslint/ban-ts-comment": [ 36 | "error", 37 | { 38 | "ts-expect-error": "allow-with-description", 39 | }, 40 | ], 41 | "@typescript-eslint/no-unused-vars": [ 42 | "warn", 43 | { 44 | // Allow unused parameter names that start with _, 45 | // like TypeScript does. 46 | argsIgnorePattern: "^_", 47 | }, 48 | ], 49 | }, 50 | }; 51 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "position-strings", 3 | "version": "2.0.1", 4 | "description": "Lexicographically-ordered position strings for collaborative lists and text", 5 | "author": "Matthew Weidner", 6 | "license": "MIT", 7 | "bugs": { 8 | "url": "https://github.com/mweidner037/position-strings/issues" 9 | }, 10 | "homepage": "https://github.com/mweidner037/position-strings/tree/master/#readme", 11 | "repository": { 12 | "type": "git", 13 | "url": "git+https://github.com/mweidner037/position-strings.git" 14 | }, 15 | "keywords": [ 16 | "CRDT", 17 | "text editing", 18 | "collaboration", 19 | "fractional indexing" 20 | ], 21 | "module": "build/esm/index.js", 22 | "browser": "build/esm/index.js", 23 | "types": "build/esm/index.d.ts", 24 | "main": "build/commonjs/index.js", 25 | "files": [ 26 | "/build", 27 | "/src" 28 | ], 29 | "directories": { 30 | "lib": "src" 31 | }, 32 | "publishConfig": { 33 | "access": "public" 34 | }, 35 | "sideEffects": false, 36 | "dependencies": { 37 | "@types/seedrandom": "^2.4.28" 38 | }, 39 | "devDependencies": { 40 | "@types/chai": "^4.3.4", 41 | "@types/functional-red-black-tree": "^1.0.2", 42 | "@types/mocha": "^10.0.1", 43 | "@typescript-eslint/eslint-plugin": "^5.52.0", 44 | "@typescript-eslint/parser": "^5.52.0", 45 | "chai": "^4.3.7", 46 | "cross-env": "^7.0.3", 47 | "eslint": "^8.34.0", 48 | "eslint-config-prettier": "^8.6.0", 49 | "eslint-plugin-import": "^2.27.5", 50 | "functional-red-black-tree": "^1.0.1", 51 | "mocha": 
"^10.2.0", 52 | "npm-run-all": "^4.1.5", 53 | "prettier": "^2.8.4", 54 | "rimraf": "^4.1.2", 55 | "seedrandom": "^3.0.5", 56 | "ts-node": "^10.9.1", 57 | "typescript": "^4.9.5" 58 | }, 59 | "scripts": { 60 | "prepack": "npm run clean && npm run build && npm run test", 61 | "build": "npm-run-all build:*", 62 | "build:ts": "tsc -p tsconfig.json && tsc -p tsconfig.commonjs.json", 63 | "test": "npm-run-all test:*", 64 | "test:lint": "eslint --ext .ts,.js .", 65 | "test:unit": "cross-env TS_NODE_PROJECT='./tsconfig.dev.json' mocha", 66 | "test:format": "prettier --check .", 67 | "fix": "npm-run-all fix:*", 68 | "fix:format": "prettier --write .", 69 | "benchmarks": "ts-node --project tsconfig.dev.json benchmarks/main.ts", 70 | "clean": "rimraf generated generated_esm build" 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /stats.md: -------------------------------------------------------------------------------- 1 | 2 | > position-strings@2.0.0 benchmarks 3 | > ts-node --project tsconfig.dev.json benchmarks/main.ts 4 | 5 | ## Run: all ops; rotate never 6 | 7 | ### length 8 | 9 | - Average: 33 10 | - Median: 32 11 | - 99th percentile: 51 12 | - Max: 55 13 | 14 | ### longNames 15 | 16 | - Average: 1 17 | - Median: 1 18 | - 99th percentile: 1 19 | - Max: 1 20 | 21 | ### waypoints 22 | 23 | - Average: 9 24 | - Median: 8 25 | - 99th percentile: 15 26 | - Max: 17 27 | 28 | ### valueIndex 29 | 30 | - Average: 615 31 | - Median: 208 32 | - 99th percentile: 5780 33 | - Max: 7603 34 | 35 | ### PositionSource memory usage 36 | 37 | - Map size: 3333 38 | - Sum of map key lengths: 112034 39 | 40 | ## Run: all ops; rotate every 1000 ops 41 | 42 | ### length 43 | 44 | - Average: 111 45 | - Median: 109 46 | - 99th percentile: 206 47 | - Max: 237 48 | 49 | ### longNames 50 | 51 | - Average: 8 52 | - Median: 8 53 | - 99th percentile: 16 54 | - Max: 18 55 | 56 | ### waypoints 57 | 58 | - Average: 13 59 | - Median: 13 60 | - 99th 
/**
 * Utilities for working with cursors in a collaborative list
 * or text string.
 *
 * A *cursor* points to a particular spot in a list, in between
 * two list elements (or text characters). This class handles
 * cursors for lists that use our position strings.
 *
 * A cursor is represented as a string.
 * Specifically, it is the position of the element
 * to its left, or `PositionSource.FIRST` if it is at the beginning
 * of the list. If that position is later deleted, the cursor stays the
 * same, but its index shifts to the next element on its left.
 *
 * You can use cursor strings as ordinary cursors, selection endpoints,
 * range endpoints for a comment or formatting span, etc.
 */
export class Cursors {
  private constructor() {
    // Not instantiable.
  }

  /**
   * Returns the cursor at `index` within the given list of positions.
   * Invert with `Cursors.toIndex`.
   *
   * That is, the cursor is between the list elements at `index - 1` and `index`.
   *
   * If this method is inconvenient (e.g., the positions are in a database
   * instead of an array), you can instead run the following algorithm yourself:
   * - If `index` is 0, return `PositionSource.FIRST = ""`.
   * - Else return `positions[index - 1]`.
   *
   * @param index The cursor's index, an integer in `[0, positions.length]`.
   * @param positions The target list's positions, in lexicographic order.
   * There should be no duplicate positions.
   * @throws Error if `index` is not an integer or is out of bounds.
   */
  static fromIndex(index: number, positions: ArrayLike<string>): string {
    // A fractional index would pass the bounds check below but read
    // `positions[index - 1]` as undefined, so reject it up front.
    precond(Number.isInteger(index), "Index must be an integer:", index);
    precond(
      index >= 0 && index <= positions.length,
      "Index out of bounds:",
      index,
      positions.length
    );
    return index === 0 ? PositionSource.FIRST : positions[index - 1];
  }

  /**
   * Returns the current index of `cursor` within the given list of
   * positions. Inverse of `Cursors.fromIndex`.
   *
   * That is, the cursor is between the list elements at `index - 1` and `index`.
   *
   * If this method is inconvenient (e.g., the positions are in a database
   * instead of an array), you can instead compute
   * `index` by finding the number of positions less than
   * or equal to `cursor`.
   * For example, in SQL, use:
   * ```sql
   * SELECT COUNT(*) FROM table WHERE position <= $cursor
   * ```
   *
   * See also: `findPosition`.
   *
   * @param cursor The cursor string, as returned by `Cursors.fromIndex`.
   * @param positions The target list's positions, in lexicographic order.
   * There should be no duplicate positions.
   */
  static toIndex(cursor: string, positions: ArrayLike<string>): number {
    const { index, isPresent } = findPosition(cursor, positions);
    // findPosition gives < elements, but we want <= elements.
    // So if there's an == element, add 1.
    return isPresent ? index + 1 : index;
  }
}
27 | const newPosition = source.createBetween(list[index - 1], list[index]); 28 | list.splice(index, 0, newPosition); 29 | } 30 | 31 | assertIsOrdered(list); 32 | testUniqueAfterDelete(list, sources[0]); 33 | }); 34 | 35 | it("random LtR runs", () => { 36 | const sources = newSources(rng, numUsers); 37 | 38 | // Randomly create positions in a single list, simulating sequential access. 39 | // This time, create short LtR runs at a time. 40 | const list: string[] = []; 41 | for (let i = 0; i < 200; i++) { 42 | const source = sources[Math.floor(rng() * sources.length)]; 43 | const index = Math.floor(rng() * (list.length + 1)); 44 | // Out-of-bounds okay. 45 | for (let j = 0; j < 5; j++) { 46 | const newPosition = source.createBetween( 47 | list[index - 1 + j], 48 | list[index + j] 49 | ); 50 | list.splice(index + j, 0, newPosition); 51 | } 52 | } 53 | 54 | assertIsOrdered(list); 55 | testUniqueAfterDelete(list, sources[0]); 56 | }); 57 | 58 | it("random RtL runs", () => { 59 | const sources = newSources(rng, numUsers); 60 | 61 | // Randomly create positions in a single list, simulating sequential access. 62 | // This time, create short RtL runs at a time. 63 | const list: string[] = []; 64 | for (let i = 0; i < 200; i++) { 65 | const source = sources[Math.floor(rng() * sources.length)]; 66 | const index = Math.floor(rng() * (list.length + 1)); 67 | // Out-of-bounds okay. 68 | for (let j = 0; j < 5; j++) { 69 | const newPosition = source.createBetween(list[index - 1], list[index]); 70 | list.splice(index, 0, newPosition); 71 | } 72 | } 73 | 74 | assertIsOrdered(list); 75 | testUniqueAfterDelete(list, sources[0]); 76 | }); 77 | 78 | it("biased", () => { 79 | const sources = newSources(rng, numUsers); 80 | 81 | // Randomly create positions in a single list, simulating sequential access. 82 | // This time, bias towards smaller indices using a sqrt. 
83 | const list: string[] = []; 84 | for (let i = 0; i < 1000; i++) { 85 | const source = 86 | sources[Math.floor(Math.sqrt(rng() * sources.length * sources.length))]; 87 | const index = Math.floor(rng() * (list.length + 1)); 88 | // Out-of-bounds okay. 89 | const newPosition = source.createBetween(list[index - 1], list[index]); 90 | list.splice(index, 0, newPosition); 91 | } 92 | 93 | assertIsOrdered(list); 94 | testUniqueAfterDelete(list, sources[0]); 95 | }); 96 | } 97 | -------------------------------------------------------------------------------- /test/cursors.test.ts: -------------------------------------------------------------------------------- 1 | import { assert } from "chai"; 2 | import seedrandom from "seedrandom"; 3 | import { Cursors, IDs, PositionSource } from "../src"; 4 | 5 | describe("Cursors", () => { 6 | let rng!: seedrandom.prng; 7 | let source!: PositionSource; 8 | 9 | beforeEach(() => { 10 | rng = seedrandom("42"); 11 | source = new PositionSource({ ID: IDs.pseudoRandom(rng) }); 12 | }); 13 | 14 | function testLength(len: number) { 15 | let list!: string[]; 16 | 17 | describe(`length ${len}`, () => { 18 | beforeEach(() => { 19 | list = []; 20 | for (let i = 0; i < len; i++) { 21 | list.push(source.createBetween(list.at(-1), undefined)); 22 | } 23 | }); 24 | 25 | it("present", () => { 26 | for (let i = 0; i <= list.length; i++) { 27 | const cursor = Cursors.fromIndex(i, list); 28 | assert.strictEqual(Cursors.toIndex(cursor, list), i); 29 | if (i !== 0) { 30 | // Insert a char in the next gap to the left, shifting the cursor. 31 | const list2 = [ 32 | ...list.slice(0, i - 1), 33 | source.createBetween(list[i - 2], list[i - 1]), 34 | ...list.slice(i - 1), 35 | ]; 36 | assert.strictEqual(Cursors.toIndex(cursor, list2), i + 1); 37 | } 38 | if (i !== list.length) { 39 | // Insert a char in the next gap to the right, which shouldn't shift the cursor. 
40 | const list3 = [ 41 | ...list.slice(0, i + 1), 42 | source.createBetween(list[i], list[i + 1]), 43 | ...list.slice(i), 44 | ]; 45 | assert.strictEqual(Cursors.toIndex(cursor, list3), i); 46 | } 47 | // Insert a char in the cursor's gap, which 48 | // still shouldn't shift the cursor, since we 49 | // bind to the left char. 50 | const list4 = [ 51 | ...list.slice(0, i), 52 | source.createBetween(list[i - 1], list[i]), 53 | ...list.slice(i), 54 | ]; 55 | assert.strictEqual(Cursors.toIndex(cursor, list4), i); 56 | } 57 | }); 58 | 59 | it("not present", () => { 60 | for (let i = 0; i <= list.length; i++) { 61 | // Set the cursor to a new position that we "delete" 62 | // (actually just leave not-present) in list. 63 | const listExtended = [ 64 | ...list.slice(0, i), 65 | source.createBetween(list[i - 1], list[i]), 66 | ...list.slice(i), 67 | ]; 68 | const cursor = Cursors.fromIndex(i + 1, listExtended); 69 | 70 | // In list, the index falls back by 1 to i. 71 | assert.strictEqual(Cursors.toIndex(cursor, list), i); 72 | if (i !== 0) { 73 | // Insert a char in the next gap to the left, shifting the cursor. 74 | const list2 = [ 75 | ...list.slice(0, i - 1), 76 | source.createBetween(list[i - 2], list[i - 1]), 77 | ...list.slice(i - 1), 78 | ]; 79 | assert.strictEqual(Cursors.toIndex(cursor, list2), i + 1); 80 | } 81 | if (i !== list.length) { 82 | // Insert a char in the next gap to the right, which shouldn't shift the cursor. 83 | const list3 = [ 84 | ...list.slice(0, i + 1), 85 | source.createBetween(list[i], list[i + 1]), 86 | ...list.slice(i), 87 | ]; 88 | assert.strictEqual(Cursors.toIndex(cursor, list3), i); 89 | } 90 | // Insert a char in the cursor's gap, which 91 | // may or may not shift the cursor, depending on how 92 | // the new position compares to the cursor's. 
/**
 * Utilities for generating `PositionSource` IDs
 * (the `options.ID` constructor argument).
 */
export class IDs {
  private constructor() {
    // Not instantiable.
  }

  /**
   * Default characters used in IDs: alphanumeric chars.
   */
  static readonly DEFAULT_CHARS: string =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

  // Rationale for value 8:
  // Each character of the ID gives us ~6 bits of entropy,
  // for a total of ~48 bits. This gives a < 1%
  // probability that two connected `PositionSource`s
  // will ever choose the same IDs, even if we
  // consider the total probability across 100,000,000
  // documents with 1,000 IDs each
  // (= 10 users x 100 days x 1 ID/user/day).
  /**
   * The default length of an ID, in characters.
   */
  static readonly DEFAULT_LENGTH: number = 8;

  /**
   * Returns a cryptographically random ID made of alphanumeric characters.
   *
   * @param options.length The length of the ID, in characters.
   * Default: `IDs.DEFAULT_LENGTH`.
   * @param options.chars The characters to draw from. Default: `IDs.DEFAULT_CHARS`.
   *
   * If specified, only the first 256 elements are used, and you achieve
   * about `log_2(chars.length)` bits of entropy per `length`.
   * @throws Error if `options.chars` is the empty string.
   */
  static random(options?: { length?: number; chars?: string }): string {
    const length = options?.length ?? this.DEFAULT_LENGTH;
    const chars = options?.chars ?? this.DEFAULT_CHARS;
    // With no chars to draw from, every pick would be undefined and the
    // result would silently collapse to "", so fail loudly instead.
    precond(chars.length > 0, "chars must be non-empty");

    const arr = new Array<string>(length);
    let randomValues = new Uint8Array(length);
    if (typeof window === "undefined") {
      // Use Node crypto library.
      // We use eval("require") to prevent Webpack from attempting
      // to bundle the crypto module and complaining.
      // In theory we should also be able to do this by
      // adding "browser": {"crypto": false} to package.json,
      // but that is not working, and besides, every user
      // of this package would have to remember to do so.
      // See https://github.com/webpack/webpack/issues/8826
      const cryptoReal = (eval("require") as typeof require)(
        "crypto"
      ) as typeof crypto;
      const randomBuffer = cryptoReal.randomBytes(length);
      randomValues = new Uint8Array(randomBuffer);
    } else {
      // Use browser crypto library.
      window.crypto.getRandomValues(randomValues);
    }
    for (let i = 0; i < length; i++) {
      // This will be biased if chars.length does not divide 256,
      // but it will still give at least floor(log_2(chars.length))
      // bits of entropy.
      arr[i] = chars[randomValues[i] % chars.length];
    }
    return arr.join("");
  }

  /**
   * Returns a pseudorandom ID made of alphanumeric characters,
   * generated using `rng` from package [seedrandom](https://www.npmjs.com/package/seedrandom).
   *
   * Pseudorandom IDs with a fixed seed are recommended for
   * tests and benchmarks, to make them deterministic.
   *
   * @param rng The seedrandom generator to draw values from.
   * @param options.length The length of the ID, in characters.
   * Default: `IDs.DEFAULT_LENGTH`.
   * @param options.chars The characters to draw from. Default: `IDs.DEFAULT_CHARS`.
   *
   * If specified, only the first 256 elements are used, and you achieve
   * about `log_2(chars.length)` bits of entropy per `length`.
   * @throws Error if `options.chars` is the empty string.
   */
  static pseudoRandom(
    rng: seedrandom.prng,
    options?: { length?: number; chars?: string }
  ): string {
    const length = options?.length ?? this.DEFAULT_LENGTH;
    const chars = options?.chars ?? this.DEFAULT_CHARS;
    // Same guard as `random`: an empty `chars` would yield an empty ID.
    precond(chars.length > 0, "chars must be non-empty");

    const arr = new Array<string>(length);
    for (let i = 0; i < arr.length; i++) {
      // Although we could pick chars without bias, we instead use the
      // same bias as `random`, for consistency.
      arr[i] = chars[Math.floor(rng() * 256) % chars.length];
    }
    return arr.join("");
  }

  /**
   * Throws an error if `ID` does not satisfy the
   * following requirements from `PositionSource`'s constructor:
   * - It does not contain `','` or `'.'`.
   * - The first character is lexicographically less than `'~'` (code point 126).
   *
   * The second requirement is checked by comparing the whole string
   * against `'~'`.
   *
   * @throws Error if any requirement is violated.
   */
  static validate(ID: string): void {
    precond(ID < LastInternal, "ID must be less than", LastInternal, ":", ID);
    precond(!ID.includes(","), "ID must not contain ',':", ID);
    precond(!ID.includes("."), "ID must not contain '.':", ID);
  }
}
It is an optimized version of that post's [string implementation](https://mattweidner.com/2022/10/21/basic-list-crdt.html#intro-string-implementation), which uses strings to represent paths in a tree. The strings are designed so that their lexicographic order matches the tree's [in-order traversal](https://en.wikipedia.org/wiki/Tree_traversal#In-order,_LNR) order. 8 | 9 | ## Tree Structure 10 | 11 | position-strings's implicit tree is structured in layers. Each layer has a specific type and can only contain nodes of that type. There are 3 layer types that alternate cyclically (1 -> 2 -> 3 -> 1 -> 2 -> 3 -> ...). Each position string corresponds to a type-3 node, and the string itself encodes the node labels on the path from the root to that node. 12 | 13 | The 3 node/layer types are: 14 | 15 | 1. **Waypoint nodes**: Labeled by the ID of the `PositionSource` that created it, sorted arbitrarily. The ID ensures that positions created by different `PositionSource`s are distinct: each `PositionSource` only returns positions whose _final_ waypoint node uses its own ID. 16 | 2. **valueIndex nodes**: Labeled by an integer, sorted by magnitude. When a `PositionSource` creates positions in a left-to-right sequence, instead of appending a new waypoint node each time, it reuses the first waypoint node and just increases the valueIndex. That causes the position string length to grow logarithmically instead of linearly. 17 | 3. **Side nodes**: Labeled by a bit "left side" (0) or "right side" (1). The actual position at a node, and all of the node's right-side descendants, use "right side"; all of its left-side descendants use "left side". This ensures that all left descendants are less than the position at a node, which is less than all right descendants. 18 | 19 | ### `createBetween` 20 | 21 | In terms of the tree structure, `PositionSource.createBetween(left, right)` does the following: 22 | 23 | 1. 
If `right` is a descendant of `left`, create a left descendant of `right` as follows. First, create a waypoint node that is a left child of `right` (replacing `right`'s final "right side" bit with "left side"). Then append the next new valueIndex node (usually 0) and a "right side" node, to fill out the 3 layers. Return that final node. 24 | 2. Otherwise, see if we can just increase `left`'s final valueIndex, instead of lengthening its path. This is allowed if (a) `left`'s final waypoint node uses our ID, and (b) `right` doesn't use that same waypoint node. If so, look up the next unused valueIndex for that waypoint (stored in `PositionSource`), then use `left` but with that final valueIndex. 25 | 3. If not, create a right descendant of `left` like in case 1: append a waypoint node, the next new valueIndex, then "right side"; return that final node. 26 | 27 | You can check that the resulting node lies between `left` and `right`, and that this procedure satisfies properties 4-6 from the [README](./README.md). 28 | 29 | > The tree we've described so far is similar to that used by the [Logoot List CRDT](https://doi.org/10.1109/ICDCS.2009.75), which also has alternating layers of IDs and numbers. However, Logoot sorts by numbers first and then IDs, while we do the opposite. This lets us avoid interleaving: if two `PositionSource`s concurrently create a sequence of positions at the same place, their positions will end up under different waypoint nodes, hence appear one after the other. 30 | 31 | ## String Representation 32 | 33 | Finally, we need to map type-3 nodes in the above tree to position strings, such that the tree order matches the position strings' lexicographic order. 34 | 35 | Given a tree node `a`, let `aPath` be the sequence of node labels on the path from the root to that node.
Note that the tree order matches the "lexicographic order" on these sequences: `a < b` if `aPath[i] < bPath[i]` at the first index `i` where they disagree, or if `aPath` is a strict prefix of `bPath`. 36 | 37 | I claim that we can set `a`'s position string to be `aPos = aPath.map(f).join("")` for any `f: (label: string, i: number) => string` with the following property: 38 | 39 | - If `aPath` and `bPath` first disagree at index `i` and `aPath[i] < bPath[i]`, then: 40 | 1. `f(aPath[i], i) < f(bPath[i], i)` as strings. 41 | 2. `f(aPath[i], i)` is not a prefix of `f(bPath[i], i)`. 42 | 43 | Indeed, then there is some index `j` such that `f(aPath[i], i).charAt(j) < f(bPath[i], i).charAt(j)`. Hence no matter what happens in the rest of `aPos` and `bPos`, we'll still have `aPos < bPos`. 44 | 45 | One working `f` is defined as follows, with a different rule for each layer type: 46 | 47 | 1. (Waypoint nodes) Map the node's label (an ID) to `` `,${ID}.` ``. The period, which is not allowed in IDs, ensures the no-prefix rule (ii). 48 | 2. (valueIndex nodes) Map the valueIndex to its _valueSeq_: its entry in a special sequence of numbers that is in lexicographic order and has no prefixes (when base52 encoded). You can read about the sequence we use in the comment above [`position_source.ts`](./src/position_source.ts)'s `nextOddValueSeq` function. 49 | 3. (Side nodes) Map "left side" to `"0"` and "right side" to `"1"`. 50 | 51 | ### Optimizations 52 | 53 | In the actual implementation, we optimize the above string representation in a few ways. 54 | 55 | First, for waypoint nodes, we only use each "long name" `` `,${ID}.` `` once per position string. If the same ID occurs later in the same path, those nodes get a "short name" that is just an index into the list of prior long names. Index `n` is encoded as `base52(n // 10) + base10(n % 10)`. 
The set of all waypoint names following a given path is still unique, which ensures rule (i) for some arbitrary order on IDs (not necessarily lexicographic); and they are prefix-free (rule (ii)) due to short names' special ending digit and long names' special starting comma and ending period. 56 | 57 | Second, instead of giving each side node a whole character, we give it the last bit in the preceding valueSeq. Specifically, we go by twos in the special sequence, then add 1 if the side is "right". 58 | 59 | Third, for the first waypoint node, we use `` `${ID}.` `` (no comma) instead of the long name `` `,${ID}.` ``. Otherwise, every position would start with a redundant `','`. 60 | -------------------------------------------------------------------------------- /benchmarks/main.ts: -------------------------------------------------------------------------------- 1 | import { assert } from "chai"; 2 | import fs from "fs"; 3 | import createRBTree from "functional-red-black-tree"; 4 | import seedrandom from "seedrandom"; 5 | import { IDs, PositionSource } from "../src"; 6 | import realTextTraceEdits from "./real_text_trace_edits.json"; 7 | 8 | const resultsDir = "benchmark_results/"; 9 | 10 | const { edits, finalText } = realTextTraceEdits as unknown as { 11 | finalText: string; 12 | edits: Array<[number, number, string | undefined]>; 13 | }; 14 | 15 | function run(ops?: number, rotateFreq?: number) { 16 | console.log( 17 | "## Run:", 18 | ops ?? "all", 19 | "ops; rotate", 20 | rotateFreq ? `every ${rotateFreq} ops` : "never" 21 | ); 22 | console.log(); 23 | 24 | const rng = seedrandom("42"); 25 | let source = new PositionSource({ 26 | ID: IDs.pseudoRandom(rng), 27 | }); 28 | let list = createRBTree(); 29 | // In order of creation, so we can watch time trends. 30 | const metrics: PositionMetric[] = []; 31 | 32 | for (let i = 0; i < (ops ?? 
/**
 * Replays the recorded text-editing trace against a `PositionSource`,
 * collecting one `PositionMetric` per created position, then prints
 * summary statistics and writes the raw metrics to a CSV file in
 * `resultsDir`.
 *
 * @param ops Number of trace edits to replay. When omitted, the whole trace
 * is replayed and the resulting text is checked against `finalText`.
 * @param rotateFreq When set (and non-zero), the `PositionSource` is replaced
 * by a fresh one (with a new pseudorandom ID) every `rotateFreq` edits.
 */
function run(ops?: number, rotateFreq?: number) {
  const rotateLabel = rotateFreq ? `every ${rotateFreq} ops` : "never";
  console.log("## Run:", ops ?? "all", "ops; rotate", rotateLabel);
  console.log();

  // Fixed seed, so the generated IDs are the same on every run.
  const rng = seedrandom("42");
  let source = new PositionSource({
    ID: IDs.pseudoRandom(rng),
  });
  // Sorted map from position string to character.
  let list = createRBTree();
  // In order of creation, so we can watch time trends.
  const metrics: PositionMetric[] = [];

  const opCount = ops ?? edits.length;
  for (let i = 0; i < opCount; i++) {
    if (rotateFreq && i > 0 && i % rotateFreq === 0) {
      source = new PositionSource({ ID: IDs.pseudoRandom(rng) });
    }

    const [index, , inserted] = edits[i];
    if (inserted === undefined) {
      // Delete the character at index.
      list = list.at(index).remove();
      continue;
    }

    // Insert `inserted` at index, between its current neighbors (if any).
    const leftKey = index === 0 ? undefined : list.at(index - 1).key;
    const rightKey = index === list.length ? undefined : list.at(index).key;
    const position = source.createBetween(leftKey, rightKey);
    list = list.insert(position, inserted);
    metrics.push(getMetric(position));
  }

  if (ops === undefined) {
    // Full trace replayed: check the answer.
    assert.strictEqual(finalText, list.values.join(""));
  }

  // Print summary stats, one section per metric.
  // Note that collecting stats increases the runtime.
  const fields = ["length", "longNames", "waypoints", "valueIndex"] as const;
  for (const field of fields) {
    printStats(
      field,
      metrics.map((metric) => metric[field])
    );
  }

  // Estimate PositionSource memory usage (of the last source only).
  // @ts-expect-error Private access
  const lastValueSeqs = source.lastValueSeqs;
  let keyLengths = 0;
  for (const prefix of lastValueSeqs.keys()) {
    keyLengths += prefix.length;
  }
  console.log("### PositionSource memory usage\n");
  console.log("- Map size:", lastValueSeqs.size);
  console.log("- Sum of map key lengths:", keyLengths);
  console.log();

  // Write data files.
  if (!fs.existsSync(resultsDir)) fs.mkdirSync(resultsDir);
  const fileName = `results_${ops ?? "all"}_${rotateFreq ?? "never"}.csv`;
  const rows = metrics.map(
    (metric) =>
      `${metric.length},${metric.longNames},${metric.waypoints},${metric.valueIndex}`
  );
  const csv = "length,longNames,waypoints,valueIndex\n" + rows.join("\n");
  fs.writeFileSync(resultsDir + fileName, csv);
}
/**
 * Measurements taken from one position string.
 */
interface PositionMetric {
  /** Number of characters in the position string. */
  length: number;
  /**
   * How many waypoints are written with a long name, i.e., how many
   * full IDs the string contains.
   */
  longNames: number;
  /** How many waypoints the string contains in total. */
  waypoints: number;
  /**
   * The ordinary 0-based count of values in a row for the final waypoint
   * (as opposed to the encoded valueSeq).
   */
  valueIndex: number;
}

/**
 * Returns the index of the last waypoint character in `position`:
 * the last `'.'` or digit, ignoring the very last character
 * (which always belongs to the valueSeq).
 *
 * @throws Error if no such character exists.
 */
function getLastWaypointChar(position: string): number {
  let i = position.length - 2;
  while (i >= 0) {
    const c = position[i];
    const isDigit = c >= "0" && c <= "9";
    if (c === "." || isDigit) {
      // i marks the end of the prefix.
      return i;
    }
    i--;
  }
  throw new Error("lastWaypointChar not found: " + position);
}

/**
 * Parses a base-52 number written with letters: code points from 97
 * (`'a'`) up map to digits 26 and above, lower code points map to
 * `code - 65` (so `'A'` is 0).
 */
function parseBase52(s: string): number {
  return s.split("").reduce((acc, letter) => {
    const code = letter.charCodeAt(0);
    const offset = code >= 97 ? 71 : 65;
    return 52 * acc + (code - offset);
  }, 0);
}

/**
 * Computes the `PositionMetric` for a position string.
 */
function getMetric(position: string): PositionMetric {
  // Each long name (full ID) ends with exactly one period.
  const longNames = position.split(".").length - 1;

  // The valueSeq is everything after the last waypoint char.
  const valueSeqStart = getLastWaypointChar(position) + 1;
  const valueSeq = parseBase52(position.slice(valueSeqStart));

  return {
    length: position.length,
    longNames,
    waypoints: waypointCount(position),
    valueIndex: valueIndexFromSeq(valueSeq),
  };
}

/**
 * Counts the waypoints in a position string. There is one waypoint per:
 * - `'.'` (the end of a long name), and
 * - digit that lies outside of a long name (the end of a short name).
 */
function waypointCount(position: string): number {
  let total = 0;
  // Scanning right to left: true while we are between a long name's
  // closing '.' and its opening ','.
  let skippingLongName = false;
  for (let idx = position.length - 1; idx >= 0; idx--) {
    const ch = position[idx];
    if (ch === ".") {
      // End of a long name.
      total += 1;
      // Skip the rest of the long name, in case it contains a digit
      // that is not the end of a short name.
      skippingLongName = true;
      continue;
    }
    if (skippingLongName) {
      if (ch === ",") skippingLongName = false;
      continue;
    }
    if (ch >= "0" && ch <= "9") total += 1;
  }
  return total;
}

/**
 * Returns the valueIndex corresponding to the (odd) valueSeq n.
 */
function valueIndexFromSeq(n: number): number {
  // Number of base-52 digits in n.
  const digits = n === 0 ? 1 : Math.floor(Math.log(n) / Math.log(52)) + 1;
  // The first number with this many digits is 52^d - 52 * 26^(d-1);
  // measure how far n is from it (= index within the d-digit run).
  const firstWithDigits = 52 ** digits - 52 * 26 ** (digits - 1);
  let index = n - firstWithDigits;
  // Every shorter digit count d2 contributed 26^d2 entries.
  for (let shorter = 1; shorter < digits; shorter++) {
    index += 26 ** shorter;
  }
  // Only odd entries are used, so halve.
  return (index - 1) / 2;
}
198 | return (ans - 1) / 2; 199 | } 200 | 201 | function printStats(name: string, data: number[]) { 202 | console.log(`### ${name}\n`); 203 | console.log( 204 | "- Average:", 205 | Math.round(data.reduce((a, b) => a + b, 0) / data.length) 206 | ); 207 | data.sort((a, b) => a - b); 208 | console.log("- Median:", percentile(data, 0.5)); 209 | console.log("- 99th percentile:", percentile(data, 0.99)); 210 | console.log("- Max:", percentile(data, 1)); 211 | console.log(); 212 | } 213 | 214 | function percentile(sortedData: number[], alpha: number) { 215 | const index = Math.ceil(alpha * sortedData.length) - 1; 216 | return sortedData[index]; 217 | } 218 | 219 | // In the order described in README.md#performance. 220 | run(); 221 | run(undefined, 1000); 222 | run(10000); 223 | run(10000, 1000); 224 | -------------------------------------------------------------------------------- /test/manual.test.ts: -------------------------------------------------------------------------------- 1 | import { assert } from "chai"; 2 | import seedrandom from "seedrandom"; 3 | import { IDs, PositionSource } from "../src"; 4 | import { assertIsOrdered, testUniqueAfterDelete } from "./util"; 5 | 6 | describe("manual", () => { 7 | const rng = seedrandom("42"); 8 | const randomName = IDs.pseudoRandom(rng); 9 | const randomAlice = IDs.pseudoRandom(rng); 10 | const randomBobby = IDs.pseudoRandom(rng); 11 | const randomBob = IDs.pseudoRandom(rng, { length: 5 }); 12 | 13 | describe("single user", () => { 14 | describe("random ID", () => { 15 | testSingleUser(randomName); 16 | }); 17 | describe("alphabetic ID", () => { 18 | testSingleUser("alice"); 19 | }); 20 | describe("numeric ID", () => { 21 | testSingleUser("0"); 22 | }); 23 | describe("empty ID", () => { 24 | testSingleUser(""); 25 | }); 26 | }); 27 | 28 | describe("two users", () => { 29 | describe("random IDs", () => { 30 | testTwoUsers(randomAlice, randomBobby); 31 | }); 32 | describe("random IDs, unequal lengths", () => { 33 | 
/**
 * Registers the single-user test cases for a `PositionSource` that uses
 * the given ID. A fresh source (`alice`) is created before each test.
 */
function testSingleUser(ID: string) {
  let alice!: PositionSource;

  beforeEach(() => {
    alice = new PositionSource({ ID });
  });

  /** Creates `count` positions left-to-right, each one after the last. */
  const buildLtR = (count: number): string[] => {
    const positions: string[] = [];
    let cursor = PositionSource.FIRST;
    while (positions.length < count) {
      cursor = alice.createBetween(cursor, PositionSource.LAST);
      positions.push(cursor);
    }
    return positions;
  };

  /** Creates `count` positions right-to-left, each one before the last. */
  const buildRtL = (count: number): string[] => {
    const positions: string[] = [];
    let cursor = PositionSource.LAST;
    while (positions.length < count) {
      cursor = alice.createBetween(PositionSource.FIRST, cursor);
      positions.unshift(cursor);
    }
    return positions;
  };

  /**
   * 20 positions left-to-right, followed by 20 more inserted
   * right-to-left in the gap between elements 9 and 10.
   */
  const buildLtRThenMidRtL = (): string[] => {
    const positions = buildLtR(20);
    const midLeft = positions[9];
    let cursor = positions[10];
    for (let n = 0; n < 20; n++) {
      cursor = alice.createBetween(midLeft, cursor);
      positions.splice(10, 0, cursor);
    }
    return positions;
  };

  it("LtR", () => {
    assertIsOrdered(buildLtR(20));
  });

  it("RtL", () => {
    assertIsOrdered(buildRtL(20));
  });

  it("restart", () => {
    const list: string[] = [];
    for (let round = 0; round < 5; round++) {
      // Each round inserts a left-to-right run in front of the current
      // first element (undefined on the first round, which is okay).
      const after = list[0];
      let cursor: string = PositionSource.FIRST;
      for (let i = 0; i < 10; i++) {
        cursor = alice.createBetween(cursor, after);
        list.splice(i, 0, cursor);
      }
    }
    assertIsOrdered(list);
  });

  it("LtR long", () => {
    const list = buildLtR(1000);
    assertIsOrdered(list);
    // Efficiency check.
    assert.isBelow(list.at(-1)!.length, 30);
  });

  it("RtL long", () => {
    assertIsOrdered(buildRtL(1000));
  });

  it("LtR, mid LtR", () => {
    const list = buildLtR(20);
    const midRight = list[10];
    let cursor = list[9];
    for (let i = 0; i < 20; i++) {
      cursor = alice.createBetween(cursor, midRight);
      list.splice(10 + i, 0, cursor);
    }
    assertIsOrdered(list);
  });

  it("LtR, mid RtL", () => {
    assertIsOrdered(buildLtRThenMidRtL());
  });

  it("unique after delete", () => {
    testUniqueAfterDelete(buildLtRThenMidRtL(), alice);
  });
}
/**
 * Registers the two-user test cases for a pair of `PositionSource`s
 * using the given IDs. Fresh sources (`alice` for `ID1`, `bob` for `ID2`)
 * are created before each test.
 */
function testTwoUsers(ID1: string, ID2: string) {
  let alice!: PositionSource;
  let bob!: PositionSource;

  beforeEach(() => {
    alice = new PositionSource({ ID: ID1 });
    bob = new PositionSource({ ID: ID2 });
  });

  /** Picks which user creates the i-th position. */
  type Chooser = (i: number) => PositionSource;
  // alice creates the first 20 positions, bob the rest.
  const aliceThenBob: Chooser = (i) => (i >= 20 ? bob : alice);
  // bob creates the even-indexed positions, alice the odd ones.
  const bobOnEven: Chooser = (i) => (i % 2 === 0 ? bob : alice);

  /** Creates `count` positions left-to-right, each one after the last. */
  const buildLtR = (count: number, choose: Chooser): string[] => {
    const positions: string[] = [];
    let cursor = PositionSource.FIRST;
    for (let i = 0; i < count; i++) {
      cursor = choose(i).createBetween(cursor, PositionSource.LAST);
      positions.push(cursor);
    }
    return positions;
  };

  /** Creates `count` positions right-to-left, each one before the last. */
  const buildRtL = (count: number, choose: Chooser): string[] => {
    const positions: string[] = [];
    let cursor = PositionSource.LAST;
    for (let i = 0; i < count; i++) {
      cursor = choose(i).createBetween(PositionSource.FIRST, cursor);
      positions.unshift(cursor);
    }
    return positions;
  };

  /**
   * 5 rounds; each inserts a 10-element left-to-right run (users
   * alternating, bob first) in front of the current first element.
   */
  const buildRestartAlternating = (): string[] => {
    const positions: string[] = [];
    for (let round = 0; round < 5; round++) {
      const after = positions[0]; // undefined on the first round; okay
      let cursor = PositionSource.FIRST;
      for (let i = 0; i < 10; i++) {
        cursor = bobOnEven(i).createBetween(cursor, after);
        positions.splice(i, 0, cursor);
      }
    }
    return positions;
  };

  /**
   * Concatenates two concurrently-created runs in the order given by
   * their first elements (they should not interleave).
   */
  const concatByFirst = (list1: string[], list2: string[]): string[] =>
    list1[0] < list2[0] ? [...list1, ...list2] : [...list2, ...list1];

  it("LtR sequential", () => {
    assertIsOrdered(buildLtR(40, aliceThenBob));
  });

  it("LtR alternating", () => {
    assertIsOrdered(buildLtR(40, bobOnEven));
  });

  it("RtL sequential", () => {
    assertIsOrdered(buildRtL(40, aliceThenBob));
  });

  it("RtL alternating", () => {
    assertIsOrdered(buildRtL(40, bobOnEven));
  });

  it("restart alternating", () => {
    assertIsOrdered(buildRestartAlternating());
  });

  it("LtR concurrent", () => {
    // Each user independently appends 20 positions starting from nothing.
    const appendRun = (user: PositionSource): string[] => {
      const run: string[] = [];
      let cursor: string | undefined = undefined;
      for (let i = 0; i < 20; i++) {
        cursor = user.createBetween(cursor, undefined);
        run.push(cursor);
      }
      return run;
    };
    const list1 = appendRun(alice);
    const list2 = appendRun(bob);
    // list1 and list2 should be sorted one after the other, according
    // to their first element (non-interleaving).
    assertIsOrdered(concatByFirst(list1, list2));
  });

  it("RtL concurrent", () => {
    // Each user independently prepends 20 positions starting from nothing.
    const prependRun = (user: PositionSource): string[] => {
      const run: string[] = [];
      let cursor: string | undefined = undefined;
      for (let i = 0; i < 20; i++) {
        cursor = user.createBetween(undefined, cursor);
        run.unshift(cursor);
      }
      return run;
    };
    const list1 = prependRun(alice);
    const list2 = prependRun(bob);
    // list1 and list2 should be sorted one after the other, according
    // to their first element (non-interleaving).
    assertIsOrdered(concatByFirst(list1, list2));
  });

  it("insert between concurrent", () => {
    // "Hard case" from the blog post - see
    // https://mattweidner.com/2022/10/05/basic-list-crdt.html#between-concurrent
    const a = alice.createBetween(undefined, undefined);
    const b = alice.createBetween(a, undefined);

    // Both users concurrently insert between a and b.
    const fromAlice = alice.createBetween(a, b);
    const fromBob = bob.createBetween(a, b);
    // Name them so that c < d.
    const [c, d] = fromBob < fromAlice ? [fromBob, fromAlice] : [fromAlice, fromBob];

    // Try making e on both alice and bob.
    const e1 = alice.createBetween(c, d);
    const e2 = bob.createBetween(c, d);

    assert.notEqual(e1, e2);
    assertIsOrdered([a, c, e1, d, b]);
    assertIsOrdered([a, c, e2, d, b]);
  });

  it("unique after delete", () => {
    const list = buildRestartAlternating();
    assertIsOrdered(list);

    testUniqueAfterDelete(list, alice);
    testUniqueAfterDelete(list, bob);
  });

  it("left children", () => {
    const gParent = alice.createBetween();
    // Each parent is a child of gParent with the same waypoint but
    // a range of valueIndex's.
    const parents: string[] = [];
    let cursor = gParent;
    while (parents.length < 500) {
      cursor = bob.createBetween(cursor, PositionSource.LAST);
      parents.push(cursor);
    }
    const list = [gParent, ...parents];
    // Create positions between gParent and the parents; since parent
    // starts with gParent, they'll be left children of parent.
    // This checks that leftVersion() works on those valueSeq's.
    parents.forEach((parent, i) => {
      const child = bob.createBetween(gParent, parent);
      list.splice(2 * i + 1, 0, child);
    });
    assertIsOrdered(list);

    testUniqueAfterDelete(list, alice);
    testUniqueAfterDelete(list, bob);
  });
}
322 | // This checks that leftVersion() works on those valueSeq's. 323 | for (let i = 0; i < parents.length; i++) { 324 | const child = bob.createBetween(gParent, parents[i]); 325 | list.splice(2 * i + 1, 0, child); 326 | } 327 | assertIsOrdered(list); 328 | 329 | testUniqueAfterDelete(list, alice); 330 | testUniqueAfterDelete(list, bob); 331 | }); 332 | } 333 | -------------------------------------------------------------------------------- /src/position_source.ts: -------------------------------------------------------------------------------- 1 | import { IDs } from "./ids"; 2 | import { assert, LastInternal, precond } from "./util"; 3 | 4 | /** 5 | * A source of lexicographically-ordered "position strings" for 6 | * collaborative lists and text. 7 | * 8 | * In a collaborative list (or text string), you need a way to refer 9 | * to "positions" within that list that: 10 | * 1. Point to a specific list element (or text character). 11 | * 2. Are global (all users agree on them) and immutable (they do not 12 | * change over time). 13 | * 3. Can be sorted. 14 | * 4. Are unique, even if different users concurrently create positions 15 | * at the same place. 16 | * 17 | * `PositionSource` gives you such positions, in the form 18 | * of lexicographically-ordered strings. Specifically, `createBetween` 19 | * returns a new "position string" in between two existing position strings. 20 | * 21 | * These strings have the bonus properties: 22 | * - 5. (Non-Interleaving) If two `PositionSource`s concurrently create a (forward or backward) 23 | * sequence of positions at the same place, 24 | * their sequences will not be interleaved. 25 | * For example, if 26 | * Alice types "Hello" while Bob types "World" at the same place, 27 | * and they each use a `PositionSource` to create a position for each 28 | * character, then 29 | * the resulting order will be "HelloWorld" or "WorldHello", not 30 | * "HWeolrllod". 31 | * - 6. 
/**
 * A source of lexicographically-ordered "position strings" for
 * collaborative lists and text.
 *
 * In a collaborative list (or text string), you need a way to refer
 * to "positions" within that list that:
 * 1. Point to a specific list element (or text character).
 * 2. Are global (all users agree on them) and immutable (they do not
 * change over time).
 * 3. Can be sorted.
 * 4. Are unique, even if different users concurrently create positions
 * at the same place.
 *
 * `PositionSource` gives you such positions, in the form
 * of lexicographically-ordered strings. Specifically, `createBetween`
 * returns a new "position string" in between two existing position strings.
 *
 * These strings have the bonus properties:
 * - 5. (Non-Interleaving) If two `PositionSource`s concurrently create a
 * (forward or backward) sequence of positions at the same place,
 * their sequences will not be interleaved.
 * For example, if
 * Alice types "Hello" while Bob types "World" at the same place,
 * and they each use a `PositionSource` to create a position for each
 * character, then
 * the resulting order will be "HelloWorld" or "WorldHello", not
 * "HWeolrllod".
 * - 6. If a `PositionSource` creates positions in a forward (increasing)
 * sequence, their lengths as strings will only grow logarithmically,
 * not linearly.
 *
 * Position strings are printable ASCII. Specifically, they
 * contain alphanumeric characters, `','`, and `'.'`.
 * Also, the special string `PositionSource.LAST` is `'~'`.
 *
 * Further reading:
 * - [Fractional indexing](https://www.figma.com/blog/realtime-editing-of-ordered-sequences/#fractional-indexing),
 * a related scheme that satisfies 1-3 but not 4-6.
 * - [List CRDTs](https://mattweidner.com/2022/10/21/basic-list-crdt.html)
 * and how they map to position strings. `PositionSource` uses an optimized
 * variant of that link's string implementation.
 * - [Paper about interleaving](https://www.repository.cam.ac.uk/handle/1810/290391)
 * in collaborative text editors.
 */
export class PositionSource {
  /**
   * A string that is less than all positions.
   *
   * Value: `""`.
   */
  static readonly FIRST: string = "";
  /**
   * A string that is greater than all positions.
   *
   * Value: `"~"`.
   */
  static readonly LAST: string = LastInternal;

  /**
   * The unique ID for this `PositionSource`.
   */
  readonly ID: string;
  /**
   * Our waypoints' long name: `,${ID}.`.
   */
  private readonly longName: string;
  /**
   * Variant of longName used for a position's first ID: `${ID}.`.
   * (Otherwise every position would start with a redundant ','.)
   */
  private readonly firstName: string;

  /**
   * For each waypoint that we created, maps a prefix (see getPrefix)
   * for that waypoint to its last (most recent) valueSeq.
   * We always store the right-side version (odd valueSeq).
   *
   * Explicitly typed: an untyped `new Map()` is `Map<any, any>`, which
   * would let `any` values flow unchecked into the valueSeq arithmetic.
   */
  private readonly lastValueSeqs = new Map<string, number>();

  /**
   * Constructs a new `PositionSource`.
   *
   * It is okay to share a single `PositionSource` between
   * all documents (lists/text strings) in the same JavaScript runtime.
   *
   * For efficiency (shorter position strings),
   * within each JavaScript runtime, you should not use
   * more than one `PositionSource` for the same document.
   * An exception is if multiple logical users share the same runtime;
   * we then recommend one `PositionSource` per user.
   *
   * @param options.ID A unique ID for this `PositionSource`. Defaults to
   * `IDs.random()`.
   *
   * If provided, `options.ID` must satisfy:
   * - It is unique across the entire collaborative application, i.e.,
   * all `PositionSource`s whose positions may be compared to ours. This
   * includes past `PositionSource`s, even if they correspond to the same
   * user/device.
   * - It does not contain `','` or `'.'`.
   * - The first character is lexicographically less than `'~'` (code point 126).
   *
   * If `options.ID` contains non-alphanumeric characters, then created
   * positions will contain those characters in addition to
   * alphanumeric characters, `','`, and `'.'`.
   */
  constructor(options?: { ID?: string }) {
    if (options?.ID !== undefined) {
      IDs.validate(options.ID);
    }
    this.ID = options?.ID ?? IDs.random();
    this.longName = `,${this.ID}.`;
    this.firstName = `${this.ID}.`;
  }

  /**
   * Returns a new position between `left` and `right`
   * (`left < new < right`).
   *
   * The new position is unique across the entire collaborative application,
   * even in the face of concurrent calls to this method on other
   * `PositionSource`s.
   *
   * @param left Defaults to `PositionSource.FIRST` (insert at the beginning).
   *
   * @param right Defaults to `PositionSource.LAST` (insert at the end).
   */
  createBetween(
    left: string = PositionSource.FIRST,
    right: string = PositionSource.LAST
  ): string {
    precond(left < right, "left must be less than right:", left, "!<", right);
    precond(
      right <= PositionSource.LAST,
      "right must be less than or equal to LAST:",
      right,
      "!<=",
      PositionSource.LAST
    );

    // Represent the FIRST/LAST sentinels as null, so that the cases
    // below only deal with real positions.
    const leftFixed = left === PositionSource.FIRST ? null : left;
    const rightFixed = right === PositionSource.LAST ? null : right;

    let ans: string;

    if (
      rightFixed !== null &&
      (leftFixed === null || rightFixed.startsWith(leftFixed))
    ) {
      // Left child of right. This always appends a waypoint.
      const ancestor = leftVersion(rightFixed);
      ans = this.appendWaypoint(ancestor);
    } else {
      // Right child of left.
      if (leftFixed === null) {
        // ancestor is FIRST.
        ans = this.appendWaypoint("");
      } else {
        // Check if we can reuse left's prefix.
        // It needs to be one of ours, and right can't use the same
        // prefix (otherwise we would get ans > right by comparing right's
        // older valueIndex to our new valueIndex).
        const prefix = getPrefix(leftFixed);
        const lastValueSeq = this.lastValueSeqs.get(prefix);
        if (
          lastValueSeq !== undefined &&
          !(rightFixed !== null && rightFixed.startsWith(prefix))
        ) {
          // Reuse.
          const valueSeq = nextOddValueSeq(lastValueSeq);
          ans = prefix + stringifyBase52(valueSeq);
          this.lastValueSeqs.set(prefix, valueSeq);
        } else {
          // Append waypoint.
          ans = this.appendWaypoint(leftFixed);
        }
      }
    }

    assert(left < ans && ans < right, "Bad position:", left, ans, right);
    return ans;
  }

  /**
   * Appends a waypoint to the given ancestor (= prefix adjusted for
   * side), returning a unique new position using that waypoint.
   *
   * lastValueSeqs is also updated as needed for the waypoint.
   */
  private appendWaypoint(ancestor: string): string {
    let waypointName = ancestor === "" ? this.firstName : this.longName;
    // If our ID already appears in ancestor, instead use a short
    // name for the waypoint.
    // Here we use the uniqueness of ',' and '.' to
    // claim that if this.longName (= `,${ID}.`) appears in ancestor, then it
    // must actually be from a waypoint that we created.
    let existing = ancestor.lastIndexOf(this.longName);
    if (ancestor.startsWith(this.firstName)) existing = 0;
    if (existing !== -1) {
      // Find the index of existing among the long-name
      // waypoints, in backwards order. Here we use the fact that
      // each longName ends with '.' and that '.' does not appear otherwise.
      let index = -1;
      for (let i = existing; i < ancestor.length; i++) {
        if (ancestor[i] === ".") index++;
      }
      waypointName = stringifyShortName(index);
    }

    const prefix = ancestor + waypointName;
    const lastValueSeq = this.lastValueSeqs.get(prefix);
    // Use next odd (right-side) valueSeq (1 if it's a new waypoint).
    const valueSeq =
      lastValueSeq === undefined ? 1 : nextOddValueSeq(lastValueSeq);
    this.lastValueSeqs.set(prefix, valueSeq);
    return prefix + stringifyBase52(valueSeq);
  }
}
|| ("0" <= char && char <= "9")) { 234 | // i is the last waypoint char, i.e., the end of the prefix. 235 | return position.slice(0, i + 1); 236 | } 237 | } 238 | assert(false, "No last waypoint char found (not a position?)", position); 239 | return ""; 240 | } 241 | 242 | /** 243 | * Returns the variant of position ending with a "left" marker 244 | * instead of the default "right" marker. 245 | * 246 | * I.e., the ancestor for position's left descendants. 247 | */ 248 | function leftVersion(position: string) { 249 | // We need to subtract one from the (odd) valueSeq, equivalently, from 250 | // its last base52 digit. 251 | const last = parseBase52(position[position.length - 1]); 252 | assert(last % 2 === 1, "Bad valueSeq (not a position?)", last, position); 253 | return position.slice(0, -1) + stringifyBase52(last - 1); 254 | } 255 | 256 | /** 257 | * Base 52, except for last digit, which is base 10 using 258 | * digits. That makes it easy to find the end of a short name 259 | * in getPrefix: it ends at the last digit. 260 | */ 261 | function stringifyShortName(n: number): string { 262 | if (n < 10) return String.fromCharCode(48 + n); 263 | else 264 | return ( 265 | stringifyBase52(Math.floor(n / 10)) + String.fromCharCode(48 + (n % 10)) 266 | ); 267 | } 268 | 269 | /** 270 | * Base 52 encoding using letters (with "digits" in order by code point). 271 | */ 272 | function stringifyBase52(n: number): string { 273 | if (n === 0) return "A"; 274 | const codes: number[] = []; 275 | while (n > 0) { 276 | const digit = n % 52; 277 | codes.unshift((digit >= 26 ? 71 : 65) + digit); 278 | n = Math.floor(n / 52); 279 | } 280 | return String.fromCharCode(...codes); 281 | } 282 | 283 | function parseBase52(s: string): number { 284 | let n = 0; 285 | for (let i = 0; i < s.length; i++) { 286 | const code = s.charCodeAt(i); 287 | const digit = code - (code >= 97 ? 
71 : 65); 288 | n = 52 * n + digit; 289 | } 290 | return n; 291 | } 292 | 293 | const log52 = Math.log(52); 294 | 295 | /** 296 | * Returns the next odd valueSeq in the special sequence. 297 | * This is equivalent to mapping n to its valueIndex, adding 2, 298 | * then mapping back. 299 | * 300 | * The sequence has the following properties: 301 | * 1. Each number is a nonnegative integer (however, not all 302 | * nonnegative integers are enumerated). 303 | * 2. The numbers' base-52 representations are enumerated in 304 | * lexicographic order, with no prefixes (i.e., no string 305 | * representation is a prefix of another). 306 | * 3. The n-th enumerated number has O(log(n)) base-52 digits. 307 | * 308 | * Properties (2) and (3) are analogous to normal counting, except 309 | * that we order by the (base-52) lexicographic order instead of the 310 | * usual order by magnitude. It is also the case that 311 | * the numbers are in order by magnitude, although we do not 312 | * use this property. 313 | * 314 | * The specific sequence is as follows: 315 | * - Start with 0. 316 | * - Enumerate 26^1 numbers (A, B, ..., Z). 317 | * - Add 1, multiply by 52, then enumerate 26^2 numbers 318 | * (aA, aB, ..., mz). 319 | * - Add 1, multiply by 52, then enumerate 26^3 numbers 320 | * (nAA, nAB, ..., tZz). 321 | * - Repeat this pattern indefinitely, enumerating 322 | * 26^d d-digit numbers for each d >= 1. Imagining a decimal place 323 | * in front of each number, each d consumes 2^(-d) of the unit interval, 324 | * so we never "reach 1" (overflow to d+1 digits when 325 | * we meant to use d digits). 326 | * 327 | * I believe this is related to 328 | * [Elias gamma coding](https://en.wikipedia.org/wiki/Elias_gamma_coding). 329 | */ 330 | function nextOddValueSeq(n: number): number { 331 | const d = n === 0 ? 1 : Math.floor(Math.log(n) / log52) + 1; 332 | // You can calculate that the last d-digit number is 52^d - 26^d - 1. 
333 | if (n === Math.pow(52, d) - Math.pow(26, d) - 1) { 334 | // First step is a new length: n -> (n + 1) * 52. 335 | // Second step is n -> n + 1. 336 | return (n + 1) * 52 + 1; 337 | } else { 338 | // n -> n + 1 twice. 339 | return n + 2; 340 | } 341 | } 342 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # position-strings 2 | 3 | A source of lexicographically-ordered "position strings" for 4 | collaborative lists and text. 5 | 6 | - [About](#about) 7 | - [Usage](#usage) 8 | - [API](#api) 9 | - [Example App](#example-app) 10 | - [Performance](#performance) 11 | 12 | ## About 13 | 14 | In a collaborative list (or text string), you need a way to refer 15 | to "positions" within that list that: 16 | 17 | 1. Point to a specific list element (or text character). 18 | 2. Are global (all users agree on them) and immutable (they do not 19 | change over time). 20 | 3. Can be sorted. 21 | 4. Are unique, even if different users concurrently create positions 22 | at the same place. 23 | 24 | This package gives you such positions, in the form 25 | of lexicographically-ordered strings. Specifically, `PositionSource.createBetween` 26 | returns a new "position string" in between two existing position strings. 27 | 28 | These strings have the bonus properties: 29 | 30 | 5. (Non-Interleaving) If two `PositionSource`s concurrently create a (forward or backward) 31 | sequence of positions at the same place, 32 | their sequences will not be interleaved. 33 | 34 | For example, if 35 | Alice types "Hello" while Bob types "World" at the same place, 36 | and they each use a `PositionSource` to create a position for each 37 | character, then 38 | the resulting order will be "HelloWorld" or "WorldHello", not 39 | "HWeolrllod". 40 | 41 | 6. 
If a `PositionSource` creates positions in a forward (increasing) 42 | sequence, their lengths as strings will only grow logarithmically, 43 | not linearly. 44 | 45 | Position strings are printable ASCII. Specifically, they 46 | contain alphanumeric characters, `','`, and `'.'`. 47 | Also, the special string `PositionSource.LAST` is `'~'`. 48 | 49 | ### Further reading 50 | 51 | - [Fractional indexing](https://www.figma.com/blog/realtime-editing-of-ordered-sequences/#fractional-indexing), 52 | a related scheme that satisfies 1-3 but not 4-6. 53 | - [List CRDTs](https://mattweidner.com/2022/10/21/basic-list-crdt.html) 54 | and how they map to position strings. `PositionSource` uses an optimized 55 | variant of that link's [string implementation](https://mattweidner.com/2022/10/21/basic-list-crdt.html#intro-string-implementation), described in 56 | [algorithm.md](https://github.com/mweidner037/position-strings/blob/master/algorithm.md). 57 | - [Paper about interleaving](https://www.repository.cam.ac.uk/handle/1810/290391) 58 | in collaborative text editors. 59 | - [list-positions](https://github.com/mweidner037/list-positions/tree/master/#readme), a similar but more comprehensive library. It provides helper data structures (e.g., a `List` class) and more efficient usage options. 60 | 61 | ## Usage 62 | 63 | Install with npm: 64 | 65 | ```bash 66 | npm i --save position-strings 67 | ``` 68 | 69 | Creating position strings: 70 | 71 | ```ts 72 | import { PositionSource } from "position-strings"; 73 | 74 | // At the start of your app: 75 | const source = new PositionSource(); 76 | 77 | // When the user types `char` at `index`: 78 | const position = source.createBetween( 79 | myListPositions[index - 1], 80 | myListPositions[index] 81 | // If index is 0 or myListPositions.length, the above behaves reasonably, 82 | // since undefined defaults to PositionSource.FIRST or LAST. 
83 | ); 84 | myListPositions.splice(index, 0, position); 85 | myList.splice(index, 0, char); 86 | // Or insert { position, char } into a database table, ordered map, etc. 87 | ``` 88 | 89 | If your list is collaborative: 90 | 91 | ```ts 92 | import { findPosition } from "position-strings"; 93 | 94 | // After creating { char, position }, also broadcast it to other users. 95 | // When you receive `remote = { char, position }` from another user: 96 | const index = findPosition(remote.position, myListPositions).index; 97 | myListPositions.splice(index, 0, remote.position); 98 | myList.splice(index, 0, remote.char); 99 | // Or insert `remote` into a database table and query 100 | // "SELECT char FROM table ORDER BY position". 101 | // Or insert `remote` into an ordered map, etc. 102 | ``` 103 | 104 | To use cursors: 105 | 106 | ```ts 107 | import { Cursors, PositionSource } from "position-strings"; 108 | 109 | let cursor: string = PositionSource.FIRST; 110 | 111 | // When the user deliberately moves their cursor to `cursorIndex`: 112 | cursor = Cursors.fromIndex(cursorIndex, myListPositions); 113 | // Or run the algorithm in the `Cursors.fromIndex` docs. 114 | 115 | // When the text changes, update the displayed cursor: 116 | cursorIndex = Cursors.toIndex(cursor, myListPositions); 117 | // Or run the query in the `Cursors.toIndex` docs. 118 | ``` 119 | 120 | ## API 121 | 122 | - [Class `PositionSource`](#class-positionsource) 123 | - [Function `findPosition`](#function-findposition) 124 | - [Class `Cursors`](#class-cursors) 125 | - [Class `IDs`](#class-ids) 126 | 127 | ### Class `PositionSource` 128 | 129 | #### constructor 130 | 131 | ```ts 132 | constructor(options?: { ID?: string }) 133 | ``` 134 | 135 | Constructs a new `PositionSource`. 136 | 137 | It is okay to share a single `PositionSource` between 138 | all documents (lists/text strings) in the same JavaScript runtime. 
139 | 140 | For efficiency (shorter position strings), 141 | within each JavaScript runtime, you should not use 142 | more than one `PositionSource` for the same document. 143 | An exception is if multiple logical users share the same runtime; 144 | we then recommend one `PositionSource` per user. 145 | 146 | _@param_ `options.ID` A unique ID for this `PositionSource`. Defaults to 147 | `IDs.random()`. 148 | 149 | If provided, `options.ID` must satisfy: 150 | 151 | - It is unique across the entire collaborative application, i.e., 152 | all `PositionSource`s whose positions may be compared to ours. This 153 | includes past `PositionSource`s, even if they correspond to the same 154 | user/device. 155 | - It does not contain `','` or `'.'`. 156 | - The first character is lexicographically less than `'~'` (code point 126). 157 | 158 | If `options.ID` contains non-alphanumeric characters, then created 159 | positions will contain those characters in addition to 160 | alphanumeric characters, `','`, and `'.'`. 161 | 162 | #### createBetween 163 | 164 | ```ts 165 | createBetween( 166 | left: string = PositionSource.FIRST, 167 | right: string = PositionSource.LAST 168 | ): string 169 | ``` 170 | 171 | Returns a new position between `left` and `right` 172 | (`left < new < right`). 173 | 174 | The new position is unique across the entire collaborative application, 175 | even in the face of concurrent calls to this method on other 176 | `PositionSource`s. 177 | 178 | _@param_ `left` Defaults to `PositionSource.FIRST` (insert at the beginning). 179 | 180 | _@param_ `right` Defaults to `PositionSource.LAST` (insert at the end). 181 | 182 | #### Properties 183 | 184 | ```ts 185 | readonly ID: string 186 | ``` 187 | 188 | The unique ID for this `PositionSource`. 189 | 190 | ```ts 191 | static readonly FIRST: string = "" 192 | ``` 193 | 194 | A string that is less than all positions. 
195 | 196 | ```ts 197 | static readonly LAST: string = "~" 198 | ``` 199 | 200 | A string that is greater than all positions. 201 | 202 | ### Function `findPosition` 203 | 204 | ```ts 205 | function findPosition( 206 | position: string, 207 | positions: ArrayLike<string> 208 | ): { index: number; isPresent: boolean }; 209 | ``` 210 | 211 | Returns `{ index, isPresent }`, where: 212 | 213 | - `index` is the current index of `position` in `positions`, 214 | or where it would be if added. 215 | - `isPresent` is true if `position` is present in `positions`. 216 | 217 | If this method is inconvenient (e.g., the positions are in a database 218 | instead of an array), you can instead compute 219 | `index` by finding the number of positions less than `position`. 220 | For example, in SQL, use: 221 | 222 | ```sql 223 | SELECT COUNT(*) FROM table WHERE position < $position 224 | ``` 225 | 226 | See also: `Cursors.toIndex`. 227 | 228 | _@param_ `positions` The target list's positions, in lexicographic order. 229 | There should be no duplicate positions. 230 | 231 | ### Class `Cursors` 232 | 233 | Utilities for working with cursors in a collaborative list 234 | or text string. 235 | 236 | A cursor points to a particular spot in a list, in between 237 | two list elements (or text characters). This class handles 238 | cursors for lists that use our position strings. 239 | 240 | A cursor is represented as a string. 241 | Specifically, it is the position of the element 242 | to its left, or `PositionSource.FIRST` if it is at the beginning 243 | of the list. If that position is later deleted, the cursor stays the 244 | same, but its index shifts to the next element on its left. 245 | 246 | You can use cursor strings as ordinary cursors, selection endpoints, 247 | range endpoints for a comment or formatting span, etc. 
248 | 249 | #### fromIndex 250 | 251 | ```ts 252 | static fromIndex(index: number, positions: ArrayLike<string>): string 253 | ``` 254 | 255 | Returns the cursor at `index` within the given list of positions. Invert with `Cursors.toIndex`. 256 | 257 | That is, the cursor is between the list elements at `index - 1` and `index`. 258 | 259 | If this method is inconvenient (e.g., the positions are in a database 260 | instead of an array), you can instead run the following algorithm yourself: 261 | 262 | - If `index` is 0, return `PositionSource.FIRST = ""`. 263 | - Else return `positions[index - 1]`. 264 | 265 | _@param_ `positions` The target list's positions, in lexicographic order. 266 | There should be no duplicate positions. 267 | 268 | #### toIndex 269 | 270 | ```ts 271 | static toIndex(cursor: string, positions: ArrayLike<string>): number 272 | ``` 273 | 274 | Returns the current index of `cursor` within the given list of 275 | positions. Inverse of `Cursors.fromIndex`. 276 | 277 | That is, the cursor is between the list elements at `index - 1` and `index`. 278 | 279 | If this method is inconvenient (e.g., the positions are in a database 280 | instead of an array), you can instead compute 281 | `index` by finding the number of positions less than 282 | or equal to `cursor`. 283 | For example, in SQL, use: 284 | 285 | ```sql 286 | SELECT COUNT(*) FROM table WHERE position <= $cursor 287 | ``` 288 | 289 | See also: `findPosition`. 290 | 291 | _@param_ `positions` The target list's positions, in lexicographic order. 292 | There should be no duplicate positions. 293 | 294 | ### Class `IDs` 295 | 296 | Utilities for generating `PositionSource` IDs (the `options.ID` constructor argument). 297 | 298 | #### random 299 | 300 | ```ts 301 | static random(options?: { length?: number; chars?: string }): string 302 | ``` 303 | 304 | Returns a cryptographically random ID made of alphanumeric characters. 305 | 306 | _@param_ `options.length` The length of the ID, in characters. 
307 | Default: `IDs.DEFAULT_LENGTH`. 308 | 309 | _@param_ `options.chars` The characters to draw from. Default: `IDs.DEFAULT_CHARS`. 310 | 311 | If specified, only the first 256 elements are used, and you achieve 312 | about `log_2(chars.length)` bits of entropy per `length`. 313 | 314 | #### pseudoRandom 315 | 316 | ```ts 317 | static pseudoRandom( 318 | rng: seedrandom.prng, 319 | options?: { length?: number; chars?: string } 320 | ): string 321 | ``` 322 | 323 | Returns a pseudorandom ID made of alphanumeric characters, 324 | generated using `rng` from package [seedrandom](https://www.npmjs.com/package/seedrandom). 325 | 326 | > Note: If you install `@types/seedrandom` yourself instead of relying on our 327 | > dependency, install version `2.4.28`, even though `seedrandom` itself 328 | > has version `3.0.5`. 329 | 330 | Pseudorandom IDs with a fixed seed are recommended for 331 | tests and benchmarks, to make them deterministic. 332 | 333 | _@param_ `options.length` The length of the ID, in characters. 334 | Default: `IDs.DEFAULT_LENGTH`. 335 | 336 | _@param_ `options.chars` The characters to draw from. Default: `IDs.DEFAULT_CHARS`. 337 | 338 | If specified, only the first 256 elements are used, and you achieve 339 | about `log_2(chars.length)` bits of entropy per `length`. 340 | 341 | #### validate 342 | 343 | ```ts 344 | static validate(ID: string): void 345 | ``` 346 | 347 | Throws an error if `ID` does not satisfy the 348 | following requirements from `PositionSource`'s constructor: 349 | 350 | - It does not contain `','` or `'.'`. 351 | - The first character is lexicographically less than `'~'` (code point 126). 352 | 353 | #### Properties 354 | 355 | ```ts 356 | static readonly DEFAULT_LENGTH: number = 8 357 | ``` 358 | 359 | The default length of an ID, in characters. 
360 | 361 | ```ts 362 | static readonly DEFAULT_CHARS: string = 363 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" 364 | ``` 365 | 366 | Default characters used in IDs: alphanumeric chars. 367 | 368 | ## Example App 369 | 370 | [Firebase text-editor](https://firebase-text-editor.herokuapp.com/) uses position-strings to implement collaborative (plain) text editing on top of [Firebase RTDB](https://firebase.google.com/docs/database). Each character is stored together with its position, and a Firebase query is used to list the characters in order. 371 | 372 | The app also demonstrates using `Cursors` to track the local user's selection start and end. 373 | 374 | [Source code](https://github.com/mweidner037/firebase-text-editor/blob/master/src/site/main.ts) 375 | 376 | ## Performance 377 | 378 | _Position string length_ is our main performance metric. This determines the memory, storage, and network overhead due to a collaborative list's positions. 379 | 380 | > Additionally, each `PositionSource` instance uses some memory, and `PositionSource.createBetween` takes some time, but these are usually small enough to ignore. 381 | 382 | To measure position string length in a realistic setting, we benchmark against [Martin Kleppmann's text trace](https://github.com/automerge/automerge-perf). That is, we pretend a user is typing into a collaborative text editor that attaches a position string to each character, then output statistics for those positions. 383 | 384 | For the complete trace (182k positions, 260k total edits) typed by a single `PositionSource`, the average position length is **33 characters**, and the max length is 55. 385 | 386 | For a more realistic scenario with 260 `PositionSource`s (a new one every 1,000 edits), the average position length is **111 characters**, and the max length is 237. "Rotating" `PositionSource`s in this way simulates the effect of multiple users, or a single user who occasionally reloads the page. 
(The extra length comes from referencing multiple [IDs](#properties) per position: an average of 8 IDs/position x 8 chars/ID = 64 chars/position.) 387 | 388 | If we only consider the first 10,000 edits, the averages decrease to **23 characters** (single `PositionSource`) and **50 characters** (new `PositionSource` every 1,000 edits). 389 | 390 | More stats for these four scenarios are in [stats.md](https://github.com/mweidner037/position-strings/blob/master/stats.md). For full data, run `npm run benchmarks` (after `npm ci`) and look in `benchmark_results/`. 391 | 392 | ### Performance Considerations 393 | 394 | - In realistic scenarios with multiple `PositionSource`s, most of the positions' length comes from referencing [IDs](#properties). By default, IDs are 8 random alphanumeric characters to give a low probability of collisions, but you can pass your own shorter IDs to [`PositionSource`'s constructor](#constructor). For example, you could assign IDs sequentially from a server. 395 | - A set of positions from the same list compress reasonably well together, since they represent different paths in the same tree. In particular, a list's worth of positions should compress well under gzip or prefix compression. However, compressing individual positions is not recommended. 396 | - [`PositionSource.createBetween`](#createbetween) is optimized for left-to-right insertions. If you primarily insert right-to-left or at random, you will see worse performance. 397 | --------------------------------------------------------------------------------