├── .eslintignore
├── .prettierignore
├── .gitignore
├── tsconfig.commonjs.json
├── tsconfig.dev.json
├── src
    ├── index.ts
    ├── util.ts
    ├── find_position.ts
    ├── cursors.ts
    ├── ids.ts
    └── position_source.ts
├── .mocharc.jsonc
├── tsconfig.json
├── test
    ├── ids.test.ts
    ├── util.ts
    ├── find_position.test.ts
    ├── fuzz.test.ts
    ├── cursors.test.ts
    └── manual.test.ts
├── LICENSE
├── .eslintrc.js
├── package.json
├── stats.md
├── algorithm.md
├── benchmarks
    └── main.ts
└── README.md


/.eslintignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | build
3 | test
4 | benchmarks
5 | *.js
6 | 


--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | /build/*
2 | /benchmark_results/*
3 | real_text_trace_edits.json
4 | LICENSE
5 | stats.md


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | node_modules
3 | build
4 | .vscode/
5 | .idea/
6 | *.tsbuildinfo
7 | benchmark_results


--------------------------------------------------------------------------------
/tsconfig.commonjs.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "./tsconfig.json",
3 |   "compilerOptions": {
4 |     "module": "commonjs",
5 |     "outDir": "build/commonjs"
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/tsconfig.dev.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "./tsconfig.commonjs.json",
3 |   "compilerOptions": { "resolveJsonModule": true },
4 |   "include": ["src", "test", "benchmarks"]
5 | }
6 | 


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | export * from "./cursors";
2 | export * from "./ids";
3 | export * from "./position_source";
4 | export * from "./find_position";
5 | // util.ts is internal only (deliberately not re-exported).
6 | 


--------------------------------------------------------------------------------
/.mocharc.jsonc:
--------------------------------------------------------------------------------
 1 | {
 2 |   // To run tests written in TypeScript without compiling them, we need to run them in ts-node.
 3 |   "require": ["ts-node/register"],
 4 | 
 5 |   // Any *.test.ts file in ./test will be run as a test
 6 |   "spec": "test/**/*.test.ts",
 7 | 
 8 |   // A change in sources or tests should trigger test re-run
 9 |   "watch-files": ["test/**", "src/**"]
10 | }
11 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "rootDir": "src",
 4 |     "outDir": "build/esm",
 5 |     "target": "es2021",
 6 |     "module": "es2015",
 7 |     /* Needed because module: es2015 defaults to "classic" resolution, which does not look up packages in node_modules. */
 8 |     "moduleResolution": "node",
 9 |     /* Enable strict type checking. */
10 |     "strict": true,
11 |     /* Enable interop with dependencies using different module systems. */
12 |     "esModuleInterop": true,
13 |     /* Emit .d.ts files. */
14 |     "declaration": true,
15 |     /* Emit sourcemap files. */
16 |     "sourceMap": true
17 |     /* Don't turn on importHelpers, so we can avoid tslib dependency. */
18 |   },
19 |   "include": ["src"]
20 | }
21 | 


--------------------------------------------------------------------------------
/test/ids.test.ts:
--------------------------------------------------------------------------------
 1 | import { assert } from "chai";
 2 | import { PositionSource } from "../src";
 3 | 
 4 | describe("IDs", () => {
 5 |   describe("validate", () => {
 6 |     // The PositionSource constructor is expected to throw when it is
 7 |     // given an ID that fails validation.
 8 |     const rejects = (ID: string) =>
 9 |       assert.throws(() => new PositionSource({ ID }));
10 | 
11 |     it("rejects period", () => rejects("ali.ce"));
12 | 
13 |     it("rejects comma", () => rejects("ali,ce"));
14 | 
15 |     it("rejects LAST or greater", () => {
16 |       // LAST itself, then an ID that has LAST as a proper prefix.
17 |       rejects(PositionSource.LAST);
18 |       rejects(PositionSource.LAST + "alice");
19 |     });
20 |   });
21 | });
22 | 


--------------------------------------------------------------------------------
/src/util.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Checks a precondition: throws an `Error` if `statement` is false.
 3 |  * The error text is `message`, followed by each of `optionalParams`
 4 |  * converted with `String()`, all separated by single spaces.
 5 |  */
 6 | export function precond(
 7 |   statement: boolean,
 8 |   message: string,
 9 |   ...optionalParams: unknown[]
10 | ): asserts statement is true {
11 |   if (statement) return;
12 | 
13 |   const extras = optionalParams.map((param) => String(param));
14 |   throw new Error([message, ...extras].join(" "));
15 | }
16 | 
17 | /**
18 |  * Internal invariant check: like `precond`, but the error text is
19 |  * prefixed with "Assertion failed" (and `message`, if given).
20 |  */
21 | export function assert(
22 |   statement: boolean,
23 |   message?: string,
24 |   ...optionalParams: unknown[]
25 | ): asserts statement is true {
26 |   const text =
27 |     message === undefined ? "Assertion failed" : "Assertion failed: " + message;
28 |   precond(statement, text, ...optionalParams);
29 | }
30 | 
31 | /**
32 |  * [[PositionSource.LAST]] copy that avoids circular dependencies
33 |  * (PositionSource <-> IDs).
34 |  */
35 | export const LastInternal = "~";
36 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 |  The MIT License (MIT)
 2 | 
 3 | Copyright © 2023 Matthew Weidner
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 6 | 
 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 8 | 
 9 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10 | 


--------------------------------------------------------------------------------
/test/util.ts:
--------------------------------------------------------------------------------
 1 | import { assert } from "chai";
 2 | import seedrandom from "seedrandom";
 3 | import { IDs, PositionSource } from "../src";
 4 | 
 5 | /** Asserts that `list` is strictly increasing under string comparison. */
 6 | export function assertIsOrdered(list: string[]) {
 7 |   for (let i = 1; i < list.length; i++)
 8 |     assert(list[i - 1] < list[i], `Out of order: ${list[i - 1]} !< ${list[i]}`);
 9 | }
10 | 
11 | /** Creates `count` sources, with IDs drawn from `rng` in creation order. */
12 | export function newSources(
13 |   rng: seedrandom.prng,
14 |   count: number
15 | ): PositionSource[] {
16 |   const created: PositionSource[] = [];
17 |   while (created.length < count)
18 |     created.push(new PositionSource({ ID: IDs.pseudoRandom(rng) }));
19 |   return created;
20 | }
21 | 
22 | /** In each gap, two positions created with the same neighbors must differ. */
23 | export function testUniqueAfterDelete(list: string[], source: PositionSource) {
24 |   for (let gap = 0; gap <= list.length; gap++) {
25 |     const left = list[gap - 1];
26 |     const right = list[gap];
27 |     // Models deleting `first` before re-inserting; it may be resurrected later.
28 |     const first = source.createBetween(left, right);
29 |     assert.notStrictEqual(first, source.createBetween(left, right));
30 |   }
31 | }
32 | 


--------------------------------------------------------------------------------
/src/find_position.ts:
--------------------------------------------------------------------------------
 1 | import { assert } from "./util";
 2 | 
 3 | /**
 4 |  * Locates `position` within the sorted list `positions`.
 5 |  *
 6 |  * Returns `{ index, isPresent }`, where:
 7 |  * - `index` is the current index of `position` in `positions` if it is
 8 |  * present, or else the index where it would be inserted.
 9 |  * - `isPresent` tells whether `position` occurs in `positions`.
10 |  *
11 |  * If your positions are not in an array (e.g., they live in a database),
12 |  * you can compute `index` yourself as the number of positions that are
13 |  * less than `position`. For example, in SQL, use:
14 |  * ```sql
15 |  * SELECT COUNT(*) FROM table WHERE position < $position
16 |  * ```
17 |  *
18 |  * See also: `Cursors.toIndex`.
19 |  *
20 |  * @param positions The target list's positions, in lexicographic order.
21 |  * There should be no duplicate positions.
22 |  */
23 | export function findPosition(
24 |   position: string,
25 |   positions: ArrayLike<string>
26 | ): { index: number; isPresent: boolean } {
27 |   // Leftmost binary search; `lo` converges to the rank of `position`. See
28 |   // https://en.wikipedia.org/wiki/Binary_search_algorithm#Procedure_for_finding_the_leftmost_element
29 |   let lo = 0;
30 |   let hi = positions.length;
31 |   while (lo < hi) {
32 |     const mid = Math.floor((lo + hi) / 2);
33 |     if (positions[mid] < position) lo = mid + 1;
34 |     else hi = mid;
35 |   }
36 | 
37 |   // Sanity check on the immediate neighbors of the result.
38 |   const leftOk = lo === 0 || positions[lo - 1] < position;
39 |   const rightOk = lo === positions.length || positions[lo] >= position;
40 |   const failure = "Bad binary search (positions out of order?):";
41 |   assert(leftOk && rightOk, failure, position, lo);
42 | 
43 |   return { index: lo, isPresent: positions[lo] === position };
44 | }
45 | 


--------------------------------------------------------------------------------
/test/find_position.test.ts:
--------------------------------------------------------------------------------
 1 | import { assert } from "chai";
 2 | import seedrandom from "seedrandom";
 3 | import { findPosition, IDs, PositionSource } from "../src";
 4 | 
 5 | describe("findPosition", () => {
 6 |   let rng!: seedrandom.prng;
 7 |   let source!: PositionSource;
 8 | 
 9 |   // Re-seed before every test, so that every test starts from the
10 |   // same pseudo-random state.
11 |   beforeEach(() => {
12 |     rng = seedrandom("42");
13 |     source = new PositionSource({ ID: IDs.pseudoRandom(rng) });
14 |   });
15 | 
16 |   // Registers the tests for a list of `len` positions, each created
17 |   // with the previous one as its left neighbor.
18 |   function testLength(len: number) {
19 |     let list!: string[];
20 | 
21 |     describe(`length ${len}`, () => {
22 |       beforeEach(() => {
23 |         // Rebuild the list from scratch for every test.
24 |         list = [];
25 |         for (let i = 0; i < len; i++) {
26 |           list.push(source.createBetween(list.at(-1), undefined));
27 |         }
28 |       });
29 | 
30 |       it("present", () => {
31 |         list.forEach((position, i) => {
32 |           const expected = { index: i, isPresent: true };
33 |           assert.deepStrictEqual(findPosition(position, list), expected);
34 |         });
35 |       });
36 | 
37 |       it("not present", () => {
38 |         for (let gap = 0; gap <= list.length; gap++) {
39 |           // A new position created in this gap (between the current
40 |           // gap - 1 & gap) would be at index `gap` if present.
41 |           const newPos = source.createBetween(list[gap - 1], list[gap]);
42 |           const expected = { index: gap, isPresent: false };
43 |           assert.deepStrictEqual(findPosition(newPos, list), expected);
44 |         }
45 |       });
46 |     });
47 |   }
48 | 
49 |   // Lengths around a power of two, an arbitrary length, and the
50 |   // degenerate cases of a single element and an empty list.
51 |   for (const len of [32, 31, 33, 23, 1, 0]) {
52 |     testLength(len);
53 |   }
54 | });
55 | 


--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------
 1 | module.exports = {
 2 |   root: true,
 3 |   env: {
 4 |     browser: true,
 5 |     es2021: true,
 6 |     node: true,
 7 |   },
 8 |   parser: "@typescript-eslint/parser",
 9 |   parserOptions: {
10 |     tsconfigRootDir: __dirname,
11 |     project: ["./tsconfig.json"],
12 |     sourceType: "module",
13 |   },
14 |   plugins: ["@typescript-eslint", "import"],
15 |   extends: [
16 |     "eslint:recommended",
17 |     "plugin:@typescript-eslint/recommended",
18 |     "plugin:@typescript-eslint/recommended-requiring-type-checking",
19 |     "plugin:import/typescript",
20 |     "prettier",
21 |   ],
22 |   rules: {
23 |     // Allow inference in function return type.
24 |     "@typescript-eslint/explicit-function-return-type": "off",
25 |     "@typescript-eslint/explicit-module-boundary-types": "off",
26 |     // I like non-null assertions.
27 |     "@typescript-eslint/no-non-null-assertion": "off",
28 |     // Disallow default exports; only allow named exports.
29 |     "import/no-default-export": "error",
30 |     // Enforce a consistent import order (grouped by module kind).
31 |     "import/order": "error",
32 |     // Allow implicit string casts in template literals.
33 |     "@typescript-eslint/restrict-template-expressions": "off",
34 |     // Allow ts-expect-error with justification (a description).
35 |     "@typescript-eslint/ban-ts-comment": [
36 |       "error",
37 |       {
38 |         "ts-expect-error": "allow-with-description",
39 |       },
40 |     ],
41 |     "@typescript-eslint/no-unused-vars": [
42 |       "warn",
43 |       {
44 |         // Allow unused parameter names that start with _,
45 |         // like TypeScript does.
46 |         argsIgnorePattern: "^_",
47 |       },
48 |     ],
49 |   },
50 | };
51 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "position-strings",
 3 |   "version": "2.0.1",
 4 |   "description": "Lexicographically-ordered position strings for collaborative lists and text",
 5 |   "author": "Matthew Weidner",
 6 |   "license": "MIT",
 7 |   "bugs": {
 8 |     "url": "https://github.com/mweidner037/position-strings/issues"
 9 |   },
10 |   "homepage": "https://github.com/mweidner037/position-strings/tree/master/#readme",
11 |   "repository": {
12 |     "type": "git",
13 |     "url": "git+https://github.com/mweidner037/position-strings.git"
14 |   },
15 |   "keywords": [
16 |     "CRDT",
17 |     "text editing",
18 |     "collaboration",
19 |     "fractional indexing"
20 |   ],
21 |   "module": "build/esm/index.js",
22 |   "browser": "build/esm/index.js",
23 |   "types": "build/esm/index.d.ts",
24 |   "main": "build/commonjs/index.js",
25 |   "files": [
26 |     "/build",
27 |     "/src"
28 |   ],
29 |   "directories": {
30 |     "lib": "src"
31 |   },
32 |   "publishConfig": {
33 |     "access": "public"
34 |   },
35 |   "sideEffects": false,
36 |   "dependencies": {
37 |     "@types/seedrandom": "^2.4.28"
38 |   },
39 |   "devDependencies": {
40 |     "@types/chai": "^4.3.4",
41 |     "@types/functional-red-black-tree": "^1.0.2",
42 |     "@types/mocha": "^10.0.1",
43 |     "@typescript-eslint/eslint-plugin": "^5.52.0",
44 |     "@typescript-eslint/parser": "^5.52.0",
45 |     "chai": "^4.3.7",
46 |     "cross-env": "^7.0.3",
47 |     "eslint": "^8.34.0",
48 |     "eslint-config-prettier": "^8.6.0",
49 |     "eslint-plugin-import": "^2.27.5",
50 |     "functional-red-black-tree": "^1.0.1",
51 |     "mocha": "^10.2.0",
52 |     "npm-run-all": "^4.1.5",
53 |     "prettier": "^2.8.4",
54 |     "rimraf": "^4.1.2",
55 |     "seedrandom": "^3.0.5",
56 |     "ts-node": "^10.9.1",
57 |     "typescript": "^4.9.5"
58 |   },
59 |   "scripts": {
60 |     "prepack": "npm run clean && npm run build && npm run test",
61 |     "build": "npm-run-all build:*",
62 |     "build:ts": "tsc -p tsconfig.json && tsc -p tsconfig.commonjs.json",
63 |     "test": "npm-run-all test:*",
64 |     "test:lint": "eslint --ext .ts,.js .",
65 |     "test:unit": "cross-env TS_NODE_PROJECT='./tsconfig.dev.json' mocha",
66 |     "test:format": "prettier --check .",
67 |     "fix": "npm-run-all fix:*",
68 |     "fix:format": "prettier --write .",
69 |     "benchmarks": "ts-node --project tsconfig.dev.json benchmarks/main.ts",
70 |     "clean": "rimraf generated generated_esm build"
71 |   }
72 | }
73 | 


--------------------------------------------------------------------------------
/stats.md:
--------------------------------------------------------------------------------
  1 | 
  2 | > position-strings@2.0.0 benchmarks
  3 | > ts-node --project tsconfig.dev.json benchmarks/main.ts
  4 | 
  5 | ## Run: all ops; rotate never
  6 | 
  7 | ### length
  8 | 
  9 | - Average: 33
 10 | - Median: 32
 11 | - 99th percentile: 51
 12 | - Max: 55
 13 | 
 14 | ### longNames
 15 | 
 16 | - Average: 1
 17 | - Median: 1
 18 | - 99th percentile: 1
 19 | - Max: 1
 20 | 
 21 | ### waypoints
 22 | 
 23 | - Average: 9
 24 | - Median: 8
 25 | - 99th percentile: 15
 26 | - Max: 17
 27 | 
 28 | ### valueIndex
 29 | 
 30 | - Average: 615
 31 | - Median: 208
 32 | - 99th percentile: 5780
 33 | - Max: 7603
 34 | 
 35 | ### PositionSource memory usage
 36 | 
 37 | - Map size: 3333
 38 | - Sum of map key lengths: 112034
 39 | 
 40 | ## Run: all ops; rotate every 1000 ops
 41 | 
 42 | ### length
 43 | 
 44 | - Average: 111
 45 | - Median: 109
 46 | - 99th percentile: 206
 47 | - Max: 237
 48 | 
 49 | ### longNames
 50 | 
 51 | - Average: 8
 52 | - Median: 8
 53 | - 99th percentile: 16
 54 | - Max: 18
 55 | 
 56 | ### waypoints
 57 | 
 58 | - Average: 13
 59 | - Median: 13
 60 | - 99th percentile: 24
 61 | - Max: 26
 62 | 
 63 | ### valueIndex
 64 | 
 65 | - Average: 185
 66 | - Median: 108
 67 | - 99th percentile: 851
 68 | - Max: 999
 69 | 
 70 | ### PositionSource memory usage
 71 | 
 72 | - Map size: 20
 73 | - Sum of map key lengths: 2574
 74 | 
 75 | ## Run: 10000 ops; rotate never
 76 | 
 77 | ### length
 78 | 
 79 | - Average: 23
 80 | - Median: 25
 81 | - 99th percentile: 32
 82 | - Max: 35
 83 | 
 84 | ### longNames
 85 | 
 86 | - Average: 1
 87 | - Median: 1
 88 | - 99th percentile: 1
 89 | - Max: 1
 90 | 
 91 | ### waypoints
 92 | 
 93 | - Average: 5
 94 | - Median: 6
 95 | - 99th percentile: 8
 96 | - Max: 9
 97 | 
 98 | ### valueIndex
 99 | 
100 | - Average: 293
101 | - Median: 183
102 | - 99th percentile: 1029
103 | - Max: 1069
104 | 
105 | ### PositionSource memory usage
106 | 
107 | - Map size: 151
108 | - Sum of map key lengths: 3666
109 | 
110 | ## Run: 10000 ops; rotate every 1000 ops
111 | 
112 | ### length
113 | 
114 | - Average: 50
115 | - Median: 49
116 | - 99th percentile: 86
117 | - Max: 86
118 | 
119 | ### longNames
120 | 
121 | - Average: 3
122 | - Median: 3
123 | - 99th percentile: 6
124 | - Max: 6
125 | 
126 | ### waypoints
127 | 
128 | - Average: 7
129 | - Median: 7
130 | - 99th percentile: 11
131 | - Max: 12
132 | 
133 | ### valueIndex
134 | 
135 | - Average: 173
136 | - Median: 113
137 | - 99th percentile: 686
138 | - Max: 759
139 | 
140 | ### PositionSource memory usage
141 | 
142 | - Map size: 7
143 | - Sum of map key lengths: 580
144 | 
145 | 


--------------------------------------------------------------------------------
/src/cursors.ts:
--------------------------------------------------------------------------------
 1 | import { findPosition } from "./find_position";
 2 | import { PositionSource } from "./position_source";
 3 | import { precond } from "./util";
 4 | 
 5 | /**
 6 |  * Utilities for working with cursors in a collaborative list
 7 |  * or text string.
 8 |  *
 9 |  * A *cursor* points to a particular spot in a list, in between
10 |  * two list elements (or text characters). This class handles
11 |  * cursors for lists that use our position strings.
12 |  *
13 |  * A cursor is represented as a string.
14 |  * Specifically, it is the position of the element
15 |  * to its left, or `PositionSource.FIRST` if it is at the beginning
16 |  * of the list. If that position is later deleted, the cursor stays the
17 |  * same, but its index shifts to the next element on its left.
18 |  *
19 |  * You can use cursor strings as ordinary cursors, selection endpoints,
20 |  * range endpoints for a comment or formatting span, etc.
21 |  */
22 | export class Cursors {
23 |   private constructor() {
24 |     // Not instantiable.
25 |   }
26 | 
27 |   /**
28 |    * Returns the cursor between the list elements at `index - 1` and `index`
29 |    * in the given list of positions. Invert with `Cursors.toIndex`.
30 |    *
31 |    * If this method is inconvenient (e.g., the positions are in a database
32 |    * instead of an array), you can instead run the following algorithm yourself:
33 |    * - If `index` is 0, return `PositionSource.FIRST = ""`.
34 |    * - Else return `positions[index - 1]`.
35 |    *
36 |    * @param positions The target list's positions, in lexicographic order.
37 |    * There should be no duplicate positions.
38 |    * @throws If `index` is not an integer in `[0, positions.length]`.
39 |    */
40 |   static fromIndex(index: number, positions: ArrayLike<string>): string {
41 |     precond(
42 |       Number.isInteger(index) && index >= 0 && index <= positions.length,
43 |       "Index out of bounds:",
44 |       index,
45 |       positions.length
46 |     );
47 |     return index === 0 ? PositionSource.FIRST : positions[index - 1];
48 |   }
49 | 
50 |   /**
51 |    * Returns the current index of `cursor` within the given list of
52 |    * positions. Inverse of `Cursors.fromIndex`.
53 |    *
54 |    * That is, the cursor is between the list elements at `index - 1` and `index`.
55 |    *
56 |    * If this method is inconvenient (e.g., the positions are in a database
57 |    * instead of an array), you can instead compute
58 |    * `index` by finding the number of positions less than
59 |    * or equal to `cursor`.
60 |    * For example, in SQL, use:
61 |    * ```sql
62 |    * SELECT COUNT(*) FROM table WHERE position <= $cursor
63 |    * ```
64 |    *
65 |    * See also: `findPosition`.
66 |    *
67 |    * @param positions The target list's positions, in lexicographic order.
68 |    * There should be no duplicate positions.
69 |    */
70 |   static toIndex(cursor: string, positions: ArrayLike<string>): number {
71 |     const { index, isPresent } = findPosition(cursor, positions);
72 |     // findPosition counts the elements < cursor, but we want those <= cursor.
73 |     // So if there's an == element, add 1.
74 |     return isPresent ? index + 1 : index;
75 |   }
76 | }
77 | 


--------------------------------------------------------------------------------
/test/fuzz.test.ts:
--------------------------------------------------------------------------------
 1 | import seedrandom from "seedrandom";
 2 | import { assertIsOrdered, newSources, testUniqueAfterDelete } from "./util";
 3 | 
 4 | describe("fuzz", () => {
 5 |   describe("sequential", () => {
 6 |     describe("1 user", () => sequential(1));
 7 |     describe("10 users", () => sequential(10));
 8 |   });
 9 | });
10 | 
11 | function sequential(numUsers: number) {
12 |   let rng!: seedrandom.prng;
13 | 
14 |   beforeEach(() => {
15 |     rng = seedrandom("42");
16 |   });
17 | 
18 |   it("random", () => {
19 |     const sources = newSources(rng, numUsers);
20 | 
21 |     // Randomly create positions in a single list, simulating sequential access.
22 |     const list: string[] = [];
23 |     for (let i = 0; i < 1000; i++) {
24 |       const source = sources[Math.floor(rng() * sources.length)];
25 |       const index = Math.floor(rng() * (list.length + 1));
26 |       // Out-of-bounds okay.
27 |       const newPosition = source.createBetween(list[index - 1], list[index]);
28 |       list.splice(index, 0, newPosition);
29 |     }
30 | 
31 |     assertIsOrdered(list);
32 |     testUniqueAfterDelete(list, sources[0]);
33 |   });
34 | 
35 |   it("random LtR runs", () => {
36 |     const sources = newSources(rng, numUsers);
37 | 
38 |     // Randomly create positions in a single list, simulating sequential access.
39 |     // This time, create short LtR runs at a time.
40 |     const list: string[] = [];
41 |     for (let i = 0; i < 200; i++) {
42 |       const source = sources[Math.floor(rng() * sources.length)];
43 |       const index = Math.floor(rng() * (list.length + 1));
44 |       // Out-of-bounds okay.
45 |       for (let j = 0; j < 5; j++) {
46 |         const newPosition = source.createBetween(
47 |           list[index - 1 + j],
48 |           list[index + j]
49 |         );
50 |         list.splice(index + j, 0, newPosition);
51 |       }
52 |     }
53 | 
54 |     assertIsOrdered(list);
55 |     testUniqueAfterDelete(list, sources[0]);
56 |   });
57 | 
58 |   it("random RtL runs", () => {
59 |     const sources = newSources(rng, numUsers);
60 | 
61 |     // Randomly create positions in a single list, simulating sequential access.
62 |     // This time, create short RtL runs at a time.
63 |     const list: string[] = [];
64 |     for (let i = 0; i < 200; i++) {
65 |       const source = sources[Math.floor(rng() * sources.length)];
66 |       const index = Math.floor(rng() * (list.length + 1));
67 |       // Out-of-bounds okay.
68 |       for (let j = 0; j < 5; j++) {
69 |         const newPosition = source.createBetween(list[index - 1], list[index]);
70 |         list.splice(index, 0, newPosition);
71 |       }
72 |     }
73 | 
74 |     assertIsOrdered(list);
75 |     testUniqueAfterDelete(list, sources[0]);
76 |   });
77 | 
78 |   it("biased", () => {
79 |     const sources = newSources(rng, numUsers);
80 | 
81 |     // Randomly create positions in a single list, simulating sequential access.
82 |     // This time, pick sources non-uniformly (the sqrt favors later sources).
83 |     const list: string[] = [];
84 |     for (let i = 0; i < 1000; i++) {
85 |       const source =
86 |         sources[Math.floor(Math.sqrt(rng() * sources.length * sources.length))];
87 |       const index = Math.floor(rng() * (list.length + 1));
88 |       // Out-of-bounds okay.
89 |       const newPosition = source.createBetween(list[index - 1], list[index]);
90 |       list.splice(index, 0, newPosition);
91 |     }
92 | 
93 |     assertIsOrdered(list);
94 |     testUniqueAfterDelete(list, sources[0]);
95 |   });
96 | }
97 | 


--------------------------------------------------------------------------------
/test/cursors.test.ts:
--------------------------------------------------------------------------------
  1 | import { assert } from "chai";
  2 | import seedrandom from "seedrandom";
  3 | import { Cursors, IDs, PositionSource } from "../src";
  4 | 
  5 | describe("Cursors", () => {
  6 |   let rng!: seedrandom.prng;
  7 |   let source!: PositionSource;
  8 | 
  9 |   beforeEach(() => {
 10 |     rng = seedrandom("42");
 11 |     source = new PositionSource({ ID: IDs.pseudoRandom(rng) });
 12 |   });
 13 | 
 14 |   // Registers the Cursors tests for a list of `len` sequential positions.
 15 |   function testLength(len: number) {
 16 |     let list!: string[];
 17 |     describe(`length ${len}`, () => {
 18 |       beforeEach(() => {
 19 |         list = [];
 20 |         for (let i = 0; i < len; i++) {
 21 |           list.push(source.createBetween(list.at(-1), undefined));
 22 |         }
 23 |       });
 24 | 
 25 |       it("present", () => {
 26 |         for (let i = 0; i <= list.length; i++) {
 27 |           const cursor = Cursors.fromIndex(i, list);
 28 |           assert.strictEqual(Cursors.toIndex(cursor, list), i);
 29 |           if (i !== 0) {
 30 |             // Insert a char in the next gap to the left, shifting the cursor.
 31 |             const list2 = [
 32 |               ...list.slice(0, i - 1),
 33 |               source.createBetween(list[i - 2], list[i - 1]),
 34 |               ...list.slice(i - 1),
 35 |             ];
 36 |             assert.strictEqual(Cursors.toIndex(cursor, list2), i + 1);
 37 |           }
 38 |           if (i !== list.length) {
 39 |             // Insert a char in the next gap to the right, which shouldn't shift the cursor.
 40 |             const list3 = [
 41 |               ...list.slice(0, i + 1),
 42 |               source.createBetween(list[i], list[i + 1]),
 43 |               ...list.slice(i + 1),
 44 |             ];
 45 |             assert.strictEqual(Cursors.toIndex(cursor, list3), i);
 46 |           }
 47 |           // Insert a char in the cursor's gap, which
 48 |           // still shouldn't shift the cursor, since we
 49 |           // bind to the left char.
 50 |           const list4 = [
 51 |             ...list.slice(0, i),
 52 |             source.createBetween(list[i - 1], list[i]),
 53 |             ...list.slice(i),
 54 |           ];
 55 |           assert.strictEqual(Cursors.toIndex(cursor, list4), i);
 56 |         }
 57 |       });
 58 | 
 59 |       it("not present", () => {
 60 |         for (let i = 0; i <= list.length; i++) {
 61 |           // Set the cursor to a new position that we "delete"
 62 |           // (actually just leave not-present) in list.
 63 |           const listExtended = [
 64 |             ...list.slice(0, i),
 65 |             source.createBetween(list[i - 1], list[i]),
 66 |             ...list.slice(i),
 67 |           ];
 68 |           const cursor = Cursors.fromIndex(i + 1, listExtended);
 69 | 
 70 |           // In list, the index falls back by 1 to i.
 71 |           assert.strictEqual(Cursors.toIndex(cursor, list), i);
 72 |           if (i !== 0) {
 73 |             // Insert a char in the next gap to the left, shifting the cursor.
 74 |             const list2 = [
 75 |               ...list.slice(0, i - 1),
 76 |               source.createBetween(list[i - 2], list[i - 1]),
 77 |               ...list.slice(i - 1),
 78 |             ];
 79 |             assert.strictEqual(Cursors.toIndex(cursor, list2), i + 1);
 80 |           }
 81 |           if (i !== list.length) {
 82 |             // Insert a char in the next gap to the right, which shouldn't shift the cursor.
 83 |             const list3 = [
 84 |               ...list.slice(0, i + 1),
 85 |               source.createBetween(list[i], list[i + 1]),
 86 |               ...list.slice(i + 1),
 87 |             ];
 88 |             assert.strictEqual(Cursors.toIndex(cursor, list3), i);
 89 |           }
 90 |           // Insert a char in the cursor's gap, which
 91 |           // may or may not shift the cursor, depending on how
 92 |           // the new position compares to the cursor's.
 93 |           const list4 = [
 94 |             ...list.slice(0, i),
 95 |             source.createBetween(list[i - 1], list[i]),
 96 |             ...list.slice(i),
 97 |           ];
 98 |           const index4 = Cursors.toIndex(cursor, list4);
 99 |           assert(i <= index4 && index4 <= i + 1);
100 |         }
101 |       });
102 |     });
103 |   }
104 | 
105 |   testLength(32);
106 |   testLength(31);
107 |   testLength(33);
108 |   testLength(23);
109 |   testLength(1);
110 |   testLength(0);
111 | });
112 | 


--------------------------------------------------------------------------------
/src/ids.ts:
--------------------------------------------------------------------------------
  1 | import * as crypto from "crypto";
  2 | import type seedrandom from "seedrandom";
  3 | import { LastInternal, precond } from "./util";
  4 | 
  5 | /**
  6 |  * Utilities for generating `PositionSource` IDs
  7 |  * (the `options.ID` constructor argument).
  8 |  */
  9 | export class IDs {
 10 |   private constructor() {
 11 |     // Not instantiable.
 12 |   }
 13 | 
 14 |   /**
 15 |    * Default characters used in IDs: alphanumeric chars.
 16 |    */
 17 |   static readonly DEFAULT_CHARS: string =
 18 |     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
 19 | 
 20 |   // Rationale for value 8:
 21 |   // Each character of the ID gives us ~6 bits of entropy,
 22 |   //  for a total of ~48 bits.  This gives a < 1%
 23 |   // probability that two connected `PositionSource`s
 24 |   // will ever choose the same IDs, even if we
 25 |   // consider the total probability across 100,000,000
 26 |   // documents with 1,000 IDs each
 27 |   // (= 10 users x 100 days x 1 ID/user/day).
 28 |   /**
 29 |    * The default length of an ID, in characters.
 30 |    */
 31 |   static readonly DEFAULT_LENGTH: number = 8;
 32 | 
 33 |   /**
 34 |    * Returns a cryptographically random ID made of alphanumeric characters.
 35 |    *
 36 |    * @param options.length The length of the ID, in characters.
 37 |    * Default: `IDs.DEFAULT_LENGTH`.
 38 |    * @param options.chars The characters to draw from. Default: `IDs.DEFAULT_CHARS`.
 39 |    *
 40 |    * If specified, only the first 256 elements are used, and you achieve
 41 |    * about `log_2(chars.length)` bits of entropy per `length`.
 42 |    */
 43 |   static random(options?: { length?: number; chars?: string }): string {
 44 |     const length = options?.length ?? this.DEFAULT_LENGTH;
 45 |     const chars = options?.chars ?? this.DEFAULT_CHARS;
 46 | 
 47 |     const arr = new Array<string>(length);
 48 |     let randomValues = new Uint8Array(length);
 49 |     if (typeof window === "undefined") {
 50 |       // Use Node crypto library.
 51 |       // We use eval("require") to prevent Webpack from attempting
 52 |       // to bundle the crypto module and complaining.
 53 |       // In theory we should also be able to do this by
 54 |       // adding "browser": {"crypto": false} to package.json,
 55 |       // but that is not working, and besides, every user
 56 |       // of this package would have to remember to do so.
 57 |       // See https://github.com/webpack/webpack/issues/8826
 58 |       const cryptoReal = <typeof crypto>(
 59 |         (<typeof require>eval("require"))("crypto")
 60 |       );
 61 |       const randomBuffer = cryptoReal.randomBytes(length);
 62 |       randomValues = new Uint8Array(randomBuffer);
 63 |     } else {
 64 |       // Use browser crypto library.
 65 |       window.crypto.getRandomValues(randomValues);
 66 |     }
 67 |     for (let i = 0; i < length; i++) {
 68 |       // This will be biased if chars.length does not divide 256,
 69 |       // but it will still give at least floor(log_2(chars.length))
 70 |       // bits of entropy.
 71 |       arr[i] = chars[randomValues[i] % chars.length];
 72 |     }
 73 |     return arr.join("");
 74 |   }
 75 | 
 76 |   /**
 77 |    * Returns a psuedorandom ID made of alphanumeric characters,
 78 |    * generated using `rng` from package [seedrandom](https://www.npmjs.com/package/seedrandom).
 79 |    *
 80 |    * Pseudorandom IDs with a fixed seed are recommended for
 81 |    * tests and benchmarks, to make them deterministic.
 82 |    *
 83 |    * @param options.length The length of the ID, in characters.
 84 |    * Default: `IDs.DEFAULT_LENGTH`.
 85 |    * @param options.chars The characters to draw from. Default: `IDs.DEFAULT_CHARS`.
 86 |    *
 87 |    * If specified, only the first 256 elements are used, and you achieve
 88 |    * about `log_2(chars.length)` bits of entropy per `length`.
 89 |    */
 90 |   static pseudoRandom(
 91 |     rng: seedrandom.prng,
 92 |     options?: { length?: number; chars?: string }
 93 |   ): string {
 94 |     const length = options?.length ?? this.DEFAULT_LENGTH;
 95 |     const chars = options?.chars ?? this.DEFAULT_CHARS;
 96 | 
 97 |     const arr = new Array<string>(length);
 98 |     for (let i = 0; i < arr.length; i++) {
 99 |       // Although we could pick chars without bias, we instead use the
100 |       // same bias as `random`, for consistency.
101 |       arr[i] = chars[Math.floor(rng() * 256) % chars.length];
102 |     }
103 |     return arr.join("");
104 |   }
105 | 
106 |   /**
107 |    * Throws an error if `ID` does not satisfy the
108 |    * following requirements from `PositionSource`'s constructor:
109 |    * - It does not contain `','` or `'.'`.
110 |    * - The first character is lexicographically less than `'~'` (code point 126).
111 |    */
112 |   static validate(ID: string): void {
113 |     precond(ID < LastInternal, "ID must be less than", LastInternal, ":", ID);
114 |     precond(!ID.includes(","), "ID must not contain ',':", ID);
115 |     precond(!ID.includes("."), "ID must not contain '.':", ID);
116 |   }
117 | }
118 | 


--------------------------------------------------------------------------------
/algorithm.md:
--------------------------------------------------------------------------------
 1 | # Algorithm
 2 | 
 3 | ## Background
 4 | 
 5 | At a high level, position-strings implements the core of a List CRDT. Each position string corresponds to an element in the list, such that the lexicographic order on strings matches the list order. We don't implement a literal List CRDT with state and operations, but it's straightforward to implement one on top of position-strings.
 6 | 
 7 | More specifically, position-strings is based on [Fugue: A Basic List CRDT](https://mattweidner.com/2022/10/21/basic-list-crdt.html#a-basic-uniquely-dense-total-order). It is an optimized version of that post's [string implementation](https://mattweidner.com/2022/10/21/basic-list-crdt.html#intro-string-implementation), which uses strings to represent paths in a tree. The strings are designed so that their lexicographic order matches the tree's [in-order traversal](https://en.wikipedia.org/wiki/Tree_traversal#In-order,_LNR) order.
 8 | 
 9 | ## Tree Structure
10 | 
11 | position-strings's implicit tree is structured in layers. Each layer has a specific type and can only contain nodes of that type. There are 3 layer types that alternate cyclically (1 -> 2 -> 3 -> 1 -> 2 -> 3 -> ...). Each position string corresponds to a type-3 node, and the string itself encodes the node labels on the path from the root to that node.
12 | 
13 | The 3 node/layer types are:
14 | 
15 | 1. **Waypoint nodes**: Labeled by the ID of the `PositionSource` that created it, sorted arbitrarily. The ID ensures that positions created by different `PositionSource`s are distinct: each `PositionSource` only returns positions whose _final_ waypoint node uses its own ID.
16 | 2. **valueIndex nodes**: Labeled by an integer, sorted by magnitude. When a `PositionSource` creates positions in a left-to-right sequence, instead of appending a new waypoint node each time, it reuses the first waypoint node and just increases the valueIndex. That causes the position string length to grow logarithmically instead of linearly.
17 | 3. **Side nodes**: Labeled by a bit "left side" (0) or "right side" (1). The actual position at a node, and all of the node's right-side descendants, use "right side"; all of its left-side descendants use "left side". This ensures that all left descendants are less than the position at a node, which is less than all right descendants.
18 | 
19 | ### `createBetween`
20 | 
21 | In terms of the tree structure, `PositionSource.createBetween(left, right)` does the following:
22 | 
23 | 1. If `right` is a descendant of `left`, create a left descendant of `right` as follows. First, create a waypoint node that is a left child of `right` (replacing `right`'s final "right side" bit with "left side"). Then append the next new valueIndex node (usually 0) and a "right side" node, to fill out the 3 layers. Return that final node.
 24 | 2. Otherwise, see if we can just increase `left`'s final valueIndex, instead of lengthening its path. This is allowed if (a) `left`'s final waypoint node uses our ID, and (b) `right` doesn't use that same waypoint node. If so, look up the next unused valueIndex for that waypoint (stored in `PositionSource`), then use `left` but with that final valueIndex.
25 | 3. If not, create a right descendant of `left` like in case 1: append a waypoint node, the next new valueIndex, then "right side"; return that final node.
26 | 
27 | You can check that the resulting node lies between `left` and `right`, and that this procedure satisfies properties 4-6 from the [README](./README.md).
28 | 
29 | > The tree we've described so far is similar to that used by the [Logoot List CRDT](https://doi.org/10.1109/ICDCS.2009.75), which also has alternating layers of IDs and numbers. However, Logoot sorts by numbers first and then IDs, while we do the opposite. This lets us avoid interleaving: if two `PositionSource`s concurrently create a sequence of positions at the same place, their positions will end up under different waypoint nodes, hence appear one after the other.
30 | 
31 | ## String Representation
32 | 
33 | Finally, we need to map type-3 nodes in the above tree to position strings, such that the tree order matches the position strings' lexicographic order.
34 | 
35 | Given a tree node `a`, let `aPath` be the sequence of node labels on the path from the root to that node. Note that the tree order matches the "lexicographic order" on these sequences: `a < b` if `aPath[i] < bPath[i]` at the first index `i` where they disagree, or if `aPath` is a strict prefix of `bPath`.
36 | 
37 | I claim that we can set `a`'s position string to be `aPos = aPath.map(f).join("")` for any `f: (label: string, i: number) => string` with the following property:
38 | 
39 | - If `aPath` and `bPath` first disagree at index `i` and `aPath[i] < bPath[i]`, then:
40 |   1. `f(aPath[i], i) < f(bPath[i], i)` as strings.
41 |   2. `f(aPath[i], i)` is not a prefix of `f(bPath[i], i)`.
42 | 
43 | Indeed, then there is some index `j` such that `f(aPath[i], i).charAt(j) < f(bPath[i], i).charAt(j)`. Hence no matter what happens in the rest of `aPos` and `bPos`, we'll still have `aPos < bPos`.
44 | 
45 | One working `f` is defined as follows, with a different rule for each layer type:
46 | 
47 | 1. (Waypoint nodes) Map the node's label (an ID) to `` `,${ID}.` ``. The period, which is not allowed in IDs, ensures the no-prefix rule (ii).
48 | 2. (valueIndex nodes) Map the valueIndex to its _valueSeq_: its entry in a special sequence of numbers that is in lexicographic order and has no prefixes (when base52 encoded). You can read about the sequence we use in the comment above [`position_source.ts`](./src/position_source.ts)'s `nextOddValueSeq` function.
49 | 3. (Side nodes) Map "left side" to `"0"` and "right side" to `"1"`.
50 | 
51 | ### Optimizations
52 | 
53 | In the actual implementation, we optimize the above string representation in a few ways.
54 | 
55 | First, for waypoint nodes, we only use each "long name" `` `,${ID}.` `` once per position string. If the same ID occurs later in the same path, those nodes get a "short name" that is just an index into the list of prior long names. Index `n` is encoded as `base52(n // 10) + base10(n % 10)`. The set of all waypoint names following a given path is still unique, which ensures rule (i) for some arbitrary order on IDs (not necessarily lexicographic); and they are prefix-free (rule (ii)) due to short names' special ending digit and long names' special starting comma and ending period.
56 | 
57 | Second, instead of giving each side node a whole character, we give it the last bit in the preceding valueSeq. Specifically, we go by twos in the special sequence, then add 1 if the side is "right".
58 | 
59 | Third, for the first waypoint node, we use `` `${ID}.` `` (no comma) instead of the long name `` `,${ID}.` ``. Otherwise, every position would start with a redundant `','`.
60 | 


--------------------------------------------------------------------------------
/benchmarks/main.ts:
--------------------------------------------------------------------------------
  1 | import { assert } from "chai";
  2 | import fs from "fs";
  3 | import createRBTree from "functional-red-black-tree";
  4 | import seedrandom from "seedrandom";
  5 | import { IDs, PositionSource } from "../src";
  6 | import realTextTraceEdits from "./real_text_trace_edits.json";
  7 | 
  8 | const resultsDir = "benchmark_results/";
  9 | 
 10 | const { edits, finalText } = realTextTraceEdits as unknown as {
 11 |   finalText: string;
 12 |   edits: Array<[number, number, string | undefined]>;
 13 | };
 14 | 
 15 | function run(ops?: number, rotateFreq?: number) {
 16 |   console.log(
 17 |     "## Run:",
 18 |     ops ?? "all",
 19 |     "ops; rotate",
 20 |     rotateFreq ? `every ${rotateFreq} ops` : "never"
 21 |   );
 22 |   console.log();
 23 | 
 24 |   const rng = seedrandom("42");
 25 |   let source = new PositionSource({
 26 |     ID: IDs.pseudoRandom(rng),
 27 |   });
 28 |   let list = createRBTree<string, string>();
 29 |   // In order of creation, so we can watch time trends.
 30 |   const metrics: PositionMetric[] = [];
 31 | 
 32 |   for (let i = 0; i < (ops ?? edits.length); i++) {
 33 |     if (rotateFreq && i > 0 && i % rotateFreq === 0) {
 34 |       source = new PositionSource({ ID: IDs.pseudoRandom(rng) });
 35 |     }
 36 |     const edit = edits[i];
 37 |     if (edit[2] !== undefined) {
 38 |       // Insert edit[2] at edit[0]
 39 |       const position = source.createBetween(
 40 |         edit[0] === 0 ? undefined : list.at(edit[0] - 1).key,
 41 |         edit[0] === list.length ? undefined : list.at(edit[0]).key
 42 |       );
 43 |       list = list.insert(position, edit[2]);
 44 |       metrics.push(getMetric(position));
 45 |     } else {
 46 |       // Delete character at edit[0].
 47 |       list = list.at(edit[0]).remove();
 48 |     }
 49 |   }
 50 | 
 51 |   if (ops === undefined) {
 52 |     // Check answer.
 53 |     assert.strictEqual(finalText, list.values.join(""));
 54 |   }
 55 | 
 56 |   // Print summary stats.
 57 |   // Note that collecting stats increases the runtime.
 58 |   printStats(
 59 |     "length",
 60 |     metrics.map((metric) => metric.length)
 61 |   );
 62 |   printStats(
 63 |     "longNames",
 64 |     metrics.map((metric) => metric.longNames)
 65 |   );
 66 |   printStats(
 67 |     "waypoints",
 68 |     metrics.map((metric) => metric.waypoints)
 69 |   );
 70 |   printStats(
 71 |     "valueIndex",
 72 |     metrics.map((metric) => metric.valueIndex)
 73 |   );
 74 | 
 75 |   // Estimate PositionSource memory usage.
 76 |   // @ts-expect-error Private access
 77 |   const lastValueSeqs = source.lastValueSeqs;
 78 |   const keyLengths = [...lastValueSeqs.keys()]
 79 |     .map((prefix) => prefix.length)
 80 |     .reduce((a, b) => a + b, 0);
 81 |   console.log("### PositionSource memory usage\n");
 82 |   console.log("- Map size:", lastValueSeqs.size);
 83 |   console.log("- Sum of map key lengths:", keyLengths);
 84 |   console.log();
 85 | 
 86 |   // Write data files.
 87 |   if (!fs.existsSync(resultsDir)) fs.mkdirSync(resultsDir);
 88 |   const fileName = `results_${ops ?? "all"}_${rotateFreq ?? "never"}.csv`;
 89 |   const csv =
 90 |     "length,longNames,waypoints,valueIndex\n" +
 91 |     metrics
 92 |       .map(
 93 |         (metric) =>
 94 |           `${metric.length},${metric.longNames},${metric.waypoints},${metric.valueIndex}`
 95 |       )
 96 |       .join("\n");
 97 |   fs.writeFileSync(resultsDir + fileName, csv);
 98 | }
 99 | 
/**
 * Measurements taken from one position string.
 */
interface PositionMetric {
  /** Length of the position string, in characters. */
  length: number;
  /**
   * How many of the position's waypoints are written with a long name,
   * i.e., how many full IDs appear in the string.
   */
  longNames: number;
  /** How many waypoints the position has in total. */
  waypoints: number;
  /**
   * The ordinary 0-indexed count of values in a row
   * (the valueIndex), as opposed to the valueSeq.
   */
  valueIndex: number;
}
119 | 
/**
 * Returns the index of the last waypoint character in `position`:
 * the last '.' or digit, ignoring the final character
 * (which always belongs to the valueSeq).
 *
 * @throws If no such character exists.
 */
function getLastWaypointChar(position: string): number {
  let index = position.length - 2;
  while (index >= 0) {
    const c = position.charAt(index);
    const isDigit = c >= "0" && c <= "9";
    if (c === "." || isDigit) {
      // This is where the prefix ends.
      return index;
    }
    index--;
  }
  throw new Error("lastWaypointChar not found: " + position);
}
132 | 
/**
 * Parses a base52 string into a number. Digits are 'A'-'Z' (0-25)
 * followed by 'a'-'z' (26-51), most significant digit first.
 */
function parseBase52(s: string): number {
  return s.split("").reduce((value, ch) => {
    const code = ch.charCodeAt(0);
    // 'A' is code 65; 'a' is code 97 and must map to 26.
    const offset = code < 97 ? 65 : 71;
    return value * 52 + (code - offset);
  }, 0);
}
142 | 
/**
 * Computes the `PositionMetric` for a single position string.
 */
function getMetric(position: string): PositionMetric {
  // The valueSeq is everything after the last waypoint char.
  const valueSeqStart = getLastWaypointChar(position) + 1;
  const valueSeq = parseBase52(position.slice(valueSeqStart));

  return {
    length: position.length,
    // Each long name (ID) ends with exactly one period, so count periods.
    longNames: position.split(".").length - 1,
    waypoints: waypointCount(position),
    valueIndex: valueIndexFromSeq(valueSeq),
  };
}
162 | 
/**
 * Counts the waypoints in `position`. There is one waypoint per:
 * - '.' (the end of a long name), and
 * - digit that is not inside a long name (the end of a short name).
 */
function waypointCount(position: string): number {
  let total = 0;
  // Scanning right-to-left: true while we are inside a long name,
  // i.e., between its closing '.' and its opening ','.
  let skippingLongName = false;
  let idx = position.length;
  while (idx-- > 0) {
    const c = position[idx];
    if (c === ".") {
      // A long name ends here. Skip the rest of it, since the ID
      // may contain digits that are not short names.
      total += 1;
      skippingLongName = true;
      continue;
    }
    if (skippingLongName) {
      skippingLongName = c !== ",";
      continue;
    }
    if (c >= "0" && c <= "9") total += 1;
  }
  return total;
}
184 | 
/**
 * Converts the (odd) valueSeq `n` back to its valueIndex.
 */
function valueIndexFromSeq(n: number): number {
  // Number of base52 digits in n.
  const digits = n === 0 ? 1 : 1 + Math.floor(Math.log(n) / Math.log(52));
  // The d-digit part of the sequence starts at 52^d - 52 * 26^(d-1);
  // our offset from there is our index within the d-digit part.
  const firstOfLength = 52 ** digits - 52 * 26 ** (digits - 1);
  let seqIndex = n - firstOfLength;
  // Each shorter digit count d2 contributes 26^d2 numbers before us.
  for (let shorter = digits - 1; shorter >= 1; shorter--) {
    seqIndex += 26 ** shorter;
  }
  // Only the odd entries are used, so halve to get the valueIndex.
  return (seqIndex - 1) / 2;
}
200 | 
/**
 * Prints a Markdown section with summary statistics (rounded average,
 * median, 99th percentile, max) for `data`.
 *
 * @param name The section heading.
 * @param data The samples. This array is not modified.
 */
function printStats(name: string, data: readonly number[]) {
  // Sort a copy, so the caller's array keeps its original order.
  const sorted = [...data].sort((a, b) => a - b);
  const sum = data.reduce((a, b) => a + b, 0);

  console.log(`### ${name}\n`);
  console.log("- Average:", Math.round(sum / data.length));
  console.log("- Median:", percentile(sorted, 0.5));
  console.log("- 99th percentile:", percentile(sorted, 0.99));
  console.log("- Max:", percentile(sorted, 1));
  console.log();
}
213 | 
/**
 * Returns the `alpha`-th percentile of `sortedData` using the
 * nearest-rank method.
 *
 * @param sortedData The samples, sorted in ascending order.
 * @param alpha The percentile as a fraction, e.g. 0.5 for the median.
 * Values outside (0, 1] are clamped, so 0 gives the minimum.
 * @returns The selected sample, or `undefined` if `sortedData` is empty.
 */
function percentile(sortedData: number[], alpha: number) {
  const rank = Math.ceil(alpha * sortedData.length) - 1;
  // Clamp so that alpha = 0 (rank -1) and alpha > 1 stay in bounds.
  const index = Math.min(Math.max(rank, 0), sortedData.length - 1);
  return sortedData[index];
}
218 | 
// Benchmark configurations as [ops, rotateFreq] pairs,
// in the order described in README.md#performance.
const runConfigs: Array<[number | undefined, number | undefined]> = [
  [undefined, undefined],
  [undefined, 1000],
  [10000, undefined],
  [10000, 1000],
];
for (const [ops, rotateFreq] of runConfigs) {
  run(ops, rotateFreq);
}
224 | 


--------------------------------------------------------------------------------
/test/manual.test.ts:
--------------------------------------------------------------------------------
  1 | import { assert } from "chai";
  2 | import seedrandom from "seedrandom";
  3 | import { IDs, PositionSource } from "../src";
  4 | import { assertIsOrdered, testUniqueAfterDelete } from "./util";
  5 | 
  6 | describe("manual", () => {
  7 |   const rng = seedrandom("42");
  8 |   const randomName = IDs.pseudoRandom(rng);
  9 |   const randomAlice = IDs.pseudoRandom(rng);
 10 |   const randomBobby = IDs.pseudoRandom(rng);
 11 |   const randomBob = IDs.pseudoRandom(rng, { length: 5 });
 12 | 
 13 |   describe("single user", () => {
 14 |     describe("random ID", () => {
 15 |       testSingleUser(randomName);
 16 |     });
 17 |     describe("alphabetic ID", () => {
 18 |       testSingleUser("alice");
 19 |     });
 20 |     describe("numeric ID", () => {
 21 |       testSingleUser("0");
 22 |     });
 23 |     describe("empty ID", () => {
 24 |       testSingleUser("");
 25 |     });
 26 |   });
 27 | 
 28 |   describe("two users", () => {
 29 |     describe("random IDs", () => {
 30 |       testTwoUsers(randomAlice, randomBobby);
 31 |     });
 32 |     describe("random IDs, unequal lengths", () => {
 33 |       testTwoUsers(randomAlice, randomBob);
 34 |     });
 35 |     describe("random IDs, prefixes", () => {
 36 |       testTwoUsers(randomBobby, randomBob);
 37 |     });
 38 |     describe("numeric IDs", () => {
 39 |       testTwoUsers("57834", "00143");
 40 |     });
 41 |     describe("random and empty IDs", () => {
 42 |       testTwoUsers(randomAlice, "");
 43 |     });
 44 |   });
 45 | });
 46 | 
 47 | function testSingleUser(ID: string) {
 48 |   let alice!: PositionSource;
 49 | 
 50 |   beforeEach(() => {
 51 |     alice = new PositionSource({ ID });
 52 |   });
 53 | 
 54 |   it("LtR", () => {
 55 |     let previous = PositionSource.FIRST;
 56 |     const list: string[] = [];
 57 |     for (let i = 0; i < 20; i++) {
 58 |       previous = alice.createBetween(previous, PositionSource.LAST);
 59 |       list.push(previous);
 60 |     }
 61 |     assertIsOrdered(list);
 62 |   });
 63 | 
 64 |   it("RtL", () => {
 65 |     let previous = PositionSource.LAST;
 66 |     const list: string[] = [];
 67 |     for (let i = 0; i < 20; i++) {
 68 |       previous = alice.createBetween(PositionSource.FIRST, previous);
 69 |       list.unshift(previous);
 70 |     }
 71 |     assertIsOrdered(list);
 72 |   });
 73 | 
 74 |   it("restart", () => {
 75 |     const list: string[] = [];
 76 |     for (let j = 0; j < 5; j++) {
 77 |       let previous: string = PositionSource.FIRST;
 78 |       let after = list[0]; // Out-of-bounds okay
 79 |       for (let i = 0; i < 10; i++) {
 80 |         previous = alice.createBetween(previous, after);
 81 |         list.splice(i, 0, previous);
 82 |       }
 83 |     }
 84 |     assertIsOrdered(list);
 85 |   });
 86 | 
 87 |   it("LtR long", () => {
 88 |     let previous = PositionSource.FIRST;
 89 |     const list: string[] = [];
 90 |     for (let i = 0; i < 1000; i++) {
 91 |       previous = alice.createBetween(previous, PositionSource.LAST);
 92 |       list.push(previous);
 93 |     }
 94 |     assertIsOrdered(list);
 95 |     // Efficiency check.
 96 |     assert.isBelow(list.at(-1)!.length, 30);
 97 |   });
 98 | 
 99 |   it("RtL long", () => {
100 |     let previous = PositionSource.LAST;
101 |     const list: string[] = [];
102 |     for (let i = 0; i < 1000; i++) {
103 |       previous = alice.createBetween(PositionSource.FIRST, previous);
104 |       list.unshift(previous);
105 |     }
106 |     assertIsOrdered(list);
107 |   });
108 | 
109 |   it("LtR, mid LtR", () => {
110 |     let previous = PositionSource.FIRST;
111 |     const list: string[] = [];
112 |     for (let i = 0; i < 20; i++) {
113 |       previous = alice.createBetween(previous, PositionSource.LAST);
114 |       list.push(previous);
115 |     }
116 |     const midRight = list[10];
117 |     previous = list[9];
118 |     for (let i = 0; i < 20; i++) {
119 |       previous = alice.createBetween(previous, midRight);
120 |       list.splice(10 + i, 0, previous);
121 |     }
122 |     assertIsOrdered(list);
123 |   });
124 | 
125 |   it("LtR, mid RtL", () => {
126 |     let previous = PositionSource.FIRST;
127 |     const list: string[] = [];
128 |     for (let i = 0; i < 20; i++) {
129 |       previous = alice.createBetween(previous, PositionSource.LAST);
130 |       list.push(previous);
131 |     }
132 |     const midLeft = list[9];
133 |     previous = list[10];
134 |     for (let i = 0; i < 20; i++) {
135 |       previous = alice.createBetween(midLeft, previous);
136 |       list.splice(10, 0, previous);
137 |     }
138 |     assertIsOrdered(list);
139 |   });
140 | 
141 |   it("unique after delete", () => {
142 |     let previous = PositionSource.FIRST;
143 |     const list: string[] = [];
144 |     for (let i = 0; i < 20; i++) {
145 |       previous = alice.createBetween(previous, PositionSource.LAST);
146 |       list.push(previous);
147 |     }
148 |     const midLeft = list[9];
149 |     previous = list[10];
150 |     for (let i = 0; i < 20; i++) {
151 |       previous = alice.createBetween(midLeft, previous);
152 |       list.splice(10, 0, previous);
153 |     }
154 | 
155 |     testUniqueAfterDelete(list, alice);
156 |   });
157 | }
158 | 
/**
 * Registers the two-user test cases, each run against fresh
 * `PositionSource`s `alice` (with `ID1`) and `bob` (with `ID2`).
 */
function testTwoUsers(ID1: string, ID2: string) {
  let alice!: PositionSource;
  let bob!: PositionSource;

  beforeEach(() => {
    alice = new PositionSource({ ID: ID1 });
    bob = new PositionSource({ ID: ID2 });
  });

  // These read alice/bob lazily, since both are replaced before each test.
  /** Alice for the first 20 steps, then bob. */
  const sequentialUser = (step: number): PositionSource =>
    step >= 20 ? bob : alice;
  /** Bob on even steps, alice on odd steps. */
  const alternatingUser = (step: number): PositionSource =>
    step % 2 === 0 ? bob : alice;

  /** Builds 40 positions, each appended at the end by `pickUser(step)`. */
  function growRight(pickUser: (step: number) => PositionSource): string[] {
    const list: string[] = [];
    let previous = PositionSource.FIRST;
    for (let step = 0; step < 40; step++) {
      previous = pickUser(step).createBetween(previous, PositionSource.LAST);
      list.push(previous);
    }
    return list;
  }

  /** Builds 40 positions, each prepended at the start by `pickUser(step)`. */
  function growLeft(pickUser: (step: number) => PositionSource): string[] {
    const list: string[] = [];
    let previous = PositionSource.LAST;
    for (let step = 0; step < 40; step++) {
      previous = pickUser(step).createBetween(PositionSource.FIRST, previous);
      list.unshift(previous);
    }
    return list;
  }

  /**
   * Five rounds of ten alternating-user LtR insertions, each round
   * restarting at the front of the list.
   */
  function restartAlternating(): string[] {
    const list: string[] = [];
    for (let round = 0; round < 5; round++) {
      let previous = PositionSource.FIRST;
      // Undefined in the first round, when the list is still empty.
      const after = list[0];
      for (let step = 0; step < 10; step++) {
        previous = alternatingUser(step).createBetween(previous, after);
        list.splice(step, 0, previous);
      }
    }
    return list;
  }

  /**
   * Has `user` create 20 positions in a row in an otherwise empty list,
   * growing in the given direction. Returns them in list order.
   */
  function soloSequence(
    user: PositionSource,
    direction: "LtR" | "RtL"
  ): string[] {
    const out: string[] = [];
    let previous: string | undefined = undefined;
    for (let step = 0; step < 20; step++) {
      if (direction === "LtR") {
        previous = user.createBetween(previous, undefined);
        out.push(previous);
      } else {
        previous = user.createBetween(undefined, previous);
        out.unshift(previous);
      }
    }
    return out;
  }

  /**
   * Concatenates the two lists, putting first whichever has the
   * smaller first element. If the sequences are non-interleaving,
   * the result is in order.
   */
  function concatByFirst(list1: string[], list2: string[]): string[] {
    return list1[0] < list2[0]
      ? [...list1, ...list2]
      : [...list2, ...list1];
  }

  it("LtR sequential", () => {
    assertIsOrdered(growRight(sequentialUser));
  });

  it("LtR alternating", () => {
    assertIsOrdered(growRight(alternatingUser));
  });

  it("RtL sequential", () => {
    assertIsOrdered(growLeft(sequentialUser));
  });

  it("RtL alternating", () => {
    assertIsOrdered(growLeft(alternatingUser));
  });

  it("restart alternating", () => {
    assertIsOrdered(restartAlternating());
  });

  it("LtR concurrent", () => {
    const list1 = soloSequence(alice, "LtR");
    const list2 = soloSequence(bob, "LtR");
    assertIsOrdered(concatByFirst(list1, list2));
  });

  it("RtL concurrent", () => {
    const list1 = soloSequence(alice, "RtL");
    const list2 = soloSequence(bob, "RtL");
    assertIsOrdered(concatByFirst(list1, list2));
  });

  it("insert between concurrent", () => {
    // The "hard case" from the blog post; see
    // https://mattweidner.com/2022/10/21/basic-list-crdt.html#between-concurrent
    const a = alice.createBetween(undefined, undefined);
    const b = alice.createBetween(a, undefined);

    let c = alice.createBetween(a, b);
    let d = bob.createBetween(a, b);
    // Swap if needed so that c < d.
    if (d < c) [c, d] = [d, c];

    // Create e between c and d, once per user.
    const e1 = alice.createBetween(c, d);
    const e2 = bob.createBetween(c, d);

    assert.notEqual(e1, e2);
    assertIsOrdered([a, c, e1, d, b]);
    assertIsOrdered([a, c, e2, d, b]);
  });

  it("unique after delete", () => {
    const list = restartAlternating();
    assertIsOrdered(list);

    testUniqueAfterDelete(list, alice);
    testUniqueAfterDelete(list, bob);
  });

  it("left children", () => {
    const gParent = alice.createBetween();
    // Every parent is a child of gParent; the parents share one
    // waypoint and differ only in their valueIndex.
    const parents: string[] = [];
    let previous = gParent;
    for (let n = 0; n < 500; n++) {
      previous = bob.createBetween(previous, PositionSource.LAST);
      parents.push(previous);
    }
    const list = [gParent, ...parents];
    // Create a position between gParent and each parent. Because the
    // parent starts with gParent, the new position is a left child of
    // the parent. This checks that leftVersion() works on those valueSeq's.
    parents.forEach((parent, n) => {
      const child = bob.createBetween(gParent, parent);
      list.splice(2 * n + 1, 0, child);
    });
    assertIsOrdered(list);

    testUniqueAfterDelete(list, alice);
    testUniqueAfterDelete(list, bob);
  });
}
333 | 


--------------------------------------------------------------------------------
/src/position_source.ts:
--------------------------------------------------------------------------------
  1 | import { IDs } from "./ids";
  2 | import { assert, LastInternal, precond } from "./util";
  3 | 
  4 | /**
  5 |  * A source of lexicographically-ordered "position strings" for
  6 |  * collaborative lists and text.
  7 |  *
  8 |  * In a collaborative list (or text string), you need a way to refer
  9 |  * to "positions" within that list that:
 10 |  * 1. Point to a specific list element (or text character).
 11 |  * 2. Are global (all users agree on them) and immutable (they do not
 12 |  * change over time).
 13 |  * 3. Can be sorted.
 14 |  * 4. Are unique, even if different users concurrently create positions
 15 |  * at the same place.
 16 |  *
 17 |  * `PositionSource` gives you such positions, in the form
 18 |  * of lexicographically-ordered strings. Specifically, `createBetween`
 19 |  * returns a new "position string" in between two existing position strings.
 20 |  *
 21 |  * These strings have the bonus properties:
 22 |  * - 5. (Non-Interleaving) If two `PositionSource`s concurrently create a (forward or backward)
 23 |  * sequence of positions at the same place,
 24 |  * their sequences will not be interleaved.
 25 |  * For example, if
 26 |  * Alice types "Hello" while Bob types "World" at the same place,
 27 |  * and they each use a `PositionSource` to create a position for each
 28 |  * character, then
 29 |  * the resulting order will be "HelloWorld" or "WorldHello", not
 30 |  * "HWeolrllod".
 31 |  * - 6. If a `PositionSource` creates positions in a forward (increasing)
 32 |  * sequence, their lengths as strings will only grow logarithmically,
 33 |  * not linearly.
 34 |  *
 35 |  * Position strings are printable ASCII. Specifically, they
 36 |  * contain alphanumeric characters, `','`, and `'.'`.
 37 |  * Also, the special string `PositionSource.LAST` is `'~'`.
 38 |  *
 39 |  * Further reading:
 40 |  * - [Fractional indexing](https://www.figma.com/blog/realtime-editing-of-ordered-sequences/#fractional-indexing),
 41 |  * a related scheme that satisfies 1-3 but not 4-6.
 42 |  * - [List CRDTs](https://mattweidner.com/2022/10/21/basic-list-crdt.html)
 43 |  * and how they map to position strings. `PositionSource` uses an optimized
 44 |  * variant of that link's string implementation.
 45 |  * - [Paper about interleaving](https://www.repository.cam.ac.uk/handle/1810/290391)
 46 |  * in collaborative text editors.
 47 |  */
 48 | export class PositionSource {
 49 |   /**
 50 |    * A string that is less than all positions.
 51 |    *
 52 |    * Value: `""`.
 53 |    */
 54 |   static readonly FIRST: string = "";
 55 |   /**
 56 |    * A string that is greater than all positions.
 57 |    *
 58 |    * Value: `"~"`.
 59 |    */
 60 |   static readonly LAST: string = LastInternal;
 61 | 
 62 |   /**
 63 |    * The unique ID for this `PositionSource`.
 64 |    */
 65 |   readonly ID: string;
 66 |   /**
 67 |    * Our waypoints' long name: `,${ID}.`.
 68 |    */
 69 |   private readonly longName: string;
 70 |   /**
 71 |    * Variant of longName used for a position's first ID: `${ID}.`.
 72 |    * (Otherwise every position would start with a redundant ','.)
 73 |    */
 74 |   private readonly firstName: string;
 75 | 
 76 |   /**
 77 |    * For each waypoint that we created, maps a prefix (see getPrefix)
 78 |    * for that waypoint to its last (most recent) valueSeq.
 79 |    * We always store the right-side version (odd valueSeq).
 80 |    */
 81 |   private lastValueSeqs = new Map<string, number>();
 82 | 
 83 |   /**
 84 |    * Constructs a new `PositionSource`.
 85 |    *
 86 |    * It is okay to share a single `PositionSource` between
 87 |    * all documents (lists/text strings) in the same JavaScript runtime.
 88 |    *
 89 |    * For efficiency (shorter position strings),
 90 |    * within each JavaScript runtime, you should not use
 91 |    * more than one `PositionSource` for the same document.
 92 |    * An exception is if multiple logical users share the same runtime;
 93 |    * we then recommend one `PositionSource` per user.
 94 |    *
 95 |    * @param options.ID A unique ID for this `PositionSource`. Defaults to
 96 |    * `IDs.random()`.
 97 |    *
 98 |    * If provided, `options.ID` must satisfy:
 99 |    * - It is unique across the entire collaborative application, i.e.,
100 |    * all `PositionSource`s whose positions may be compared to ours. This
101 |    * includes past `PositionSource`s, even if they correspond to the same
102 |    * user/device.
103 |    * - It does not contain `','` or `'.'`.
104 |    * - The first character is lexicographically less than `'~'` (code point 126).
105 |    *
106 |    * If `options.ID` contains non-alphanumeric characters, then created
107 |    * positions will contain those characters in addition to
108 |    * alphanumeric characters, `','`, and `'.'`.
109 |    */
110 |   constructor(options?: { ID?: string }) {
111 |     if (options?.ID !== undefined) {
112 |       IDs.validate(options.ID);
113 |     }
114 |     this.ID = options?.ID ?? IDs.random();
115 |     this.longName = `,${this.ID}.`;
116 |     this.firstName = `${this.ID}.`;
117 |   }
118 | 
119 |   /**
120 |    * Returns a new position between `left` and `right`
121 |    * (`left < new < right`).
122 |    *
123 |    * The new position is unique across the entire collaborative application,
124 |    * even in the face of concurrent calls to this method on other
125 |    * `PositionSource`s.
126 |    *
127 |    * @param left Defaults to `PositionSource.FIRST` (insert at the beginning).
128 |    *
129 |    * @param right Defaults to `PositionSource.LAST` (insert at the end).
130 |    */
131 |   createBetween(
132 |     left: string = PositionSource.FIRST,
133 |     right: string = PositionSource.LAST
134 |   ): string {
135 |     precond(left < right, "left must be less than right:", left, "!<", right);
136 |     precond(
137 |       right <= PositionSource.LAST,
138 |       "right must be less than or equal to LAST:",
139 |       right,
140 |       "!<=",
141 |       PositionSource.LAST
142 |     );
143 | 
144 |     const leftFixed = left === PositionSource.FIRST ? null : left;
145 |     const rightFixed = right === PositionSource.LAST ? null : right;
146 | 
147 |     let ans: string;
148 | 
149 |     if (
150 |       rightFixed !== null &&
151 |       (leftFixed === null || rightFixed.startsWith(leftFixed))
152 |     ) {
153 |       // Left child of right. This always appends a waypoint.
154 |       const ancestor = leftVersion(rightFixed);
155 |       ans = this.appendWaypoint(ancestor);
156 |     } else {
157 |       // Right child of left.
158 |       if (leftFixed === null) {
159 |         // ancestor is FIRST.
160 |         ans = this.appendWaypoint("");
161 |       } else {
162 |         // Check if we can reuse left's prefix.
163 |         // It needs to be one of ours, and right can't use the same
164 |         // prefix (otherwise we would get ans > right by comparing right's
165 |         // older valueIndex to our new valueIndex).
166 |         const prefix = getPrefix(leftFixed);
167 |         const lastValueSeq = this.lastValueSeqs.get(prefix);
168 |         if (
169 |           lastValueSeq !== undefined &&
170 |           !(rightFixed !== null && rightFixed.startsWith(prefix))
171 |         ) {
172 |           // Reuse.
173 |           const valueSeq = nextOddValueSeq(lastValueSeq);
174 |           ans = prefix + stringifyBase52(valueSeq);
175 |           this.lastValueSeqs.set(prefix, valueSeq);
176 |         } else {
177 |           // Append waypoint.
178 |           ans = this.appendWaypoint(leftFixed);
179 |         }
180 |       }
181 |     }
182 | 
183 |     assert(left < ans && ans < right, "Bad position:", left, ans, right);
184 |     return ans;
185 |   }
186 | 
187 |   /**
188 |    * Appends a wayoint to the given ancestor (= prefix adjusted for
189 |    * side), returning a unique new position using that waypoint.
190 |    *
191 |    * lastValueSeqs is also updated as needed for the waypoint.
192 |    */
193 |   private appendWaypoint(ancestor: string): string {
194 |     let waypointName = ancestor === "" ? this.firstName : this.longName;
195 |     // If our ID already appears in ancestor, instead use a short
196 |     // name for the waypoint.
197 |     // Here we use the uniqueness of ',' and '.' to
198 |     // claim that if this.longName (= `,${ID}.`) appears in ancestor, then it
199 |     // must actually be from a waypoint that we created.
200 |     let existing = ancestor.lastIndexOf(this.longName);
201 |     if (ancestor.startsWith(this.firstName)) existing = 0;
202 |     if (existing !== -1) {
203 |       // Find the index of existing among the long-name
204 |       // waypoints, in backwards order. Here we use the fact that
205 |       // each longName ends with '.' and that '.' does not appear otherwise.
206 |       let index = -1;
207 |       for (let i = existing; i < ancestor.length; i++) {
208 |         if (ancestor[i] === ".") index++;
209 |       }
210 |       waypointName = stringifyShortName(index);
211 |     }
212 | 
213 |     const prefix = ancestor + waypointName;
214 |     const lastValueSeq = this.lastValueSeqs.get(prefix);
215 |     // Use next odd (right-side) valueSeq (1 if it's a new waypoint).
216 |     const valueSeq =
217 |       lastValueSeq === undefined ? 1 : nextOddValueSeq(lastValueSeq);
218 |     this.lastValueSeqs.set(prefix, valueSeq);
219 |     return prefix + stringifyBase52(valueSeq);
220 |   }
221 | }
222 | 
/**
 * Extracts position's *prefix*: everything up to and including the last
 * waypoint name, i.e., position with its final valueSeq removed.
 */
function getPrefix(position: string): string {
  // A waypoint name ends in '.' (long names) or a digit (short names).
  // A valueSeq consists only of letters, so scanning backwards, the
  // first '.' or digit we meet is the last char of the prefix.
  // The final char of position is never examined.
  const DOT = 46; // "."
  const ZERO = 48; // "0"
  const NINE = 57; // "9"
  let end = position.length - 1;
  while (end > 0) {
    const code = position.charCodeAt(end - 1);
    if (code === DOT || (code >= ZERO && code <= NINE)) {
      // The prefix is position[0, end).
      return position.slice(0, end);
    }
    end--;
  }
  assert(false, "No last waypoint char found (not a position?)", position);
  return "";
}
241 | 
/**
 * Converts position to its "left" variant: the same string, but ending
 * with a "left" marker in place of the default "right" marker.
 *
 * The result is the ancestor for position's left descendants.
 */
function leftVersion(position: string) {
  // The valueSeq is odd, so subtracting one from it only changes
  // its last base52 digit.
  const lastIndex = position.length - 1;
  const lastDigit = parseBase52(position[lastIndex]);
  assert(
    lastDigit % 2 === 1,
    "Bad valueSeq (not a position?)",
    lastDigit,
    position
  );
  const leftDigit = stringifyBase52(lastDigit - 1);
  return position.substring(0, lastIndex) + leftDigit;
}
255 | 
/**
 * Encodes n as a short name: base 52 for all but the last digit, which
 * is an ordinary base-10 digit character.
 *
 * Ending on a digit is what lets getPrefix find the end of a short name.
 */
function stringifyShortName(n: number): string {
  const asciiZero = 48;
  if (n < 10) {
    // A single decimal digit.
    return String.fromCharCode(asciiZero + n);
  }
  const leading = stringifyBase52(Math.floor(n / 10));
  const finalDigit = String.fromCharCode(asciiZero + (n % 10));
  return leading + finalDigit;
}
268 | 
/**
 * Encodes n in base 52 using only letters. The "digits" are ordered by
 * code point: 'A'-'Z' stand for 0-25 and 'a'-'z' stand for 26-51.
 */
function stringifyBase52(n: number): string {
  if (n === 0) return "A";
  let encoded = "";
  // Peel off the least significant digit each round, prepending its letter.
  for (let rest = n; rest > 0; rest = Math.floor(rest / 52)) {
    const digit = rest % 52;
    // Offset 65 maps 0 to 'A'; offset 71 maps 26 to 'a'.
    const offset = digit >= 26 ? 71 : 65;
    encoded = String.fromCharCode(offset + digit) + encoded;
  }
  return encoded;
}
282 | 
/**
 * Decodes a letters-only base 52 string: 'A'-'Z' are digits 0-25 and
 * 'a'-'z' are digits 26-51. The empty string decodes to 0.
 */
function parseBase52(s: string): number {
  return s.split("").reduce((total, char) => {
    const code = char.charCodeAt(0);
    // Offset 71 maps 'a' (97) to 26; offset 65 maps 'A' (65) to 0.
    const offset = code >= 97 ? 71 : 65;
    return 52 * total + (code - offset);
  }, 0);
}
292 | 
const log52 = Math.log(52);

/**
 * Advances an odd valueSeq to the next odd valueSeq in the special
 * sequence. Equivalently: map n to its valueIndex, add 2, then map back.
 *
 * Properties of the sequence:
 * 1. Every entry is a nonnegative integer (but not every nonnegative
 * integer is an entry).
 * 2. The entries' base-52 representations appear in lexicographic
 * order, and none of them is a prefix of another.
 * 3. The n-th entry has O(log(n)) base-52 digits.
 *
 * Properties (2) and (3) mirror ordinary counting, except that the order
 * is the (base-52) lexicographic order rather than the usual order by
 * magnitude. The entries also happen to be in order by magnitude,
 * although we do not rely on that.
 *
 * The sequence is built as follows:
 * - Start with 0.
 * - Enumerate 26^1 numbers (A, B, ..., Z).
 * - Add 1, multiply by 52, then enumerate 26^2 numbers
 * (aA, aB, ..., mz).
 * - Add 1, multiply by 52, then enumerate 26^3 numbers
 * (nAA, nAB, ..., tZz).
 * - Continue forever, enumerating 26^d d-digit numbers for each d >= 1.
 * Imagining a decimal place in front of each number, each d consumes
 * 2^(-d) of the unit interval, so we never "reach 1" (overflow to
 * d+1 digits when we meant to use d digits).
 *
 * I believe this is related to
 * [Elias gamma coding](https://en.wikipedia.org/wiki/Elias_gamma_coding).
 */
function nextOddValueSeq(n: number): number {
  // Number of base-52 digits in n.
  const digitCount = n === 0 ? 1 : Math.floor(Math.log(n) / log52) + 1;
  // The last entry with digitCount digits works out to 52^d - 26^d - 1.
  const lastOfLength = 52 ** digitCount - 26 ** digitCount - 1;
  if (n !== lastOfLength) {
    // Still within the current length: step n -> n + 1 twice.
    return n + 2;
  }
  // The first step moves to a new length, n -> (n + 1) * 52;
  // the second step is the usual n -> n + 1.
  return (n + 1) * 52 + 1;
}
342 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # position-strings
  2 | 
  3 | A source of lexicographically-ordered "position strings" for
  4 | collaborative lists and text.
  5 | 
  6 | - [About](#about)
  7 | - [Usage](#usage)
  8 | - [API](#api)
  9 | - [Example App](#example-app)
 10 | - [Performance](#performance)
 11 | 
 12 | ## About
 13 | 
 14 | In a collaborative list (or text string), you need a way to refer
 15 | to "positions" within that list that:
 16 | 
 17 | 1. Point to a specific list element (or text character).
 18 | 2. Are global (all users agree on them) and immutable (they do not
 19 |    change over time).
 20 | 3. Can be sorted.
 21 | 4. Are unique, even if different users concurrently create positions
 22 |    at the same place.
 23 | 
 24 | This package gives you such positions, in the form
 25 | of lexicographically-ordered strings. Specifically, `PositionSource.createBetween`
 26 | returns a new "position string" in between two existing position strings.
 27 | 
 28 | These strings have the bonus properties:
 29 | 
 30 | 5. (Non-Interleaving) If two `PositionSource`s concurrently create a (forward or backward)
 31 |    sequence of positions at the same place,
 32 |    their sequences will not be interleaved.
 33 | 
 34 |    For example, if
 35 |    Alice types "Hello" while Bob types "World" at the same place,
 36 |    and they each use a `PositionSource` to create a position for each
 37 |    character, then
 38 |    the resulting order will be "HelloWorld" or "WorldHello", not
 39 |    "HWeolrllod".
 40 | 
 41 | 6. If a `PositionSource` creates positions in a forward (increasing)
 42 |    sequence, their lengths as strings will only grow logarithmically,
 43 |    not linearly.
 44 | 
 45 | Position strings are printable ASCII. Specifically, they
 46 | contain alphanumeric characters, `','`, and `'.'`.
 47 | Also, the special string `PositionSource.LAST` is `'~'`.
 48 | 
 49 | ### Further reading
 50 | 
 51 | - [Fractional indexing](https://www.figma.com/blog/realtime-editing-of-ordered-sequences/#fractional-indexing),
 52 |   a related scheme that satisfies 1-3 but not 4-6.
 53 | - [List CRDTs](https://mattweidner.com/2022/10/21/basic-list-crdt.html)
 54 |   and how they map to position strings. `PositionSource` uses an optimized
 55 |   variant of that link's [string implementation](https://mattweidner.com/2022/10/21/basic-list-crdt.html#intro-string-implementation), described in
 56 |   [algorithm.md](https://github.com/mweidner037/position-strings/blob/master/algorithm.md).
 57 | - [Paper about interleaving](https://www.repository.cam.ac.uk/handle/1810/290391)
 58 |   in collaborative text editors.
 59 | - [list-positions](https://github.com/mweidner037/list-positions/tree/master/#readme), a similar but more comprehensive library. It provides helper data structures (e.g., a `List<T>` class) and more efficient usage options.
 60 | 
 61 | ## Usage
 62 | 
 63 | Install with npm:
 64 | 
 65 | ```bash
 66 | npm i --save position-strings
 67 | ```
 68 | 
 69 | Creating position strings:
 70 | 
 71 | ```ts
 72 | import { PositionSource } from "position-strings";
 73 | 
 74 | // At the start of your app:
 75 | const source = new PositionSource();
 76 | 
 77 | // When the user types `char` at `index`:
 78 | const position = source.createBetween(
 79 |   myListPositions[index - 1],
 80 |   myListPositions[index]
 81 |   // If index is 0 or myListPositions.length, the above behaves reasonably,
 82 |   // since undefined defaults to PositionSource.FIRST or LAST.
 83 | );
 84 | myListPositions.splice(index, 0, position);
 85 | myList.splice(index, 0, char);
 86 | // Or insert { position, char } into a database table, ordered map, etc.
 87 | ```
 88 | 
 89 | If your list is collaborative:
 90 | 
 91 | ```ts
 92 | import { findPosition } from "position-strings";
 93 | 
 94 | // After creating { char, position }, also broadcast it to other users.
 95 | // When you receive `remote = { char, position }` from another user:
 96 | const index = findPosition(remote.position, myListPositions).index;
 97 | myListPositions.splice(index, 0, remote.position);
 98 | myList.splice(index, 0, remote.char);
 99 | // Or insert `remote` into a database table and query
100 | // "SELECT char FROM table ORDER BY position".
101 | // Or insert `remote` into an ordered map, etc.
102 | ```
103 | 
104 | To use cursors:
105 | 
106 | ```ts
107 | import { Cursors, PositionSource } from "position-strings";
108 | 
109 | let cursor: string = PositionSource.FIRST;
110 | 
111 | // When the user deliberately moves their cursor to `cursorIndex`:
112 | cursor = Cursors.fromIndex(cursorIndex, myListPositions);
113 | // Or run the algorithm in the `Cursors.fromIndex` docs.
114 | 
115 | // When the text changes, update the displayed cursor:
116 | cursorIndex = Cursors.toIndex(cursor, myListPositions);
117 | // Or run the query in the `Cursors.toIndex` docs.
118 | ```
119 | 
120 | ## API
121 | 
122 | - [Class `PositionSource`](#class-positionsource)
123 | - [Function `findPosition`](#function-findposition)
124 | - [Class `Cursors`](#class-cursors)
125 | - [Class `IDs`](#class-ids)
126 | 
127 | ### Class `PositionSource`
128 | 
129 | #### constructor
130 | 
131 | ```ts
132 | constructor(options?: { ID?: string })
133 | ```
134 | 
135 | Constructs a new `PositionSource`.
136 | 
137 | It is okay to share a single `PositionSource` between
138 | all documents (lists/text strings) in the same JavaScript runtime.
139 | 
140 | For efficiency (shorter position strings),
141 | within each JavaScript runtime, you should not use
142 | more than one `PositionSource` for the same document.
143 | An exception is if multiple logical users share the same runtime;
144 | we then recommend one `PositionSource` per user.
145 | 
146 | _@param_ `options.ID` A unique ID for this `PositionSource`. Defaults to
147 | `IDs.random()`.
148 | 
149 | If provided, `options.ID` must satisfy:
150 | 
151 | - It is unique across the entire collaborative application, i.e.,
152 |   all `PositionSource`s whose positions may be compared to ours. This
153 |   includes past `PositionSource`s, even if they correspond to the same
154 |   user/device.
155 | - It does not contain `','` or `'.'`.
156 | - The first character is lexicographically less than `'~'` (code point 126).
157 | 
158 | If `options.ID` contains non-alphanumeric characters, then created
159 | positions will contain those characters in addition to
160 | alphanumeric characters, `','`, and `'.'`.
161 | 
162 | #### createBetween
163 | 
164 | ```ts
165 | createBetween(
166 |   left: string = PositionSource.FIRST,
167 |   right: string = PositionSource.LAST
168 | ): string
169 | ```
170 | 
171 | Returns a new position between `left` and `right`
172 | (`left < new < right`).
173 | 
174 | The new position is unique across the entire collaborative application,
175 | even in the face of concurrent calls to this method on other
176 | `PositionSource`s.
177 | 
178 | _@param_ `left` Defaults to `PositionSource.FIRST` (insert at the beginning).
179 | 
180 | _@param_ `right` Defaults to `PositionSource.LAST` (insert at the end).
181 | 
182 | #### Properties
183 | 
184 | ```ts
185 | readonly ID: string
186 | ```
187 | 
188 | The unique ID for this `PositionSource`.
189 | 
190 | ```ts
191 | static readonly FIRST: string = ""
192 | ```
193 | 
194 | A string that is less than all positions.
195 | 
196 | ```ts
197 | static readonly LAST: string = "~"
198 | ```
199 | 
200 | A string that is greater than all positions.
201 | 
202 | ### Function `findPosition`
203 | 
204 | ```ts
205 | function findPosition(
206 |   position: string,
207 |   positions: ArrayLike<string>
208 | ): { index: number; isPresent: boolean };
209 | ```
210 | 
211 | Returns `{ index, isPresent }`, where:
212 | 
213 | - `index` is the current index of `position` in `positions`,
214 |   or where it would be if added.
215 | - `isPresent` is true if `position` is present in `positions`.
216 | 
217 | If this method is inconvenient (e.g., the positions are in a database
218 | instead of an array), you can instead compute
219 | `index` by finding the number of positions less than `position`.
220 | For example, in SQL, use:
221 | 
222 | ```sql
223 | SELECT COUNT(*) FROM table WHERE position < $position
224 | ```
225 | 
226 | See also: `Cursors.toIndex`.
227 | 
228 | _@param_ `positions` The target list's positions, in lexicographic order.
229 | There should be no duplicate positions.
230 | 
231 | ### Class `Cursors`
232 | 
233 | Utilities for working with cursors in a collaborative list
234 | or text string.
235 | 
236 | A cursor points to a particular spot in a list, in between
237 | two list elements (or text characters). This class handles
238 | cursors for lists that use our position strings.
239 | 
240 | A cursor is represented as a string.
241 | Specifically, it is the position of the element
242 | to its left, or `PositionSource.FIRST` if it is at the beginning
243 | of the list. If that position is later deleted, the cursor stays the
244 | same, but its index shifts to the next element on its left.
245 | 
246 | You can use cursor strings as ordinary cursors, selection endpoints,
247 | range endpoints for a comment or formatting span, etc.
248 | 
249 | #### fromIndex
250 | 
251 | ```ts
252 | static fromIndex(index: number, positions: ArrayLike<string>): string
253 | ```
254 | 
255 | Returns the cursor at `index` within the given list of positions. Invert with `Cursors.toIndex`.
256 | 
257 | That is, the cursor is between the list elements at `index - 1` and `index`.
258 | 
259 | If this method is inconvenient (e.g., the positions are in a database
260 | instead of an array), you can instead run the following algorithm yourself:
261 | 
262 | - If `index` is 0, return `PositionSource.FIRST = ""`.
263 | - Else return `positions[index - 1]`.
264 | 
265 | _@param_ `positions` The target list's positions, in lexicographic order.
266 | There should be no duplicate positions.
267 | 
268 | #### toIndex
269 | 
270 | ```ts
271 | static toIndex(cursor: string, positions: ArrayLike<string>): number
272 | ```
273 | 
274 | Returns the current index of `cursor` within the given list of
275 | positions. Inverse of `Cursors.fromIndex`.
276 | 
277 | That is, the cursor is between the list elements at `index - 1` and `index`.
278 | 
279 | If this method is inconvenient (e.g., the positions are in a database
280 | instead of an array), you can instead compute
281 | `index` by finding the number of positions less than
282 | or equal to `position`.
283 | For example, in SQL, use:
284 | 
285 | ```sql
286 | SELECT COUNT(*) FROM table WHERE position <= $position
287 | ```
288 | 
289 | See also: `findPosition`.
290 | 
291 | _@param_ `positions` The target list's positions, in lexicographic order.
292 | There should be no duplicate positions.
293 | 
294 | ### Class `IDs`
295 | 
296 | Utilities for generating `PositionSource` IDs (the `options.ID` constructor argument).
297 | 
298 | #### random
299 | 
300 | ```ts
301 | static random(options?: { length?: number; chars?: string }): string
302 | ```
303 | 
304 | Returns a cryptographically random ID made of alphanumeric characters.
305 | 
306 | _@param_ `options.length` The length of the ID, in characters.
307 | Default: `IDs.DEFAULT_LENGTH`.
308 | 
309 | _@param_ `options.chars` The characters to draw from. Default: `IDs.DEFAULT_CHARS`.
310 | 
311 | If specified, only the first 256 elements are used, and you achieve
312 | about `log_2(chars.length)` bits of entropy per `length`.
313 | 
314 | #### pseudoRandom
315 | 
316 | ```ts
317 | static pseudoRandom(
318 |     rng: seedrandom.prng,
319 |     options?: { length?: number; chars?: string }
320 |   ): string
321 | ```
322 | 
323 | Returns a pseudorandom ID made of alphanumeric characters,
324 | generated using `rng` from package [seedrandom](https://www.npmjs.com/package/seedrandom).
325 | 
326 | > Note: If you install `@types/seedrandom` yourself instead of relying on our
327 | > dependency, install version `2.4.28`, even though `seedrandom` itself
328 | > has version `3.0.5`.
329 | 
330 | Pseudorandom IDs with a fixed seed are recommended for
331 | tests and benchmarks, to make them deterministic.
332 | 
333 | _@param_ `options.length` The length of the ID, in characters.
334 | Default: `IDs.DEFAULT_LENGTH`.
335 | 
336 | _@param_ `options.chars` The characters to draw from. Default: `IDs.DEFAULT_CHARS`.
337 | 
338 | If specified, only the first 256 elements are used, and you achieve
339 | about `log_2(chars.length)` bits of entropy per `length`.
340 | 
341 | #### validate
342 | 
343 | ```ts
344 | static validate(ID: string): void
345 | ```
346 | 
347 | Throws an error if `ID` does not satisfy the
348 | following requirements from `PositionSource`'s constructor:
349 | 
350 | - It does not contain `','` or `'.'`.
351 | - The first character is lexicographically less than `'~'` (code point 126).
352 | 
353 | #### Properties
354 | 
355 | ```ts
356 | static readonly DEFAULT_LENGTH: number = 10
357 | ```
358 | 
359 | The default length of an ID, in characters.
360 | 
361 | ```ts
362 | static readonly DEFAULT_CHARS: string =
363 |     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
364 | ```
365 | 
366 | Default characters used in IDs: alphanumeric chars.
367 | 
368 | ## Example App
369 | 
370 | [Firebase text-editor](https://firebase-text-editor.herokuapp.com/) uses position-strings to implement collaborative (plain) text editing on top of [Firebase RTDB](https://firebase.google.com/docs/database). Each character is stored together with its position, and a Firebase query is used to list the characters in order.
371 | 
372 | The app also demonstrates using `Cursors` to track the local user's selection start and end.
373 | 
374 | [Source code](https://github.com/mweidner037/firebase-text-editor/blob/master/src/site/main.ts)
375 | 
376 | ## Performance
377 | 
378 | _Position string length_ is our main performance metric. This determines the memory, storage, and network overhead due to a collaborative list's positions.
379 | 
380 | > Additionally, each `PositionSource` instance uses some memory, and `PositionSource.createBetween` takes some time, but these are usually small enough to ignore.
381 | 
382 | To measure position string length in a realistic setting, we benchmark against [Martin Kleppmann's text trace](https://github.com/automerge/automerge-perf). That is, we pretend a user is typing into a collaborative text editor that attaches a position string to each character, then output statistics for those positions.
383 | 
384 | For the complete trace (182k positions, 260k total edits) typed by a single `PositionSource`, the average position length is **33 characters**, and the max length is 55.
385 | 
386 | For a more realistic scenario with 260 `PositionSource`s (a new one every 1,000 edits), the average position length is **111 characters**, and the max length is 237. "Rotating" `PositionSource`s in this way simulates the effect of multiple users, or a single user who occasionally reloads the page. (The extra length comes from referencing multiple [IDs](#properties) per position: an average of 8 IDs/position x 8 chars/ID = 64 chars/position.)
387 | 
388 | If we only consider the first 10,000 edits, the averages decrease to **23 characters** (single `PositionSource`) and **50 characters** (new `PositionSource` every 1,000 edits).
389 | 
390 | More stats for these four scenarios are in [stats.md](https://github.com/mweidner037/position-strings/blob/master/stats.md). For full data, run `npm run benchmarks` (after `npm ci`) and look in `benchmark_results/`.
391 | 
392 | ### Performance Considerations
393 | 
394 | - In realistic scenarios with multiple `PositionSource`s, most of the positions' length comes from referencing [IDs](#properties). By default, IDs are 8 random alphanumeric characters to give a low probability of collisions, but you can pass your own shorter IDs to [`PositionSource`'s constructor](#constructor). For example, you could assign IDs sequentially from a server.
395 | - A set of positions from the same list compress reasonably well together, since they represent different paths in the same tree. In particular, a list's worth of positions should compress well under gzip or prefix compression. However, compressing individual positions is not recommended.
396 | - [`PositionSource.createBetween`](#createbetween) is optimized for left-to-right insertions. If you primarily insert right-to-left or at random, you will see worse performance.
397 | 


--------------------------------------------------------------------------------