├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── LICENSE.txt
├── README.md
├── package-lock.json
├── package.json
├── rollup.config.js
├── scripts
    └── generateBidiData.js
├── src
    ├── brackets.js
    ├── charTypes.js
    ├── data
    │   ├── bidiBrackets.data.js
    │   ├── bidiCharTypes.data.js
    │   └── bidiMirroring.data.js
    ├── embeddingLevels.js
    ├── index.js
    ├── mirroring.js
    ├── reordering.js
    └── util
    │   └── parseCharacterMap.js
└── test
    ├── BidiCharacterTest.js
    ├── BidiCharacterTest.txt
    ├── BidiTest.js
    ├── BidiTest.txt
    ├── README.md
    ├── runTestsOnBuild.js
    └── runTestsOnSrc.js


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions
 3 | 
 4 | name: Node.js CI
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [ main ]
 9 |   pull_request:
10 |     branches: [ main ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 | 
17 |     strategy:
18 |       matrix:
19 |         node-version: [14.x]
20 |         # See supported Node.js release schedule at https://nodejs.org/en/about/releases/
21 | 
22 |     steps:
23 |     - uses: actions/checkout@v2
24 |     - name: Use Node.js ${{ matrix.node-version }}
25 |       uses: actions/setup-node@v2
26 |       with:
27 |         node-version: ${{ matrix.node-version }}
28 |     - run: npm ci
29 |     - run: npm run build
30 |     - run: npm run test-build
31 |     - run: npm test
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | dist
3 | node_modules
4 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2021 Jason Johnston
 2 | 
 3 | MIT License
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # bidi-js
  2 | 
  3 | This is a pure JavaScript implementation of the [Unicode Bidirectional Algorithm](https://www.unicode.org/reports/tr9/) version 13.0.0. Its goals, in no particular order, are to be:
  4 | 
  5 | * Correct
  6 | * Small
  7 | * Fast
  8 | 
  9 | 
 10 | ## Conformance
 11 | 
 12 | This implementation currently conforms to section [UAX-C1](https://unicode.org/reports/tr9/#C1) of the bidi spec, as verified by running all the provided [conformance tests](https://unicode.org/reports/tr9/#Bidi_Conformance_Testing).
 13 | 
 14 | ## Compatibility
 15 | 
 16 | It has no external dependencies and therefore should run just fine in any relatively capable web browser, Node.js, etc. The provided distribution `.js` files are valid ES5.
 17 | 
 18 | ## Usage
 19 | 
 20 | Install it from npm:
 21 | 
 22 | ```shell
 23 | npm install bidi-js
 24 | ```
 25 | 
 26 | [![NPM](https://nodei.co/npm/bidi-js.png?compact=true)](https://npmjs.org/package/bidi-js)
 27 | 
 28 | Import and initialize:
 29 | 
 30 | ```js
 31 | import bidiFactory from 'bidi-js'
 32 | // or: const bidiFactory = require('bidi-js')
 33 | 
 34 | const bidi = bidiFactory()
 35 | ```
 36 | 
 37 | The `bidi-js` package's only export is a factory function which you _must invoke_ to return a `bidi` object; that object exposes the methods for bidi processing.
 38 | 
 39 | (_Why a factory function?_ The main reason is to ensure the entire module's code is wrapped within a single self-contained function with no closure dependencies. This enables that function to be stringified and passed into a web worker, for example.)
 40 | 
 41 | Now that you have the `bidi` object, you can:
 42 | 
 43 | ### Calculate bidi embedding levels
 44 | 
 45 | ```js
 46 | const embeddingLevels = bidi.getEmbeddingLevels(
 47 |   text, //the input string containing mixed-direction text
 48 |   explicitDirection //"ltr" or "rtl" if you don't want to auto-detect it
 49 | )
 50 | 
 51 | const { levels, paragraphs } = embeddingLevels
 52 | ```
 53 | 
 54 | The result object `embeddingLevels` will usually be passed to other functions described below. Its contents, should you need to inspect them individually, are:
 55 | 
 56 | * `levels` is a `Uint8Array` holding the calculated [bidi embedding levels](https://unicode.org/reports/tr9/#BD2) for each character in the string. The most important thing to know about these levels is that any given character is in a right-to-left scope if its embedding level is an odd number, and left-to-right if it's an even number.
 57 | 
 58 | * `paragraphs` is an array of `{start, end, level}` objects, one for each paragraph in the text (paragraphs are separated by explicit breaking characters, not soft line wrapping). The `start` and `end` indices are inclusive, and `level` is the resolved base embedding level of that paragraph.
 59 | 
 60 | ### Calculate character reorderings
 61 | 
 62 | ```js
 63 | const flips = bidi.getReorderSegments(
 64 |   text, //the full input string
 65 |   embeddingLevels //the full result object from getEmbeddingLevels
 66 | )
 67 | 
 68 | // Process all reversal sequences, in order:
 69 | flips.forEach(range => {
 70 |   const [start, end] = range
 71 |   // Reverse this sequence of characters from start to end, inclusive
 72 |   for (let i = start; i <= end; i++) {
 73 |     //...
 74 |   }
 75 | })
 76 | ```
 77 | 
 78 | Each "flip" is a range that should be reversed in place; they must all be applied in order.
 79 | 
 80 | Sometimes you don't want to process the whole string at once, but just a particular substring. A common example would be if you've applied line wrapping, in which case you need to process each line individually (in particular this does some special handling for trailing whitespace for each line). For this you can pass the extra `start` and `end` parameters:
 81 | 
 82 | ```js
 83 | yourWrappedLines.forEach(([lineStart, lineEnd]) => {
 84 |   const flips = bidi.getReorderSegments(
 85 |     text,
 86 |     embeddingLevels,
 87 |     lineStart,
 88 |     lineEnd //inclusive
 89 |   )
 90 |   // ...process flips for this line
 91 | })
 92 | ```
 93 | 
 94 | ### Handle right-to-left mirrored characters
 95 | 
 96 | Some characters that resolve to right-to-left need to be swapped with their "mirrored" characters. Examples of this are opening/closing parentheses. You can determine all the characters that need to be mirrored like so:
 97 | 
 98 | ```js
 99 | const mirrored = bidi.getMirroredCharactersMap(
100 |   text,
101 |   embeddingLevels
102 | )
103 | ```
104 | 
105 | This returns a `Map` of numeric character indices to replacement characters.
106 | 
107 | You can also process just a substring with extra `start` and `end` parameters:
108 | 
109 | ```js
110 | const mirrored = bidi.getMirroredCharactersMap(
111 |   text,
112 |   embeddingLevels,
113 |   start,
114 |   end //inclusive
115 | )
116 | ```
117 | 
118 | If you'd rather process mirrored characters individually, you can use the single `getMirroredCharacter` function; just make sure you only do it for right-to-left characters (those whose embedding level is an odd number). It will return `null` if the character doesn't support mirroring.
119 | 
120 | ```js
121 | const mirroredChar = (embeddingLevels.levels[charIndex] & 1) //odd number means RTL
122 |     ? bidi.getMirroredCharacter(text[charIndex])
123 |     : null
124 | ```
125 | 
126 | ### Get a character's bidi type
127 | 
128 | This is used internally, but you can also ask for the ["bidi character type"](https://unicode.org/reports/tr9/#BD1) of any character, should you need it:
129 | 
130 | ```js
131 | const bidiType = bidi.getBidiCharTypeName(text[charIndex])
132 | // e.g. "L", "R", "AL", "NSM", ...
133 | ```
134 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "bidi-js",
 3 |   "version": "1.0.3",
 4 |   "description": "A JavaScript implementation of the Unicode Bidirectional Algorithm",
 5 |   "main": "dist/bidi.js",
 6 |   "module": "dist/bidi.mjs",
 7 |   "repository": {
 8 |     "type": "git",
 9 |     "url": "https://github.com/lojjic/bidi-js.git"
10 |   },
11 |   "scripts": {
12 |     "build": "rollup -c rollup.config.js",
13 |     "test": "npx babel-node --plugins @babel/plugin-transform-modules-commonjs test/runTestsOnSrc.js",
14 |     "test-build": "node test/runTestsOnBuild.js"
15 |   },
16 |   "author": "Jason Johnston",
17 |   "license": "MIT",
18 |   "devDependencies": {
19 |     "@babel/cli": "^7.13.16",
20 |     "@babel/core": "^7.14.0",
21 |     "@babel/node": "^7.13.13",
22 |     "@babel/plugin-transform-modules-commonjs": "^7.13.8",
23 |     "@babel/preset-env": "^7.14.0",
24 |     "@rollup/plugin-babel": "^5.3.0",
25 |     "@rollup/plugin-buble": "^0.21.3",
26 |     "node-fetch": "^2.6.1",
27 |     "rollup": "^2.45.1",
28 |     "rollup-plugin-terser": "^7.0.2"
29 |   },
30 |   "files": [
31 |     "/dist",
32 |     "/src",
33 |     "/LICENSE.txt",
34 |     "/README.md"
35 |   ],
36 |   "dependencies": {
37 |     "require-from-string": "^2.0.2"
38 |   }
39 | }
40 | 


--------------------------------------------------------------------------------
/rollup.config.js:
--------------------------------------------------------------------------------
 1 | import { terser } from 'rollup-plugin-terser'
 2 | import buble from '@rollup/plugin-buble'
 3 | 
 4 | export default [
 5 |   // Step 1: compile the sources to an iife and wrap the whole thing into an exported factory
 6 |   // function. This ensures all the code is self-contained within that one factory function.
 7 |   {
 8 |     input: 'src/index.js',
 9 |     output: {
10 |       file: 'dist/bidi.js', // intermediate artifact; read back in and overwritten by step 2
11 |       format: 'iife',
12 |       name: 'bidi', // the name the footer below returns from the factory
13 |       banner: `export default function bidiFactory() {`,
14 |       footer: `return bidi}`
15 |     },
16 |     plugins: [
17 |       // Transpile down to ES5 for all build artifacts. This helps ensure that downstream
18 |       // transpilers won't inject references to external helpers/polyfills, which would
19 |       // break its ability to be serialized to a web worker.
20 |       buble()
21 |     ]
22 |   },
23 |   // Step 2: wrap that exported factory function as esm and umd, each plain and minified
24 |   {
25 |     input: 'dist/bidi.js', // the factory module written by step 1
26 |     output: [
27 |       {
28 |         file: 'dist/bidi.mjs',
29 |         format: 'esm'
30 |       },
31 |       {
32 |         file: 'dist/bidi.min.mjs',
33 |         format: 'esm',
34 |         plugins: [
35 |           terser({
36 |             ecma: 5,
37 |             mangle: {properties: {regex: /^_/}} // only property names starting with "_" are mangled
38 |           })
39 |         ]
40 |       },
41 |       {
42 |         file: 'dist/bidi.js', // same path as the step 1 output, which this replaces
43 |         format: 'umd',
44 |         name: 'bidi_js'
45 |       },
46 |       {
47 |         file: 'dist/bidi.min.js',
48 |         format: 'umd',
49 |         name: 'bidi_js',
50 |         plugins: [
51 |           terser({
52 |             ecma: 5,
53 |             mangle: {properties: {regex: /^_/}} // keep in sync with the esm minified output above
54 |           })
55 |         ]
56 |       }
57 |     ]
58 |   }
59 | ]
60 | 


--------------------------------------------------------------------------------
/scripts/generateBidiData.js:
--------------------------------------------------------------------------------
  1 | import fetch from 'node-fetch'
  2 | import { writeFileSync } from 'fs'
  3 | 
  4 | const radix = 36
  5 | 
  6 | /**
  7 |  * Fetch the Unicode 13.0.0 DerivedBidiClass.txt and write src/data/bidiCharTypes.data.js.
  8 |  *
  9 |  * The written module's default export is an object whose keys are the bidi character classes
 10 |  * (omitting the default "L") and whose values are comma-delimited lists of codepoint ranges. Each
 11 |  * range is "skip" or "skip+step" in base36: skip is the distance from the previous range's last
 12 |  * codepoint (0 at the start) to this range's first, step is this range's last minus its first.
 13 |  */
 14 | async function generateBidiCharTypesData() {
 15 |   const response = await fetch('https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedBidiClass.txt')
 16 |   const txt = await response.text()
 17 |   const classMap = new Map() // bidi class name -> array of [from] or [from, to] codepoints
 18 |   txt.split('\n').forEach(line => {
 19 |     line = line.trim()
 20 |     if (!line || line.startsWith('#')) return
 21 |     const match = line.match(/^([A-Z0-9.]+)\s*;\s([\w]+)*/) // [1] codepoint or "from..to", [2] class
 22 |     if (match) {
 23 |       const cls = match[2]
 24 |       if (cls !== 'L') { // L is the default so omit it
 25 |         let codePoints = match[1].split('..').map(c => parseInt(c, 16)) // one or two hex values
 26 |         let ranges = classMap.get(cls)
 27 |         if (!ranges) {
 28 |           classMap.set(cls, ranges = [])
 29 |         }
 30 |         ranges.push(codePoints)
 31 |       }
 32 |     }
 33 |   })
 34 | 
 35 |   const out = {}
 36 |   classMap.forEach((ranges, cls) => {
 37 |     let lastCode = 0
 38 |     ranges.sort((a, b) => a[0] - b[0])
 39 | 
 40 |     // Map absolute ranges to relative skip/step increments
 41 |     ranges = ranges.map(([from, to]) => {
 42 |       const skip = from - lastCode
 43 |       const step = to - from // NaN for a single codepoint (no `to`); handled as 0 / omitted below
 44 |       lastCode = to || from
 45 |       return [skip, step]
 46 |     })
 47 | 
 48 |     // Collapse ranges that were adjacent in the data (a skip of 1 starts right after the previous end)
 49 |     for (let i = 0; i < ranges.length - 1; i++) {
 50 |       while (ranges[i + 1] && ranges[i + 1][0] === 1) {
 51 |         ranges[i][1] = (ranges[i][1] || 0) + 1 + (ranges[i + 1][1] || 0)
 52 |         ranges.splice(i + 1, 1)
 53 |       }
 54 |     }
 55 | 
 56 |     // Stringify as base36 "skip" or "skip+step"; a falsy step (single codepoint) is left out
 57 |     ranges = ranges.map(([skip, step]) => {
 58 |       return `${skip.toString(radix)}${step ? '+' + step.toString(radix) : ''}`
 59 |     })
 60 | 
 61 |     out[cls] = ranges.join(',')
 62 |   })
 63 | 
 64 |   const fileContent = `// Bidi character types data, auto generated
 65 | export default ${JSON.stringify(out, null, 2)}
 66 | `
 67 |   const filePath = new URL('../src/data/bidiCharTypes.data.js', import.meta.url) // relative to this script
 68 |   writeFileSync(filePath, fileContent)
 69 | 
 70 |   console.log(`Wrote file ${filePath}, size ${fileContent.length}`)
 71 | }
 72 | 
 73 | /**
 74 |  * Generate data file containing all bidi bracket pairs and canonical mappings. It is an object
 75 |  * with keys "pairs" and "canonical", each holding a string value
 76 |  *
 77 |  * The string format is a comma-delimited string defining a set of pairs. Each pair contains two
 78 |  * codepoints separated by ">"; these are the opening and closing brackets for pairs and the
 79 |  * non-canonical and canonical characters for canonical. Every codepoint is stored as a
 80 |  * base36-encoded relative offset from the previous codepoint.
 81 |  */
 82 | async function generateBracketsData() {
 83 |   // Build Map of opening to closing bracket codepoints
 84 |   let response = await fetch('https://www.unicode.org/Public/13.0.0/ucd/BidiBrackets.txt')
 85 |   let txt = await response.text()
 86 |   let pairs = new Map()
 87 |   let reversePairs = new Map()
 88 |   txt.split('\n').forEach(line => {
 89 |     line = line.trim()
 90 |     if (!line || line.startsWith('#')) return
 91 |     const match = line.match(/^([A-Z0-9.]+)\s*;\s*([A-Z0-9.]+)\s*;\s*o/)
 92 |     if (match) {
 93 |       const opener = parseInt(match[1], 16)
 94 |       const closer = parseInt(match[2], 16)
 95 |       pairs.set(opener, closer)
 96 |       reversePairs.set(closer, opener)
 97 |     }
 98 |   })
 99 | 
100 |   // Get canonical equivs for each closing bracket
101 |   response = await fetch('https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt')
102 |   txt = await response.text()
103 |   const canonical = new Map()
104 |   txt.split('\n').forEach(line => {
105 |     if (!line || line.startsWith('#')) return
106 |     const fields = line.split(';')
107 |     const nonCanon = parseInt(fields[0], 16)
108 |     const canon = fields[5] && fields[5].replace(/^<[^>]+>\s*/, '')
109 |     if (canon && (pairs.has(nonCanon) || reversePairs.has(nonCanon))) {
110 |       canonical.set(
111 |         nonCanon,
112 |         parseInt(canon, 16)
113 |       )
114 |     }
115 |   })
116 | 
117 |   // Encode to strings
118 |   const out = {
119 |     pairs: encodeCodePointsMap(pairs),
120 |     canonical: encodeCodePointsMap(canonical)
121 |   }
122 | 
123 |   const fileContent = `// Bidi bracket pairs data, auto generated
124 | export default ${JSON.stringify(out, null, 2)}
125 | `
126 |   const filePath = new URL('../src/data/bidiBrackets.data.js', import.meta.url)
127 |   writeFileSync(filePath, fileContent)
128 | 
129 |   console.log(`Wrote file ${filePath}, size ${fileContent.length}`)
130 | }
131 | 
132 | async function generateMirroringData() {
133 |   // Build Map of each mirrored character to its mirror-image counterpart, one entry per pair
134 |   let response = await fetch('https://www.unicode.org/Public/13.0.0/ucd/BidiMirroring.txt')
135 |   let txt = await response.text()
136 |   let pairs = new Map() // a -> b, for the first direction of each pair seen in the file
137 |   let reversePairs = new Map() // b -> a, used to recognise the second direction when it shows up
138 |   txt.split('\n').forEach(line => {
139 |     line = line.trim()
140 |     if (!line || line.startsWith('#')) return
141 |     const match = line.match(/^([A-Z0-9.]+)\s*;\s*([A-Z0-9.]+)\s*/) // two hex codepoint fields
142 |     if (match) {
143 |       const a = parseInt(match[1], 16)
144 |       const b = parseInt(match[2], 16)
145 |       if (reversePairs.has(a) && reversePairs.get(a) !== b) { // a was recorded with another partner
146 |         console.warn(`mismatch: ${a} - ${b}`)
147 |       }
148 |       if (!reversePairs.has(a)) { // skip rows whose first codepoint was already stored as a partner
149 |         pairs.set(a, b)
150 |         reversePairs.set(b, a)
151 |       }
152 |     }
153 |   })
154 | 
155 |   const out = encodeCodePointsMap(pairs) // a single encoded string, not an object
156 | 
157 |   const fileContent = `// Bidi mirrored chars data, auto generated
158 | export default ${JSON.stringify(out, null, 2)}
159 | `
160 |   const filePath = new URL('../src/data/bidiMirroring.data.js', import.meta.url)
161 |   writeFileSync(filePath, fileContent)
162 | 
163 |   console.log(`Wrote file ${filePath}, size ${fileContent.length}`)
164 | 
165 | }
166 | 
167 | function encodeCodePointsMap(map) {
168 |   let lastCode = 0
169 |   let items = [...map.entries()].sort((a, b) => a[0] - b[0]).map(([a, b]) => {
170 |     const str = `${(a - lastCode).toString(radix)}>${(b - a).toString(radix)}`
171 |     lastCode = b
172 |     return str
173 |   })
174 |   // collapse repeated items into a "+n" entry
175 |   for (let i = 1; i < items.length; i++) {
176 |     if (items[i] === items[i - 1]) {
177 |       let reps = 1
178 |       for (let j = i + 1; j < items.length && (items[j] === items[i]); j++) {
179 |         reps++
180 |       }
181 |       items.splice(i, reps, `+${reps}`)
182 |     }
183 |   }
184 |   return items.join(',')
185 | }
186 | 
187 | 
188 | 
189 | generateBidiCharTypesData()
190 | generateBracketsData()
191 | generateMirroringData()
192 | 


--------------------------------------------------------------------------------
/src/brackets.js:
--------------------------------------------------------------------------------
 1 | import data from './data/bidiBrackets.data.js'
 2 | import { parseCharacterMap } from './util/parseCharacterMap.js'
 3 | 
 4 | let openToClose, closeToOpen, canonical
 5 | 
 6 | function parse () {
 7 |   if (!openToClose) {
 8 |     //const start = performance.now()
 9 |     let { map, reverseMap } = parseCharacterMap(data.pairs, true)
10 |     openToClose = map
11 |     closeToOpen = reverseMap
12 |     canonical = parseCharacterMap(data.canonical, false).map
13 |     //console.log(`brackets parsed in ${performance.now() - start}ms`)
14 |   }
15 | }
16 | 
17 | export function openingToClosingBracket (char) {
18 |   parse() // make sure the lookup maps have been decoded
19 |   return openToClose.get(char) || null // null when `char` has no entry among the opening brackets
20 | }
21 | 
22 | export function closingToOpeningBracket (char) {
23 |   parse() // make sure the lookup maps have been decoded
24 |   return closeToOpen.get(char) || null // null when `char` has no entry among the closing brackets
25 | }
26 | 
27 | export function getCanonicalBracket (char) {
28 |   parse() // make sure the lookup maps have been decoded
29 |   return canonical.get(char) || null // null when `char` has no entry in the canonical map
30 | }
31 | 


--------------------------------------------------------------------------------
/src/charTypes.js:
--------------------------------------------------------------------------------
 1 | import DATA from './data/bidiCharTypes.data.js'
 2 | 
 3 | const TYPES = {} // bidi class name -> single-bit integer
 4 | const TYPES_TO_NAMES = {} // single-bit integer -> bidi class name
 5 | TYPES.L = 1 //L is the default, so it is absent from DATA and gets the lowest bit here
 6 | TYPES_TO_NAMES[1] = 'L'
 7 | Object.keys(DATA).forEach((type, i) => {
 8 |   TYPES[type] = 1 << (i + 1) // one distinct bit per class, so sets of classes can be OR-ed into masks
 9 |   TYPES_TO_NAMES[TYPES[type]] = type
10 | })
11 | Object.freeze(TYPES)
12 | 
13 | const ISOLATE_INIT_TYPES = TYPES.LRI | TYPES.RLI | TYPES.FSI // masks: test membership with `type & MASK`
14 | const STRONG_TYPES = TYPES.L | TYPES.R | TYPES.AL
15 | const NEUTRAL_ISOLATE_TYPES = TYPES.B | TYPES.S | TYPES.WS | TYPES.ON | TYPES.FSI | TYPES.LRI | TYPES.RLI | TYPES.PDI
16 | const BN_LIKE_TYPES = TYPES.BN | TYPES.RLE | TYPES.LRE | TYPES.RLO | TYPES.LRO | TYPES.PDF
17 | const TRAILING_TYPES = TYPES.S | TYPES.WS | TYPES.B | ISOLATE_INIT_TYPES | TYPES.PDI | BN_LIKE_TYPES
18 | 
19 | let map = null // codepoint -> TYPES bit; filled by parseData() on first lookup
20 | 
21 | function parseData () {
22 |   if (!map) {
23 |     //const start = performance.now()
24 |     map = new Map()
25 |     let start = 0;
26 |     for (const type in DATA) {
27 |       if (DATA.hasOwnProperty(type)) {
28 |         const segments = DATA[type];
29 |         let temp = '';
30 |         let end;
31 |         let state = false;
32 |         let lastCode = 0;
33 |         for (let i = 0; i <= segments.length + 1; i += 1) {
34 |           const char = segments[i];
35 |           if (char !== ',' && i !== segments.length) {
36 |             if (char === '+') {
37 |               state = true;
38 |               lastCode = start = lastCode + parseInt(temp, 36);
39 |               temp = '';
40 |             } else {
41 |               temp += char;
42 |             }
43 |           } else {
44 |             if (!state) {
45 |               lastCode = start = lastCode + parseInt(temp, 36);
46 |               end = start;
47 |             } else {
48 |               end = start + parseInt(temp, 36);
49 |             }
50 |             state = false;
51 |             temp = '';
52 |             lastCode = end;
53 |             for (let j = start; j < end + 1; j += 1) {
54 |               map.set(j, TYPES[type]);
55 |             }
56 |           }
57 |         }
58 |       }
59 |     }
60 |     //console.log(`char types parsed in ${performance.now() - start}ms`)
61 |   }
62 | }
63 | 
64 | /**
65 |  * @param {string} char - character to classify; only its first code point is looked up
66 |  * @return {number} the matching TYPES bit, or TYPES.L when the data has no entry for it
67 |  */
68 | function getBidiCharType (char) {
69 |   parseData() // fills `map` on the first call
70 |   return map.get(char.codePointAt(0)) || TYPES.L
71 | }
72 | 
73 | function getBidiCharTypeName(char) {
74 |   return TYPES_TO_NAMES[getBidiCharType(char)] // maps the type bit back to its class name, e.g. "AL"
75 | }
76 | 
77 | export {
78 |   getBidiCharType,
79 |   getBidiCharTypeName,
80 |   TYPES,
81 |   TYPES_TO_NAMES,
82 |   ISOLATE_INIT_TYPES,
83 |   STRONG_TYPES,
84 |   NEUTRAL_ISOLATE_TYPES,
85 |   BN_LIKE_TYPES,
86 |   TRAILING_TYPES
87 | }
88 | 


--------------------------------------------------------------------------------
/src/data/bidiBrackets.data.js:
--------------------------------------------------------------------------------
1 | // Bidi bracket pairs data, auto generated
2 | export default {
3 |   "pairs": "14>1,1e>2,u>2,2wt>1,1>1,1ge>1,1wp>1,1j>1,f>1,hm>1,1>1,u>1,u6>1,1>1,+5,28>1,w>1,1>1,+3,b8>1,1>1,+3,1>3,-1>-1,3>1,1>1,+2,1s>1,1>1,x>1,th>1,1>1,+2,db>1,1>1,+3,3>1,1>1,+2,14qm>1,1>1,+1,4q>1,1e>2,u>2,2>1,+1",
4 |   "canonical": "6f1>-6dx,6dy>-6dx,6ec>-6ed,6ee>-6ed,6ww>2jj,-2ji>2jj,14r4>-1e7l,1e7m>-1e7l,1e7m>-1e5c,1e5d>-1e5b,1e5c>-14qx,14qy>-14qx,14vn>-1ecg,1ech>-1ecg,1edu>-1ecg,1eci>-1ecg,1eda>-1ecg,1eci>-1ecg,1eci>-168q,168r>-168q,168s>-14ye,14yf>-14ye"
5 | }
6 | 


--------------------------------------------------------------------------------
/src/data/bidiCharTypes.data.js:
--------------------------------------------------------------------------------
 1 | // Bidi character types data, auto generated
 2 | export default {
 3 |   "R": "13k,1a,2,3,3,2+1j,ch+16,a+1,5+2,2+n,5,a,4,6+16,4+3,h+1b,4mo,179q,2+9,2+11,2i9+7y,2+68,4,3+4,5+13,4+3,2+4k,3+29,8+cf,1t+7z,w+17,3+3m,1t+3z,16o1+5r,8+30,8+mc,29+1r,29+4v,75+73",
 4 |   "EN": "1c+9,3d+1,6,187+9,513,4+5,7+9,sf+j,175h+9,qw+q,161f+1d,4xt+a,25i+9",
 5 |   "ES": "17,2,6dp+1,f+1,av,16vr,mx+1,4o,2",
 6 |   "ET": "z+2,3h+3,b+1,ym,3e+1,2o,p4+1,8,6u,7c,g6,1wc,1n9+4,30+1b,2n,6d,qhx+1,h0m,a+1,49+2,63+1,4+1,6bb+3,12jj",
 7 |   "AN": "16o+5,2j+9,2+1,35,ed,1ff2+9,87+u",
 8 |   "CS": "18,2+1,b,2u,12k,55v,l,17v0,2,3,53,2+1,b",
 9 |   "B": "a,3,f+2,2v,690",
10 |   "S": "9,2,k",
11 |   "WS": "c,k,4f4,1vk+a,u,1j,335",
12 |   "ON": "x+1,4+4,h+5,r+5,r+3,z,5+3,2+1,2+1,5,2+2,3+4,o,w,ci+1,8+d,3+d,6+8,2+g,39+1,9,6+1,2,33,b8,3+1,3c+1,7+1,5r,b,7h+3,sa+5,2,3i+6,jg+3,ur+9,2v,ij+1,9g+9,7+a,8m,4+1,49+x,14u,2+2,c+2,e+2,e+2,e+1,i+n,e+e,2+p,u+2,e+2,36+1,2+3,2+1,b,2+2,6+5,2,2,2,h+1,5+4,6+3,3+f,16+2,5+3l,3+81,1y+p,2+40,q+a,m+13,2r+ch,2+9e,75+hf,3+v,2+2w,6e+5,f+6,75+2a,1a+p,2+2g,d+5x,r+b,6+3,4+o,g,6+1,6+2,2k+1,4,2j,5h+z,1m+1,1e+f,t+2,1f+e,d+3,4o+3,2s+1,w,535+1r,h3l+1i,93+2,2s,b+1,3l+x,2v,4g+3,21+3,kz+1,g5v+1,5a,j+9,n+v,2,3,2+8,2+1,3+2,2,3,46+1,4+4,h+5,r+5,r+a,3h+2,4+6,b+4,78,1r+24,4+c,4,1hb,ey+6,103+j,16j+c,1ux+7,5+g,fsh,jdq+1t,4,57+2e,p1,1m,1m,1m,1m,4kt+1,7j+17,5+2r,d+e,3+e,2+e,2+10,m+4,w,1n+5,1q,4z+5,4b+rb,9+c,4+c,4+37,d+2g,8+b,l+b,5+1j,9+9,7+13,9+t,3+1,27+3c,2+29,2+3q,d+d,3+4,4+2,6+6,a+o,8+6,a+2,e+6,16+42,2+1i",
13 |   "BN": "0+8,6+d,2s+5,2+p,e,4m9,1kt+2,2b+5,5+5,17q9+v,7k,6p+8,6+1,119d+3,440+7,96s+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+1,1ekf+75,6p+2rz,1ben+1,1ekf+1,1ekf+1",
14 |   "NSM": "lc+33,7o+6,7c+18,2,2+1,2+1,2,21+a,1d+k,h,2u+6,3+5,3+1,2+3,10,v+q,2k+a,1n+8,a,p+3,2+8,2+2,2+4,18+2,3c+e,2+v,1k,2,5+7,5,4+6,b+1,u,1n,5+3,9,l+1,r,3+1,1m,5+1,5+1,3+2,4,v+1,4,c+1,1m,5+4,2+1,5,l+1,n+5,2,1n,3,2+3,9,8+1,c+1,v,1q,d,1f,4,1m+2,6+2,2+3,8+1,c+1,u,1n,g+1,l+1,t+1,1m+1,5+3,9,l+1,u,21,8+2,2,2j,3+6,d+7,2r,3+8,c+5,23+1,s,2,2,1k+d,2+4,2+1,6+a,2+z,a,2v+3,2+5,2+1,3+1,q+1,5+2,h+3,e,3+1,7,g,jk+2,qb+2,u+2,u+1,v+1,1t+1,2+6,9,3+a,a,1a+2,3c+1,z,3b+2,5+1,a,7+2,64+1,3,1n,2+6,2,2,3+7,7+9,3,1d+g,1s+3,1d,2+4,2,6,15+8,d+1,x+3,3+1,2+2,1l,2+1,4,2+2,1n+7,3+1,49+2,2+c,2+6,5,7,4+1,5j+1l,2+4,k1+w,2db+2,3y,2p+v,ff+3,30+1,n9x+3,2+9,x+1,29+1,7l,4,5,q+1,6,48+1,r+h,e,13+7,q+a,1b+2,1d,3+3,3+1,14,1w+5,3+1,3+1,d,9,1c,1g,2+2,3+1,6+1,2,17+1,9,6n,3,5,fn5,ki+f,h+f,r2,6b,46+4,1af+2,2+1,6+3,15+2,5,4m+1,fy+3,as+1,4a+a,4x,1j+e,1l+2,1e+3,3+1,1y+2,11+4,2+7,1r,d+1,1h+8,b+3,3,2o+2,3,2+1,7,4h,4+7,m+1,1m+1,4,12+6,4+4,5g+7,3+2,2,o,2d+5,2,5+1,2+1,6n+3,7+1,2+1,s+1,2e+7,3,2+1,2z,2,3+5,2,2u+2,3+3,2+4,78+8,2+1,75+1,2,5,41+3,3+1,5,x+5,3+1,15+5,3+3,9,a+5,3+2,1b+c,2+1,bb+6,2+5,2d+l,3+6,2+1,2+1,3f+5,4,2+1,2+6,2,21+1,4,2,9o+1,f0c+4,1o+6,t5,1s+3,2a,f5l+1,43t+2,i+7,3+6,v+3,45+2,1j0+1i,5+1d,9,f,n+4,2+e,11t+6,2+g,3+6,2+1,2+4,7a+6,c6+3,15t+6,32+6,gzhy+6n",
15 |   "AL": "16w,3,2,e+1b,z+2,2+2s,g+1,8+1,b+m,2+t,s+2i,c+e,4h+f,1d+1e,1bwe+dp,3+3z,x+c,2+1,35+3y,2rm+z,5+7,b+5,dt+l,c+u,17nl+27,1t+27,4x+6n,3+d",
16 |   "LRO": "6ct",
17 |   "RLO": "6cu",
18 |   "LRE": "6cq",
19 |   "RLE": "6cr",
20 |   "PDF": "6cs",
21 |   "LRI": "6ee",
22 |   "RLI": "6ef",
23 |   "FSI": "6eg",
24 |   "PDI": "6eh"
25 | }
26 | 


--------------------------------------------------------------------------------
/src/data/bidiMirroring.data.js:
--------------------------------------------------------------------------------
1 | // Bidi mirrored chars data, auto generated
2 | export default "14>1,j>2,t>2,u>2,1a>g,2v3>1,1>1,1ge>1,1wd>1,b>1,1j>1,f>1,ai>3,-2>3,+1,8>1k0,-1jq>1y7,-1y6>1hf,-1he>1h6,-1h5>1ha,-1h8>1qi,-1pu>1,6>3u,-3s>7,6>1,1>1,f>1,1>1,+2,3>1,1>1,+13,4>1,1>1,6>1eo,-1ee>1,3>1mg,-1me>1mk,-1mj>1mi,-1mg>1mi,-1md>1,1>1,+2,1>10k,-103>1,1>1,4>1,5>1,1>1,+10,3>1,1>8,-7>8,+1,-6>7,+1,a>1,1>1,u>1,u6>1,1>1,+5,26>1,1>1,2>1,2>2,8>1,7>1,4>1,1>1,+5,b8>1,1>1,+3,1>3,-2>1,2>1,1>1,+2,c>1,3>1,1>1,+2,h>1,3>1,a>1,1>1,2>1,3>1,1>1,d>1,f>1,3>1,1a>1,1>1,6>1,7>1,13>1,k>1,1>1,+19,4>1,1>1,+2,2>1,1>1,+18,m>1,a>1,1>1,lk>1,1>1,4>1,2>1,f>1,3>1,1>1,+3,db>1,1>1,+3,3>1,1>1,+2,14qm>1,1>1,+1,6>1,4j>1,j>2,t>2,u>2,2>1,+1"
3 | 


--------------------------------------------------------------------------------
/src/embeddingLevels.js:
--------------------------------------------------------------------------------
  1 | import {
  2 |   BN_LIKE_TYPES,
  3 |   getBidiCharType,
  4 |   ISOLATE_INIT_TYPES,
  5 |   NEUTRAL_ISOLATE_TYPES,
  6 |   STRONG_TYPES,
  7 |   TRAILING_TYPES,
  8 |   TYPES
  9 | } from './charTypes.js'
 10 | import { closingToOpeningBracket, getCanonicalBracket, openingToClosingBracket } from './brackets.js'
 11 | 
 12 | // Local type aliases
 13 | const {
 14 |   L: TYPE_L,
 15 |   R: TYPE_R,
 16 |   EN: TYPE_EN,
 17 |   ES: TYPE_ES,
 18 |   ET: TYPE_ET,
 19 |   AN: TYPE_AN,
 20 |   CS: TYPE_CS,
 21 |   B: TYPE_B,
 22 |   S: TYPE_S,
 23 |   ON: TYPE_ON,
 24 |   BN: TYPE_BN,
 25 |   NSM: TYPE_NSM,
 26 |   AL: TYPE_AL,
 27 |   LRO: TYPE_LRO,
 28 |   RLO: TYPE_RLO,
 29 |   LRE: TYPE_LRE,
 30 |   RLE: TYPE_RLE,
 31 |   PDF: TYPE_PDF,
 32 |   LRI: TYPE_LRI,
 33 |   RLI: TYPE_RLI,
 34 |   FSI: TYPE_FSI,
 35 |   PDI: TYPE_PDI
 36 | } = TYPES
 37 | 
 38 | /**
 39 |  * @typedef {object} GetEmbeddingLevelsResult
 40 |  * @property {{start, end, level}[]} paragraphs
 41 |  * @property {Uint8Array} levels
 42 |  */
 43 | 
 44 | /**
 45 |  * This function applies the Bidirectional Algorithm to a string, returning the resolved embedding levels
 46 |  * in a single Uint8Array plus a list of objects holding each paragraph's start and end indices and resolved
 47 |  * base embedding level.
 48 |  *
 49 |  * @param {string} string - The input string
 50 |  * @param {"ltr"|"rtl"|"auto"} [baseDirection] - Use "ltr" or "rtl" to force a base paragraph direction,
 51 |  *        otherwise a direction will be chosen automatically from each paragraph's contents.
 52 |  * @return {GetEmbeddingLevelsResult}
 53 |  */
 54 | export function getEmbeddingLevels (string, baseDirection) {
 55 |   const MAX_DEPTH = 125
 56 | 
 57 |   // Start by mapping all characters to their unicode type, as a bitmask integer
 58 |   const charTypes = new Uint32Array(string.length)
 59 |   for (let i = 0; i < string.length; i++) {
 60 |     charTypes[i] = getBidiCharType(string[i])
 61 |   }
 62 | 
 63 |   const charTypeCounts = new Map() //will be cleared at start of each paragraph
 64 |   function changeCharType(i, type) {
 65 |     const oldType = charTypes[i]
 66 |     charTypes[i] = type
 67 |     charTypeCounts.set(oldType, charTypeCounts.get(oldType) - 1)
 68 |     if (oldType & NEUTRAL_ISOLATE_TYPES) {
 69 |       charTypeCounts.set(NEUTRAL_ISOLATE_TYPES, charTypeCounts.get(NEUTRAL_ISOLATE_TYPES) - 1)
 70 |     }
 71 |     charTypeCounts.set(type, (charTypeCounts.get(type) || 0) + 1)
 72 |     if (type & NEUTRAL_ISOLATE_TYPES) {
 73 |       charTypeCounts.set(NEUTRAL_ISOLATE_TYPES, (charTypeCounts.get(NEUTRAL_ISOLATE_TYPES) || 0) + 1)
 74 |     }
 75 |   }
 76 | 
 77 |   const embedLevels = new Uint8Array(string.length)
 78 |   const isolationPairs = new Map() //init->pdi and pdi->init
 79 | 
 80 |   // === 3.3.1 The Paragraph Level ===
 81 |   // 3.3.1 P1: Split the text into paragraphs
 82 |   const paragraphs = [] // [{start, end, level}, ...]
 83 |   let paragraph = null
 84 |   for (let i = 0; i < string.length; i++) {
 85 |     if (!paragraph) {
 86 |       paragraphs.push(paragraph = {
 87 |         start: i,
 88 |         end: string.length - 1,
 89 |         // 3.3.1 P2-P3: Determine the paragraph level
 90 |         level: baseDirection === 'rtl' ? 1 : baseDirection === 'ltr' ? 0 : determineAutoEmbedLevel(i, false)
 91 |       })
 92 |     }
 93 |     if (charTypes[i] & TYPE_B) {
 94 |       paragraph.end = i
 95 |       paragraph = null
 96 |     }
 97 |   }
 98 | 
 99 |   const FORMATTING_TYPES = TYPE_RLE | TYPE_LRE | TYPE_RLO | TYPE_LRO | ISOLATE_INIT_TYPES | TYPE_PDI | TYPE_PDF | TYPE_B
100 |   const nextEven = n => n + ((n & 1) ? 1 : 2)
101 |   const nextOdd = n => n + ((n & 1) ? 2 : 1)
102 | 
103 |   // Everything from here on will operate per paragraph.
104 |   for (let paraIdx = 0; paraIdx < paragraphs.length; paraIdx++) {
105 |     paragraph = paragraphs[paraIdx]
106 |     const statusStack = [{
107 |       _level: paragraph.level,
108 |       _override: 0, //0=neutral, 1=L, 2=R
109 |       _isolate: 0 //bool
110 |     }]
111 |     let stackTop
112 |     let overflowIsolateCount = 0
113 |     let overflowEmbeddingCount = 0
114 |     let validIsolateCount = 0
115 |     charTypeCounts.clear()
116 | 
117 |     // === 3.3.2 Explicit Levels and Directions ===
118 |     for (let i = paragraph.start; i <= paragraph.end; i++) {
119 |       let charType = charTypes[i]
120 |       stackTop = statusStack[statusStack.length - 1]
121 | 
122 |       // Set initial counts
123 |       charTypeCounts.set(charType, (charTypeCounts.get(charType) || 0) + 1)
124 |       if (charType & NEUTRAL_ISOLATE_TYPES) {
125 |         charTypeCounts.set(NEUTRAL_ISOLATE_TYPES, (charTypeCounts.get(NEUTRAL_ISOLATE_TYPES) || 0) + 1)
126 |       }
127 | 
128 |       // Explicit Embeddings: 3.3.2 X2 - X3
129 |       if (charType & FORMATTING_TYPES) { //prefilter all formatters
130 |         if (charType & (TYPE_RLE | TYPE_LRE)) {
131 |           embedLevels[i] = stackTop._level // 5.2
132 |           const level = (charType === TYPE_RLE ? nextOdd : nextEven)(stackTop._level)
133 |           if (level <= MAX_DEPTH && !overflowIsolateCount && !overflowEmbeddingCount) {
134 |             statusStack.push({
135 |               _level: level,
136 |               _override: 0,
137 |               _isolate: 0
138 |             })
139 |           } else if (!overflowIsolateCount) {
140 |             overflowEmbeddingCount++
141 |           }
142 |         }
143 | 
144 |         // Explicit Overrides: 3.3.2 X4 - X5
145 |         else if (charType & (TYPE_RLO | TYPE_LRO)) {
146 |           embedLevels[i] = stackTop._level // 5.2
147 |           const level = (charType === TYPE_RLO ? nextOdd : nextEven)(stackTop._level)
148 |           if (level <= MAX_DEPTH && !overflowIsolateCount && !overflowEmbeddingCount) {
149 |             statusStack.push({
150 |               _level: level,
151 |               _override: (charType & TYPE_RLO) ? TYPE_R : TYPE_L,
152 |               _isolate: 0
153 |             })
154 |           } else if (!overflowIsolateCount) {
155 |             overflowEmbeddingCount++
156 |           }
157 |         }
158 | 
159 |         // Isolates: 3.3.2 X5a - X5c
160 |         else if (charType & ISOLATE_INIT_TYPES) {
161 |           // X5c - FSI becomes either RLI or LRI
162 |           if (charType & TYPE_FSI) {
163 |             charType = determineAutoEmbedLevel(i + 1, true) === 1 ? TYPE_RLI : TYPE_LRI
164 |           }
165 | 
166 |           embedLevels[i] = stackTop._level
167 |           if (stackTop._override) {
168 |             changeCharType(i, stackTop._override)
169 |           }
170 |           const level = (charType === TYPE_RLI ? nextOdd : nextEven)(stackTop._level)
171 |           if (level <= MAX_DEPTH && overflowIsolateCount === 0 && overflowEmbeddingCount === 0) {
172 |             validIsolateCount++
173 |             statusStack.push({
174 |               _level: level,
175 |               _override: 0,
176 |               _isolate: 1,
177 |               _isolInitIndex: i
178 |             })
179 |           } else {
180 |             overflowIsolateCount++
181 |           }
182 |         }
183 | 
184 |         // Terminating Isolates: 3.3.2 X6a
185 |         else if (charType & TYPE_PDI) {
186 |           if (overflowIsolateCount > 0) {
187 |             overflowIsolateCount--
188 |           } else if (validIsolateCount > 0) {
189 |             overflowEmbeddingCount = 0
190 |             while (!statusStack[statusStack.length - 1]._isolate) {
191 |               statusStack.pop()
192 |             }
193 |             // Add to isolation pairs bidirectional mapping:
194 |             const isolInitIndex = statusStack[statusStack.length - 1]._isolInitIndex
195 |             if (isolInitIndex != null) {
196 |               isolationPairs.set(isolInitIndex, i)
197 |               isolationPairs.set(i, isolInitIndex)
198 |             }
199 |             statusStack.pop()
200 |             validIsolateCount--
201 |           }
202 |           stackTop = statusStack[statusStack.length - 1]
203 |           embedLevels[i] = stackTop._level
204 |           if (stackTop._override) {
205 |             changeCharType(i, stackTop._override)
206 |           }
207 |         }
208 | 
209 | 
210 |         // Terminating Embeddings and Overrides: 3.3.2 X7
211 |         else if (charType & TYPE_PDF) {
212 |           if (overflowIsolateCount === 0) {
213 |             if (overflowEmbeddingCount > 0) {
214 |               overflowEmbeddingCount--
215 |             } else if (!stackTop._isolate && statusStack.length > 1) {
216 |               statusStack.pop()
217 |               stackTop = statusStack[statusStack.length - 1]
218 |             }
219 |           }
220 |           embedLevels[i] = stackTop._level // 5.2
221 |         }
222 | 
223 |         // End of Paragraph: 3.3.2 X8
224 |         else if (charType & TYPE_B) {
225 |           embedLevels[i] = paragraph.level
226 |         }
227 |       }
228 | 
229 |       // Non-formatting characters: 3.3.2 X6
230 |       else {
231 |         embedLevels[i] = stackTop._level
232 |         // NOTE: This exclusion of BN seems to go against what section 5.2 says, but is required for test passage
233 |         if (stackTop._override && charType !== TYPE_BN) {
234 |           changeCharType(i, stackTop._override)
235 |         }
236 |       }
237 |     }
238 | 
239 |     // === 3.3.3 Preparations for Implicit Processing ===
240 | 
241 |     // Remove all RLE, LRE, RLO, LRO, PDF, and BN characters: 3.3.3 X9
242 |     // Note: Due to section 5.2, we won't remove them, but we'll use the BN_LIKE_TYPES bitset to
243 |     // easily ignore them all from here on out.
244 | 
245 |     // 3.3.3 X10
246 |     // Compute the set of isolating run sequences as specified by BD13
247 |     const levelRuns = []
248 |     let currentRun = null
249 |     let isolationLevel = 0
250 |     for (let i = paragraph.start; i <= paragraph.end; i++) {
251 |       const charType = charTypes[i]
252 |       if (!(charType & BN_LIKE_TYPES)) {
253 |         const lvl = embedLevels[i]
254 |         const isIsolInit = charType & ISOLATE_INIT_TYPES
255 |         const isPDI = charType === TYPE_PDI
256 |         if (isIsolInit) {
257 |           isolationLevel++
258 |         }
259 |         if (currentRun && lvl === currentRun._level) {
260 |           currentRun._end = i
261 |           currentRun._endsWithIsolInit = isIsolInit
262 |         } else {
263 |           levelRuns.push(currentRun = {
264 |             _start: i,
265 |             _end: i,
266 |             _level: lvl,
267 |             _startsWithPDI: isPDI,
268 |             _endsWithIsolInit: isIsolInit
269 |           })
270 |         }
271 |         if (isPDI) {
272 |           isolationLevel--
273 |         }
274 |       }
275 |     }
276 |     const isolatingRunSeqs = [] // [{seqIndices: [], sosType: L|R, eosType: L|R}]
277 |     for (let runIdx = 0; runIdx < levelRuns.length; runIdx++) {
278 |       const run = levelRuns[runIdx]
279 |       if (!run._startsWithPDI || (run._startsWithPDI && !isolationPairs.has(run._start))) {
280 |         const seqRuns = [currentRun = run]
281 |         for (let pdiIndex; currentRun && currentRun._endsWithIsolInit && (pdiIndex = isolationPairs.get(currentRun._end)) != null;) {
282 |           for (let i = runIdx + 1; i < levelRuns.length; i++) {
283 |             if (levelRuns[i]._start === pdiIndex) {
284 |               seqRuns.push(currentRun = levelRuns[i])
285 |               break
286 |             }
287 |           }
288 |         }
289 |         // build flat list of indices across all runs:
290 |         const seqIndices = []
291 |         for (let i = 0; i < seqRuns.length; i++) {
292 |           const run = seqRuns[i]
293 |           for (let j = run._start; j <= run._end; j++) {
294 |             seqIndices.push(j)
295 |           }
296 |         }
297 |         // determine the sos/eos types:
298 |         let firstLevel = embedLevels[seqIndices[0]]
299 |         let prevLevel = paragraph.level
300 |         for (let i = seqIndices[0] - 1; i >= 0; i--) {
301 |           if (!(charTypes[i] & BN_LIKE_TYPES)) { //5.2
302 |             prevLevel = embedLevels[i]
303 |             break
304 |           }
305 |         }
306 |         const lastIndex = seqIndices[seqIndices.length - 1]
307 |         let lastLevel = embedLevels[lastIndex]
308 |         let nextLevel = paragraph.level
309 |         if (!(charTypes[lastIndex] & ISOLATE_INIT_TYPES)) {
310 |           for (let i = lastIndex + 1; i <= paragraph.end; i++) {
311 |             if (!(charTypes[i] & BN_LIKE_TYPES)) { //5.2
312 |               nextLevel = embedLevels[i]
313 |               break
314 |             }
315 |           }
316 |         }
317 |         isolatingRunSeqs.push({
318 |           _seqIndices: seqIndices,
319 |           _sosType: Math.max(prevLevel, firstLevel) % 2 ? TYPE_R : TYPE_L,
320 |           _eosType: Math.max(nextLevel, lastLevel) % 2 ? TYPE_R : TYPE_L
321 |         })
322 |       }
323 |     }
324 | 
325 |     // The next steps are done per isolating run sequence
326 |     for (let seqIdx = 0; seqIdx < isolatingRunSeqs.length; seqIdx++) {
327 |       const { _seqIndices: seqIndices, _sosType: sosType, _eosType: eosType } = isolatingRunSeqs[seqIdx]
328 |       /**
329 |        * All the level runs in an isolating run sequence have the same embedding level.
330 |        * 
331 |        * DO NOT change any `embedLevels[i]` within the current scope.
332 |        */
333 |       const embedDirection = ((embedLevels[seqIndices[0]]) & 1) ? TYPE_R : TYPE_L;
334 | 
335 |       // === 3.3.4 Resolving Weak Types ===
336 | 
337 |       // W1 + 5.2. Search backward from each NSM to the first character in the isolating run sequence whose
338 |       // bidirectional type is not BN, and set the NSM to ON if it is an isolate initiator or PDI, and to its
339 |       // type otherwise. If the NSM is the first non-BN character, change the NSM to the type of sos.
340 |       if (charTypeCounts.get(TYPE_NSM)) {
341 |         for (let si = 0; si < seqIndices.length; si++) {
342 |           const i = seqIndices[si]
343 |           if (charTypes[i] & TYPE_NSM) {
344 |             let prevType = sosType
345 |             for (let sj = si - 1; sj >= 0; sj--) {
346 |               if (!(charTypes[seqIndices[sj]] & BN_LIKE_TYPES)) { //5.2 scan back to first non-BN
347 |                 prevType = charTypes[seqIndices[sj]]
348 |                 break
349 |               }
350 |             }
351 |             changeCharType(i, (prevType & (ISOLATE_INIT_TYPES | TYPE_PDI)) ? TYPE_ON : prevType)
352 |           }
353 |         }
354 |       }
355 | 
356 |       // W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos)
357 |       // is found. If an AL is found, change the type of the European number to Arabic number.
358 |       if (charTypeCounts.get(TYPE_EN)) {
359 |         for (let si = 0; si < seqIndices.length; si++) {
360 |           const i = seqIndices[si]
361 |           if (charTypes[i] & TYPE_EN) {
362 |             for (let sj = si - 1; sj >= -1; sj--) {
363 |               const prevCharType = sj === -1 ? sosType : charTypes[seqIndices[sj]]
364 |               if (prevCharType & STRONG_TYPES) {
365 |                 if (prevCharType === TYPE_AL) {
366 |                   changeCharType(i, TYPE_AN)
367 |                 }
368 |                 break
369 |               }
370 |             }
371 |           }
372 |         }
373 |       }
374 | 
375 |       // W3. Change all ALs to R
376 |       if (charTypeCounts.get(TYPE_AL)) {
377 |         for (let si = 0; si < seqIndices.length; si++) {
378 |           const i = seqIndices[si]
379 |           if (charTypes[i] & TYPE_AL) {
380 |             changeCharType(i, TYPE_R)
381 |           }
382 |         }
383 |       }
384 | 
385 |       // W4. A single European separator between two European numbers changes to a European number. A single common
386 |       // separator between two numbers of the same type changes to that type.
387 |       if (charTypeCounts.get(TYPE_ES) || charTypeCounts.get(TYPE_CS)) {
388 |         for (let si = 1; si < seqIndices.length - 1; si++) {
389 |           const i = seqIndices[si]
390 |           if (charTypes[i] & (TYPE_ES | TYPE_CS)) {
391 |             let prevType = 0, nextType = 0
392 |             for (let sj = si - 1; sj >= 0; sj--) {
393 |               prevType = charTypes[seqIndices[sj]]
394 |               if (!(prevType & BN_LIKE_TYPES)) { //5.2
395 |                 break
396 |               }
397 |             }
398 |             for (let sj = si + 1; sj < seqIndices.length; sj++) {
399 |               nextType = charTypes[seqIndices[sj]]
400 |               if (!(nextType & BN_LIKE_TYPES)) { //5.2
401 |                 break
402 |               }
403 |             }
404 |             if (prevType === nextType && (charTypes[i] === TYPE_ES ? prevType === TYPE_EN : (prevType & (TYPE_EN | TYPE_AN)))) {
405 |               changeCharType(i, prevType)
406 |             }
407 |           }
408 |         }
409 |       }
410 | 
411 |       // W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
412 |       if (charTypeCounts.get(TYPE_EN)) {
413 |         for (let si = 0; si < seqIndices.length; si++) {
414 |           const i = seqIndices[si]
415 |           if (charTypes[i] & TYPE_EN) {
416 |             for (let sj = si - 1; sj >= 0 && (charTypes[seqIndices[sj]] & (TYPE_ET | BN_LIKE_TYPES)); sj--) {
417 |               changeCharType(seqIndices[sj], TYPE_EN)
418 |             }
419 |             for (si++; si < seqIndices.length && (charTypes[seqIndices[si]] & (TYPE_ET | BN_LIKE_TYPES | TYPE_EN)); si++) {
420 |               if (charTypes[seqIndices[si]] !== TYPE_EN) {
421 |                 changeCharType(seqIndices[si], TYPE_EN)
422 |               }
423 |             }
424 |           }
425 |         }
426 |       }
427 | 
428 |       // W6. Otherwise, separators and terminators change to Other Neutral.
429 |       if (charTypeCounts.get(TYPE_ET) || charTypeCounts.get(TYPE_ES) || charTypeCounts.get(TYPE_CS)) {
430 |         for (let si = 0; si < seqIndices.length; si++) {
431 |           const i = seqIndices[si]
432 |           if (charTypes[i] & (TYPE_ET | TYPE_ES | TYPE_CS)) {
433 |             changeCharType(i, TYPE_ON)
434 |             // 5.2 transform adjacent BNs too:
435 |             for (let sj = si - 1; sj >= 0 && (charTypes[seqIndices[sj]] & BN_LIKE_TYPES); sj--) {
436 |               changeCharType(seqIndices[sj], TYPE_ON)
437 |             }
438 |             for (let sj = si + 1; sj < seqIndices.length && (charTypes[seqIndices[sj]] & BN_LIKE_TYPES); sj++) {
439 |               changeCharType(seqIndices[sj], TYPE_ON)
440 |             }
441 |           }
442 |         }
443 |       }
444 | 
445 |       // W7. Search backward from each instance of a European number until the first strong type (R, L, or sos)
446 |       // is found. If an L is found, then change the type of the European number to L.
447 |       // NOTE: implemented in single forward pass for efficiency
448 |       if (charTypeCounts.get(TYPE_EN)) {
449 |         for (let si = 0, prevStrongType = sosType; si < seqIndices.length; si++) {
450 |           const i = seqIndices[si]
451 |           const type = charTypes[i]
452 |           if (type & TYPE_EN) {
453 |             if (prevStrongType === TYPE_L) {
454 |               changeCharType(i, TYPE_L)
455 |             }
456 |           } else if (type & STRONG_TYPES) {
457 |             prevStrongType = type
458 |           }
459 |         }
460 |       }
461 | 
462 |       // === 3.3.5 Resolving Neutral and Isolate Formatting Types ===
463 | 
464 |       if (charTypeCounts.get(NEUTRAL_ISOLATE_TYPES)) {
465 |         // N0. Process bracket pairs in an isolating run sequence sequentially in the logical order of the text
466 |         // positions of the opening paired brackets using the logic given below. Within this scope, bidirectional
467 |         // types EN and AN are treated as R.
468 |         const R_TYPES_FOR_N_STEPS = (TYPE_R | TYPE_EN | TYPE_AN)
469 |         const STRONG_TYPES_FOR_N_STEPS = R_TYPES_FOR_N_STEPS | TYPE_L
470 | 
471 |         // * Identify the bracket pairs in the current isolating run sequence according to BD16.
472 |         const bracketPairs = []
473 |         {
474 |           const openerStack = []
475 |           for (let si = 0; si < seqIndices.length; si++) {
476 |             // NOTE: for any potential bracket character we also test that it still carries a NI
477 |             // type, as that may have been changed earlier. This doesn't seem to be explicitly
478 |             // called out in the spec, but is required for passage of certain tests.
479 |             if (charTypes[seqIndices[si]] & NEUTRAL_ISOLATE_TYPES) {
480 |               const char = string[seqIndices[si]]
481 |               let oppositeBracket
482 |               // Opening bracket
483 |               if (openingToClosingBracket(char) !== null) {
484 |                 if (openerStack.length < 63) {
485 |                   openerStack.push({ char, seqIndex: si })
486 |                 } else {
487 |                   break
488 |                 }
489 |               }
490 |               // Closing bracket
491 |               else if ((oppositeBracket = closingToOpeningBracket(char)) !== null) {
492 |                 for (let stackIdx = openerStack.length - 1; stackIdx >= 0; stackIdx--) {
493 |                   const stackChar = openerStack[stackIdx].char
494 |                   if (stackChar === oppositeBracket ||
495 |                     stackChar === closingToOpeningBracket(getCanonicalBracket(char)) ||
496 |                     openingToClosingBracket(getCanonicalBracket(stackChar)) === char
497 |                   ) {
498 |                     bracketPairs.push([openerStack[stackIdx].seqIndex, si])
499 |                     openerStack.length = stackIdx //pop the matching bracket and all following
500 |                     break
501 |                   }
502 |                 }
503 |               }
504 |             }
505 |           }
506 |           bracketPairs.sort((a, b) => a[0] - b[0])
507 |         }
508 |         // * For each bracket-pair element in the list of pairs of text positions
509 |         for (let pairIdx = 0; pairIdx < bracketPairs.length; pairIdx++) {
510 |           const [openSeqIdx, closeSeqIdx] = bracketPairs[pairIdx]
511 |           // a. Inspect the bidirectional types of the characters enclosed within the bracket pair.
512 |           // b. If any strong type (either L or R) matching the embedding direction is found, set the type for both
513 |           // brackets in the pair to match the embedding direction.
514 |           let foundStrongType = false
515 |           let useStrongType = 0
516 |           for (let si = openSeqIdx + 1; si < closeSeqIdx; si++) {
517 |             const i = seqIndices[si]
518 |             if (charTypes[i] & STRONG_TYPES_FOR_N_STEPS) {
519 |               foundStrongType = true
520 |               const lr = (charTypes[i] & R_TYPES_FOR_N_STEPS) ? TYPE_R : TYPE_L
521 |               if (lr === embedDirection) {
522 |                 useStrongType = lr
523 |                 break
524 |               }
525 |             }
526 |           }
527 |           // c. Otherwise, if there is a strong type it must be opposite the embedding direction. Therefore, test
528 |           // for an established context with a preceding strong type by checking backwards before the opening paired
529 |           // bracket until the first strong type (L, R, or sos) is found.
530 |           //    1. If the preceding strong type is also opposite the embedding direction, context is established, so
531 |           //    set the type for both brackets in the pair to that direction.
532 |           //    2. Otherwise set the type for both brackets in the pair to the embedding direction.
533 |           if (foundStrongType && !useStrongType) {
534 |             useStrongType = sosType
535 |             for (let si = openSeqIdx - 1; si >= 0; si--) {
536 |               const i = seqIndices[si]
537 |               if (charTypes[i] & STRONG_TYPES_FOR_N_STEPS) {
538 |                 const lr = (charTypes[i] & R_TYPES_FOR_N_STEPS) ? TYPE_R : TYPE_L
539 |                 if (lr !== embedDirection) {
540 |                   useStrongType = lr
541 |                 } else {
542 |                   useStrongType = embedDirection
543 |                 }
544 |                 break
545 |               }
546 |             }
547 |           }
548 |           if (useStrongType) {
549 |             charTypes[seqIndices[openSeqIdx]] = charTypes[seqIndices[closeSeqIdx]] = useStrongType
550 |             // * Any number of characters that had original bidirectional character type NSM prior to the application
551 |             // of W1 that immediately follow a paired bracket which changed to L or R under N0 should change to match
552 |             // the type of their preceding bracket.
553 |             if (useStrongType !== embedDirection) {
554 |               for (let si = openSeqIdx + 1; si < seqIndices.length; si++) {
555 |                 if (!(charTypes[seqIndices[si]] & BN_LIKE_TYPES)) {
556 |                   if (getBidiCharType(string[seqIndices[si]]) & TYPE_NSM) {
557 |                     charTypes[seqIndices[si]] = useStrongType
558 |                   }
559 |                   break
560 |                 }
561 |               }
562 |             }
563 |             if (useStrongType !== embedDirection) {
564 |               for (let si = closeSeqIdx + 1; si < seqIndices.length; si++) {
565 |                 if (!(charTypes[seqIndices[si]] & BN_LIKE_TYPES)) {
566 |                   if (getBidiCharType(string[seqIndices[si]]) & TYPE_NSM) {
567 |                     charTypes[seqIndices[si]] = useStrongType
568 |                   }
569 |                   break
570 |                 }
571 |               }
572 |             }
573 |           }
574 |         }
575 | 
576 |         // N1. A sequence of NIs takes the direction of the surrounding strong text if the text on both sides has the
577 |         // same direction.
578 |         // N2. Any remaining NIs take the embedding direction.
579 |         for (let si = 0; si < seqIndices.length; si++) {
580 |           if (charTypes[seqIndices[si]] & NEUTRAL_ISOLATE_TYPES) {
581 |             let niRunStart = si, niRunEnd = si
582 |             let prevType = sosType //si === 0 ? sosType : (charTypes[seqIndices[si - 1]] & R_TYPES_FOR_N_STEPS) ? TYPE_R : TYPE_L
583 |             for (let si2 = si - 1; si2 >= 0; si2--) {
584 |               if (charTypes[seqIndices[si2]] & BN_LIKE_TYPES) {
585 |                 niRunStart = si2 //5.2 treat BNs adjacent to NIs as NIs
586 |               } else {
587 |                 prevType = (charTypes[seqIndices[si2]] & R_TYPES_FOR_N_STEPS) ? TYPE_R : TYPE_L
588 |                 break
589 |               }
590 |             }
591 |             let nextType = eosType
592 |             for (let si2 = si + 1; si2 < seqIndices.length; si2++) {
593 |               if (charTypes[seqIndices[si2]] & (NEUTRAL_ISOLATE_TYPES | BN_LIKE_TYPES)) {
594 |                 niRunEnd = si2
595 |               } else {
596 |                 nextType = (charTypes[seqIndices[si2]] & R_TYPES_FOR_N_STEPS) ? TYPE_R : TYPE_L
597 |                 break
598 |               }
599 |             }
600 |             for (let sj = niRunStart; sj <= niRunEnd; sj++) {
601 |               charTypes[seqIndices[sj]] = prevType === nextType ? prevType : embedDirection
602 |             }
603 |             si = niRunEnd
604 |           }
605 |         }
606 |       }
607 |     }
608 | 
609 |     // === 3.3.6 Resolving Implicit Levels ===
610 | 
611 |     for (let i = paragraph.start; i <= paragraph.end; i++) {
612 |       const level = embedLevels[i]
613 |       const type = charTypes[i]
614 |       // I2. For all characters with an odd (right-to-left) embedding level, those of type L, EN or AN go up one level.
615 |       if (level & 1) {
616 |         if (type & (TYPE_L | TYPE_EN | TYPE_AN)) {
617 |           embedLevels[i]++
618 |         }
619 |       }
620 |         // I1. For all characters with an even (left-to-right) embedding level, those of type R go up one level
621 |       // and those of type AN or EN go up two levels.
622 |       else {
623 |         if (type & TYPE_R) {
624 |           embedLevels[i]++
625 |         } else if (type & (TYPE_AN | TYPE_EN)) {
626 |           embedLevels[i] += 2
627 |         }
628 |       }
629 | 
630 |       // 5.2: Resolve any LRE, RLE, LRO, RLO, PDF, or BN to the level of the preceding character if there is one,
631 |       // and otherwise to the base level.
632 |       if (type & BN_LIKE_TYPES) {
633 |         embedLevels[i] = i === 0 ? paragraph.level : embedLevels[i - 1]
634 |       }
635 | 
636 |       // 3.4 L1.1-4: Reset the embedding level of segment/paragraph separators, and any sequence of whitespace or
637 |       // isolate formatting characters preceding them or the end of the paragraph, to the paragraph level.
638 |       // NOTE: this will also need to be applied to each individual line ending after line wrapping occurs.
639 |       if (i === paragraph.end || getBidiCharType(string[i]) & (TYPE_S | TYPE_B)) {
640 |         for (let j = i; j >= 0 && (getBidiCharType(string[j]) & TRAILING_TYPES); j--) {
641 |           embedLevels[j] = paragraph.level
642 |         }
643 |       }
644 |     }
645 |   }
646 | 
647 |   // DONE! The resolved levels can then be used, after line wrapping, to flip runs of characters
648 |   // according to section 3.4 Reordering Resolved Levels
649 |   return {
650 |     levels: embedLevels,
651 |     paragraphs
652 |   }
653 | 
654 |   function determineAutoEmbedLevel (start, isFSI) {
655 |     // 3.3.1 P2 - P3
656 |     for (let i = start; i < string.length; i++) {
657 |       const charType = charTypes[i]
658 |       if (charType & (TYPE_R | TYPE_AL)) {
659 |         return 1
660 |       }
661 |       if ((charType & (TYPE_B | TYPE_L)) || (isFSI && charType === TYPE_PDI)) {
662 |         return 0
663 |       }
664 |       if (charType & ISOLATE_INIT_TYPES) {
665 |         const pdi = indexOfMatchingPDI(i)
666 |         i = pdi === -1 ? string.length : pdi
667 |       }
668 |     }
669 |     return 0
670 |   }
671 | 
672 |   function indexOfMatchingPDI (isolateStart) {
673 |     // 3.1.2 BD9
674 |     let isolationLevel = 1
675 |     for (let i = isolateStart + 1; i < string.length; i++) {
676 |       const charType = charTypes[i]
677 |       if (charType & TYPE_B) {
678 |         break
679 |       }
680 |       if (charType & TYPE_PDI) {
681 |         if (--isolationLevel === 0) {
682 |           return i
683 |         }
684 |       } else if (charType & ISOLATE_INIT_TYPES) {
685 |         isolationLevel++
686 |       }
687 |     }
688 |     return -1
689 |   }
690 | }
691 | 


--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 | export { getEmbeddingLevels } from './embeddingLevels.js'
2 | export { getReorderSegments, getReorderedIndices, getReorderedString } from './reordering.js'
3 | export { getBidiCharType, getBidiCharTypeName } from './charTypes.js'
4 | export { getMirroredCharacter, getMirroredCharactersMap } from './mirroring.js'
5 | export { closingToOpeningBracket, openingToClosingBracket, getCanonicalBracket } from './brackets.js'
6 | 


--------------------------------------------------------------------------------
/src/mirroring.js:
--------------------------------------------------------------------------------
 1 | import data from './data/bidiMirroring.data.js'
 2 | import { parseCharacterMap } from './util/parseCharacterMap.js'
 3 | 
 4 | let mirrorMap
 5 | 
 6 | function parse () {
 7 |   if (!mirrorMap) {
 8 |     //const start = performance.now()
 9 |     const { map, reverseMap } = parseCharacterMap(data, true)
10 |     // Combine both maps into one
11 |     reverseMap.forEach((value, key) => {
12 |       map.set(key, value)
13 |     })
14 |     mirrorMap = map
15 |     //console.log(`mirrored chars parsed in ${performance.now() - start}ms`)
16 |   }
17 | }
18 | 
/**
 * Look up the mirrored counterpart of a character.
 * @param {string} char - the character to look up
 * @return {string|null} the mirrored character, or null if the mirror map has no entry for it
 */
export function getMirroredCharacter (char) {
  parse()
  const mirrored = mirrorMap.get(char)
  return mirrored || null
}
23 | 
/**
 * Given a string and its resolved embedding levels, build a map of indices to replacement chars
 * for any characters in right-to-left segments that have defined mirrored characters.
 * @param {string} string - the full input string
 * @param {Uint8Array|number[]|{levels: Uint8Array}} embeddingLevels - the per-character embedding levels,
 *        either as the levels array itself or as the whole result object from getEmbeddingLevels
 *        (in which case its `levels` property is used)
 * @param {number} [start] - first index to inspect; defaults to 0 and is clamped to 0
 * @param {number} [end] - last index to inspect (inclusive); defaults to and is clamped to the last index
 * @return {Map<number, string>}
 */
export function getMirroredCharactersMap(string, embeddingLevels, start, end) {
  // Accept the full result object as well as the bare levels array, matching getReorderSegments
  const levels = (embeddingLevels && embeddingLevels.levels) || embeddingLevels
  const lastIndex = string.length - 1
  start = Math.max(0, start == null ? 0 : +start)
  end = Math.min(lastIndex, end == null ? lastIndex : +end)

  const map = new Map()
  for (let i = start; i <= end; i++) {
    if (levels[i] & 1) { //only odd (rtl) levels
      const mirror = getMirroredCharacter(string[i])
      if (mirror !== null) {
        map.set(i, mirror)
      }
    }
  }
  return map
}
49 | 


--------------------------------------------------------------------------------
/src/reordering.js:
--------------------------------------------------------------------------------
  1 | import { getBidiCharType, TRAILING_TYPES } from './charTypes.js'
  2 | import { getMirroredCharacter } from './mirroring.js'
  3 | 
  4 | /**
  5 |  * Given a start and end denoting a single line within a string, and a set of precalculated
  6 |  * bidi embedding levels, produce a list of segments whose ordering should be flipped, in sequence.
  7 |  * @param {string} string - the full input string
  8 |  * @param {GetEmbeddingLevelsResult} embeddingLevelsResult - the result object from getEmbeddingLevels
  9 |  * @param {number} [start] - first character in a subset of the full string
 10 |  * @param {number} [end] - last character in a subset of the full string
 11 |  * @return {number[][]} - the list of start/end segments that should be flipped, in order.
 12 |  */
 13 | export function getReorderSegments(string, embeddingLevelsResult, start, end) {
 14 |   let strLen = string.length
 15 |   start = Math.max(0, start == null ? 0 : +start)
 16 |   end = Math.min(strLen - 1, end == null ? strLen - 1 : +end)
 17 | 
 18 |   const segments = []
 19 |   embeddingLevelsResult.paragraphs.forEach(paragraph => {
 20 |     const lineStart = Math.max(start, paragraph.start)
 21 |     const lineEnd = Math.min(end, paragraph.end)
 22 |     if (lineStart < lineEnd) {
 23 |       // Local slice for mutation
 24 |       const lineLevels = embeddingLevelsResult.levels.slice(lineStart, lineEnd + 1)
 25 | 
 26 |       // 3.4 L1.4: Reset any sequence of whitespace characters and/or isolate formatting characters at the
 27 |       // end of the line to the paragraph level.
 28 |       for (let i = lineEnd; i >= lineStart && (getBidiCharType(string[i]) & TRAILING_TYPES); i--) {
 29 |         lineLevels[i] = paragraph.level
 30 |       }
 31 | 
 32 |       // L2. From the highest level found in the text to the lowest odd level on each line, including intermediate levels
 33 |       // not actually present in the text, reverse any contiguous sequence of characters that are at that level or higher.
 34 |       let maxLevel = paragraph.level
 35 |       let minOddLevel = Infinity
 36 |       for (let i = 0; i < lineLevels.length; i++) {
 37 |         const level = lineLevels[i]
 38 |         if (level > maxLevel) maxLevel = level
 39 |         if (level < minOddLevel) minOddLevel = level | 1
 40 |       }
 41 |       for (let lvl = maxLevel; lvl >= minOddLevel; lvl--) {
 42 |         for (let i = 0; i < lineLevels.length; i++) {
 43 |           if (lineLevels[i] >= lvl) {
 44 |             const segStart = i
 45 |             while (i + 1 < lineLevels.length && lineLevels[i + 1] >= lvl) {
 46 |               i++
 47 |             }
 48 |             if (i > segStart) {
 49 |               segments.push([segStart + lineStart, i + lineStart])
 50 |             }
 51 |           }
 52 |         }
 53 |       }
 54 |     }
 55 |   })
 56 |   return segments
 57 | }
 58 | 
 59 | /**
 60 |  * @param {string} string
 61 |  * @param {GetEmbeddingLevelsResult} embedLevelsResult
 62 |  * @param {number} [start]
 63 |  * @param {number} [end]
 64 |  * @return {string} the new string with bidi segments reordered
 65 |  */
 66 | export function getReorderedString(string, embedLevelsResult, start, end) {
 67 |   const indices = getReorderedIndices(string, embedLevelsResult, start, end)
 68 |   const chars = [...string]
 69 |   indices.forEach((charIndex, i) => {
 70 |     chars[i] = (
 71 |       (embedLevelsResult.levels[charIndex] & 1) ? getMirroredCharacter(string[charIndex]) : null
 72 |     ) || string[charIndex]
 73 |   })
 74 |   return chars.join('')
 75 | }
 76 | 
 77 | /**
 78 |  * @param {string} string
 79 |  * @param {GetEmbeddingLevelsResult} embedLevelsResult
 80 |  * @param {number} [start]
 81 |  * @param {number} [end]
 82 |  * @return {number[]} an array with character indices in their new bidi order
 83 |  */
 84 | export function getReorderedIndices(string, embedLevelsResult, start, end) {
 85 |   const segments = getReorderSegments(string, embedLevelsResult, start, end)
 86 |   // Fill an array with indices
 87 |   const indices = []
 88 |   for (let i = 0; i < string.length; i++) {
 89 |     indices[i] = i
 90 |   }
 91 |   // Reverse each segment in order
 92 |   segments.forEach(([start, end]) => {
 93 |     const slice = indices.slice(start, end + 1)
 94 |     for (let i = slice.length; i--;) {
 95 |       indices[end - i] = slice[i]
 96 |     }
 97 |   })
 98 |   return indices
 99 | }
100 | 


--------------------------------------------------------------------------------
/src/util/parseCharacterMap.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Parses an string that holds encoded codepoint mappings, e.g. for bracket pairs or
 3 |  * mirroring characters, as encoded by scripts/generateBidiData.js. Returns an object
 4 |  * holding the `map`, and optionally a `reverseMap` if `includeReverse:true`.
 5 |  * @param {string} encodedString
 6 |  * @param {boolean} includeReverse - true if you want reverseMap in the output
 7 |  * @return {{map: Map<number, number>, reverseMap?: Map<number, number>}}
 8 |  */
 9 | export function parseCharacterMap (encodedString, includeReverse) {
10 |   const radix = 36
11 |   let lastCode = 0
12 |   const map = new Map()
13 |   const reverseMap = includeReverse && new Map()
14 |   let prevPair
15 |   encodedString.split(',').forEach(function visit(entry) {
16 |     if (entry.indexOf('+') !== -1) {
17 |       for (let i = +entry; i--;) {
18 |         visit(prevPair)
19 |       }
20 |     } else {
21 |       prevPair = entry
22 |       let [a, b] = entry.split('>')
23 |       a = String.fromCodePoint(lastCode += parseInt(a, radix))
24 |       b = String.fromCodePoint(lastCode += parseInt(b, radix))
25 |       map.set(a, b)
26 |       includeReverse && reverseMap.set(b, a)
27 |     }
28 |   })
29 |   return { map, reverseMap }
30 | }
31 | 


--------------------------------------------------------------------------------
/test/BidiCharacterTest.js:
--------------------------------------------------------------------------------
 1 | const { readFileSync } = require('fs')
 2 | const path = require('path')
 3 | const { performance } = require('perf_hooks')
 4 | 
 5 | module.exports.runBidiCharacterTest = function (bidi) {
 6 |   const text = readFileSync(path.join(__dirname, './BidiCharacterTest.txt'), 'utf-8')
 7 |   const lines = text.split('\n')
 8 | 
 9 |   const BAIL_COUNT = 10
10 | 
11 |   let testFilter = null
12 |   // testFilter = (lineNum, dir) => lineNum === 65 && dir === 'auto'
13 | 
14 |   let testCount = 0
15 |   let passCount = 0
16 |   let failCount = 0
17 |   let totalTime = 0
18 | 
19 |   lines.forEach((line, lineIdx) => {
20 |     if (line && !line.startsWith('#')) {
21 |       let [input, paraDir, , expectedLevels, expectedOrder] = line.split(';')
22 | 
23 |       const inputOrig = input
24 |       input = input.split(' ').map(d => String.fromCodePoint(parseInt(d, 16))).join('')
25 |       paraDir = paraDir === '0' ? 'ltr' : paraDir === '1' ? 'rtl' : 'auto'
26 | 
27 |       if (testFilter && testFilter(lineIdx + 1, paraDir) === false) return
28 | 
29 |       expectedLevels = expectedLevels.split(' ').map(s => s === 'x' ? s : parseInt(s, 10))
30 |       expectedOrder = expectedOrder.split(' ').map(s => parseInt(s, 10))
31 | 
32 |       const start = performance.now()
33 |       const embedLevelsResult = bidi.getEmbeddingLevels(input, paraDir)
34 |       const {levels, paragraphs} = embedLevelsResult
35 |       let reordered = bidi.getReorderedIndices(input, embedLevelsResult)
36 |       totalTime += performance.now() - start
37 | 
38 |       reordered = reordered.filter(i => expectedLevels[i] !== 'x') //those with indeterminate level are ommitted
39 | 
40 |       let ok = expectedLevels.length === levels.length && paragraphs.length === 1
41 |       if (ok) {
42 |         for (let i = 0; i < expectedLevels.length; i++) {
43 |           if (expectedLevels[i] !== 'x' && expectedLevels[i] !== levels[i]) {
44 |             ok = false
45 |             break
46 |           }
47 |         }
48 |       }
49 |       if (ok) {
50 |         for (let i = 0; i < reordered.length; i++) {
51 |           if (reordered[i] !== expectedOrder[i]) {
52 |             ok = false
53 |             break
54 |           }
55 |         }
56 |       }
57 | 
58 |       testCount++
59 |       if (ok) {
60 |         passCount++
61 |       } else {
62 |         if (++failCount <= BAIL_COUNT) {
63 |           const types = input.split('').map(ch => bidi.getBidiCharTypeName(ch))
64 |           console.error(`Test on line ${lineIdx + 1}, direction "${paraDir}":
65 |   Input codes:     ${inputOrig}
66 |   Input Types:     ${mapToColumns(types, 5)}
67 |   Expected levels: ${mapToColumns(expectedLevels, 5)}
68 |   Received levels: ${mapToColumns(levels, 5)}
69 |   Expected order:  ${mapToColumns(expectedOrder, 4)}
70 |   Received order:  ${mapToColumns(reordered, 4)}`)
71 |           //  Chars:    ${mapToColumns(input.split(''), 5)}
72 |         }
73 |       }
74 | 
75 |     }
76 |   })
77 | 
78 |   let message = `Bidi Character Tests: ${testCount} total, ${passCount} passed, ${failCount} failed`
79 |   if (failCount >= BAIL_COUNT) {
80 |     message += ` (only first ${BAIL_COUNT} failures shown)`
81 |   }
82 |   message += `\n    ${totalTime.toFixed(4)}ms total, ${(totalTime / testCount).toFixed(4)}ms average`
83 | 
84 |   console.log(message)
85 | 
86 |   return failCount ? 1 : 0
87 | }
88 | 
/**
 * Format a sequence of values as fixed-width columns for aligned console output.
 * @param values - any iterable of values; each is converted to a string
 * @param {number} colSize - width each value is right-padded to with spaces
 * @return {string} the padded values concatenated together
 */
function mapToColumns (values, colSize) {
  let row = ''
  for (const value of values) {
    row += `${value}`.padEnd(colSize)
  }
  return row
}
92 | 


--------------------------------------------------------------------------------
/test/BidiTest.js:
--------------------------------------------------------------------------------
  1 | const { readFileSync } = require('fs')
  2 | const path = require('path')
  3 | const { performance } = require('perf_hooks')
  4 | 
  5 | module.exports.runBidiTest = function (bidi) {
  6 |   const text = readFileSync(path.join(__dirname, './BidiTest.txt'), 'utf-8')
  7 |   let lines = text.split('\n')
  8 | 
  9 |   const BAIL_COUNT = 10
 10 | 
 11 |   let testFilter = null
 12 |   // testFilter = (lineNum, dir) => lineNum === 7187 && dir === 'ltr'
 13 | 
 14 |   const paraDirBits = ['auto', 'ltr', 'rtl']
 15 | 
 16 |   const CLASS_REPS = {
 17 |     L: '\u0041',
 18 |     R: '\u05D0',
 19 |     EN: '\u0030',
 20 |     ES: '\u002B',
 21 |     ET: '\u0023',
 22 |     AN: '\u0660',
 23 |     CS: '\u002C',
 24 |     B: '\u2029',
 25 |     S: '\u0009',
 26 |     WS: '\u0020',
 27 |     ON: '\u0021',
 28 |     BN: '\u00AD',
 29 |     NSM: '\u036F',
 30 |     AL: '\u06D5',
 31 |     LRO: '\u202D',
 32 |     RLO: '\u202E',
 33 |     LRE: '\u202A',
 34 |     RLE: '\u202B',
 35 |     PDF: '\u202C',
 36 |     LRI: '\u2066',
 37 |     RLI: '\u2067',
 38 |     FSI: '\u2068',
 39 |     PDI: '\u2069'
 40 |   }
 41 | 
 42 |   let expectedLevels
 43 |   let expectedOrder
 44 | 
 45 |   let testCount = 0
 46 |   let passCount = 0
 47 |   let failCount = 0
 48 |   let totalTime = 0
 49 | 
 50 |   lines.forEach((line, lineIdx) => {
 51 |     if (line && !line.startsWith('#')) {
 52 |       let match = line.match(/^@(Levels|Reorder):\s*(.*)$/)
 53 |       if (match) {
 54 |         const values = match[2].trim() ? match[2].trim().split(/\s+/).map(s => s === 'x' ? s : parseInt(s, 10)) : []
 55 |         if (match[1] === 'Levels') {
 56 |           expectedLevels = values
 57 |         } else if (match[1] === 'Reorder') {
 58 |           expectedOrder = values
 59 |         }
 60 |         return
 61 |       }
 62 | 
 63 |       let [types, paraDirs] = line.split(/\s*;\s*/)
 64 | 
 65 |       types = types.trim().split(/\s+/)
 66 |       const inputString = types.map(type => CLASS_REPS[type]).join('')
 67 | 
 68 |       paraDirs = parseInt(paraDirs.trim(), 10)
 69 |       paraDirs = paraDirBits.filter((dirString, i) => paraDirs & (1 << i))
 70 | 
 71 |       for (let paraDir of paraDirs) {
 72 |         if (testFilter && testFilter(lineIdx + 1, paraDir) === false) continue
 73 | 
 74 |         const start = performance.now()
 75 |         const embedLevelsResult = bidi.getEmbeddingLevels(inputString, paraDir)
 76 |         const {levels, paragraphs} = embedLevelsResult
 77 |         let reordered = bidi.getReorderedIndices(inputString, embedLevelsResult)
 78 |         totalTime += performance.now() - start
 79 |         reordered = reordered.filter(i => expectedLevels[i] !== 'x') //those with indeterminate level are ommitted
 80 | 
 81 |         let ok = expectedLevels.length === levels.length && paragraphs.length === 1
 82 |         if (ok) {
 83 |           for (let i = 0; i < expectedLevels.length; i++) {
 84 |             if (expectedLevels[i] !== 'x' && expectedLevels[i] !== levels[i]) {
 85 |               ok = false
 86 |               break
 87 |             }
 88 |           }
 89 |         }
 90 |         if (ok) {
 91 |           for (let i = 0; i < reordered.length; i++) {
 92 |             if (reordered[i] !== expectedOrder[i]) {
 93 |               ok = false
 94 |               break
 95 |             }
 96 |           }
 97 |         }
 98 | 
 99 |         testCount++
100 |         if (ok) {
101 |           passCount++
102 |         } else {
103 |           if (failCount++ <= BAIL_COUNT) {
104 |             console.error(`Test on line ${lineIdx + 1}, direction "${paraDir}":
105 |   Input Types:     ${mapToColumns(types, 5)}
106 |   Expected levels: ${mapToColumns(expectedLevels, 5)}
107 |   Received levels: ${mapToColumns(levels, 5)}
108 |   Expected order:  ${mapToColumns(expectedOrder, 3)}
109 |   Received order:  ${mapToColumns(reordered, 3)}`)
110 |           }
111 |         }
112 |       }
113 |     }
114 |   })
115 | 
116 |   let message = `Bidi Tests: ${testCount} total, ${passCount} passed, ${failCount} failed`
117 |   if (failCount >= BAIL_COUNT) {
118 |     message += ` (only first ${BAIL_COUNT} failures shown)`
119 |   }
120 |   message += `\n    ${totalTime.toFixed(4)}ms total, ${(totalTime / testCount).toFixed(4)}ms average`
121 | 
122 |   console.log(message)
123 | 
124 |   return failCount ? 1 : 0
125 | }
126 | 
/**
 * Lay out values in fixed-width columns so that printed rows line up.
 * @param values - any iterable of values; each is converted to a string
 * @param {number} colSize - width each value is right-padded to with spaces
 * @return {string} the padded values concatenated together
 */
function mapToColumns (values, colSize) {
  const cells = []
  for (const value of values) {
    cells.push(`${value}`.padEnd(colSize))
  }
  return cells.join('')
}
130 | 


--------------------------------------------------------------------------------
/test/README.md:
--------------------------------------------------------------------------------
1 | # Bidirectional Algorithm Tests
2 | 
3 | The files here execute both [conformance test suites](https://unicode.org/reports/tr9/#Bidi_Conformance_Testing) provided with the bidi spec. The `.txt` files are copied directly from the [unicode data](https://www.unicode.org/Public/13.0.0/ucd/) site. Execute the tests from the project root via `npm run test`. 
4 | 


--------------------------------------------------------------------------------
/test/runTestsOnBuild.js:
--------------------------------------------------------------------------------
 1 | const { runBidiTest } = require('./BidiTest.js')
 2 | const { runBidiCharacterTest } = require('./BidiCharacterTest.js')
 3 | 
 4 | const { transformFileSync } = require("@babel/core")
 5 | const requireFromString = require('require-from-string')
 6 | 
 7 | /*
 8 |  This runs the built dist file through Babel with preset-env for ES5 support,
 9 |  and runs the test suite on the result, to verify that Babel is not injecting calls to
10 |  external polyfills or other helper functions, which would break the factory
11 |  function's ability to be stringified and rehydrated in a worker.
12 | 
13 |  Assumes `npm run build` has been executed and files in dist/ are up to date.
14 | */
15 | const { code } = transformFileSync('dist/bidi.js', {
16 |   presets: ['@babel/preset-env'] //ES5 by default
17 | })
18 | const bidiFactory = requireFromString(code, 'babelified-bidi.js')
19 | const bidi = bidiFactory()
20 | 
21 | console.log('Running test suite on build file...')
22 | const results = [
23 |   runBidiTest(bidi),
24 |   runBidiCharacterTest(bidi)
25 | ]
26 | 
27 | process.exit(Math.max(...results))
28 | 
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/test/runTestsOnSrc.js:
--------------------------------------------------------------------------------
 1 | import { runBidiTest } from './BidiTest.js'
 2 | import { runBidiCharacterTest } from './BidiCharacterTest.js'
 3 | 
 4 | import * as bidiFromSrc from '../src/index.js'
 5 | 
 6 | console.log('Running test suite on src files...')
 7 | 
 8 | const results = [
 9 |   runBidiTest(bidiFromSrc),
10 |   runBidiCharacterTest(bidiFromSrc)
11 | ]
12 | 
13 | process.exit(Math.max(...results))
14 | 


--------------------------------------------------------------------------------