├── .editorconfig
├── .github
    ├── dependabot.yml
    └── workflows
    │   └── workflow-1.yml
├── .gitattributes
├── lib
    ├── dehex.js
    ├── general-category.js
    ├── ucd.js
    ├── east-asian-width.js
    ├── word-break.js
    ├── canonical-combining-class.js
    ├── normalization-properties.js
    ├── index.js
    └── index.spec.mjs
├── CHANGELOG.md
├── .gitignore
├── LICENSE.txt
├── package.json
└── README.md


/.editorconfig:
--------------------------------------------------------------------------------
1 | [*]
2 | insert_final_newline = true
3 | charset = utf-8
4 | indent_style = space
5 | indent_size = 2
6 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   - package-ecosystem: 'npm'
 4 |     directory: '/'
 5 |     schedule:
 6 |       interval: 'monthly'
 7 |     ignore:
 8 |     - dependency-name: '*'
 9 |       update-types:
10 |       - 'version-update:semver-minor'
11 |       - 'version-update:semver-patch'
12 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | 
 7 | # Standard to msysgit
 8 | *.doc	 diff=astextplain
 9 | *.DOC	 diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot  diff=astextplain
13 | *.DOT  diff=astextplain
14 | *.pdf  diff=astextplain
15 | *.PDF	 diff=astextplain
16 | *.rtf	 diff=astextplain
17 | *.RTF	 diff=astextplain
18 | 


--------------------------------------------------------------------------------
/lib/dehex.js:
--------------------------------------------------------------------------------
 1 | // Returns an array of code points
 2 | // e.g. "0000" becomes `[0]`
 3 | // e.g. "0000..000A" becomes `[0, ..., 10]`
 4 | export default str => {
 5 |   const components = str.split('..')
 6 |   if (components.length !== 1 && components.length !== 2) {
 7 |     throw new Error('Could not dehex this string')
 8 |   }
 9 |   const lower = parseInt(components[0], 16)
10 |   const upper = components.length === 2 ? parseInt(components[1], 16) : lower
11 |   const codePoints = []
12 |   for (let codePoint = lower; codePoint <= upper; codePoint++) {
13 |     codePoints.push(codePoint)
14 |   }
15 |   return codePoints
16 | }
17 | 


--------------------------------------------------------------------------------
/.github/workflows/workflow-1.yml:
--------------------------------------------------------------------------------
 1 | name: 'Travis CI replacement'
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches:
 6 |     - '**'
 7 | 
 8 | jobs:
 9 |   build-job:
10 |     runs-on: 'ubuntu-latest'
11 | 
12 |     strategy:
13 |       matrix:
14 |         node-version: ['14.x', '16.x', '18.x', '20.x', '22.x', '24.x']
15 | 
16 |     steps:
17 |     - uses: 'actions/checkout@v2'
18 | 
19 |     - name: 'Use Node.js ${{ matrix.node-version }}'
20 |       uses: 'actions/setup-node@v1'
21 |       with:
22 |         node-version: '${{ matrix.node-version }}'
23 | 
24 |     - name: 'Actual npm tasks'
25 |       run: |
26 |         npm install
27 |         npm run test
28 | 


--------------------------------------------------------------------------------
/lib/general-category.js:
--------------------------------------------------------------------------------
 1 | /** UCD General_Category resources. */
 2 | 
 3 | import dehex from './dehex.js'
 4 | 
 5 | /** Get GC for a specific code point */
 6 | export default data => {
 7 |   const byCodePoint = {}
 8 | 
 9 |   data.forEach(function (row) {
10 |     const codePoints = dehex(row[0])
11 |     const gc = row[1] // E.g. "Lo", "Po"
12 | 
13 |     codePoints.forEach(function (codePoint) {
14 |       if (codePoint in byCodePoint) {
15 |         throw new Error(codePoint)
16 |       }
17 |       byCodePoint[codePoint] = gc
18 |     })
19 |   })
20 | 
21 |   return codePoint => {
22 |     if (!(codePoint in byCodePoint)) {
23 |       throw new Error('General_Category unknown for ' + String(codePoint))
24 |     }
25 |     return byCodePoint[codePoint]
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------
/lib/ucd.js:
--------------------------------------------------------------------------------
 1 | // This module reads in most kinds of Unicode Character Data set text files.
 2 | // Comments (marked with a #) are stripped, empty lines are removed. The
 3 | // remaining lines are divided up into fields on the semicolon delimiter, and
 4 | // the fields have whitespace trimmed.
 5 | 
 6 | import nodeFetch from 'node-fetch'
 7 | 
 8 | export const get = async (version, name) => {
 9 |   const url = `http://www.unicode.org/Public/${version}/ucd/${name}`
10 |   const res = await nodeFetch(url)
11 |   if (!res.ok) {
12 |     throw Error(`${res.status} while fetching ${url}`)
13 |   }
14 |   const body = await res.text()
15 |   return body
16 |     .split(/\r?\n/)
17 |     .map(line => line.replace(/^(.*?)(?:#.*)?$/, '$1'))
18 |     .filter(line => line !== '')
19 |     .map(line => line.split(';').map(field => field.trim()))
20 | }
21 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # CHANGELOG
 2 | 
 3 | ## 3.x.x
 4 | 
 5 | `safe-code-point` now uses ES modules, not CommonJS modules.
 6 | 
 7 | ## 2.x.x
 8 | 
 9 | The API of `safe-code-point` has completely changed to asynchronously look up Unicode code point data. Code like:
10 | 
11 | ```js
12 | const safeCodePoint = require('safe-code-point')
13 | 
14 | const a = safeCodePoint(codePoint, '11.0')
15 | const b = safeCodePoint.generalCategory(codePoint, '11.0')
16 | ```
17 | 
18 | should change to something like:
19 | 
20 | ```js
21 | const SafeCodePoint = require('safe-code-point')
22 | 
23 | SafeCodePoint('11.0.0').then(safeCodePoint => {
24 |   const a = safeCodePoint(codePoint)
25 |   const b = safeCodePoint.generalCategory(codePoint)
26 | })
27 | ```
28 | 
29 | `safeCodePoint.supportedVersions` has been removed.
30 | 
31 | ## 1.x.x
32 | 
33 | Initial release.
34 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Windows image file caches
 2 | Thumbs.db
 3 | ehthumbs.db
 4 | 
 5 | # Folder config file
 6 | Desktop.ini
 7 | 
 8 | # Recycle Bin used on file shares
 9 | $RECYCLE.BIN/
10 | 
11 | # Windows Installer files
12 | *.cab
13 | *.msi
14 | *.msm
15 | *.msp
16 | 
17 | # Windows shortcuts
18 | *.lnk
19 | 
20 | # =========================
21 | # Operating System Files
22 | # =========================
23 | 
24 | # OSX
25 | # =========================
26 | 
27 | .DS_Store
28 | .AppleDouble
29 | .LSOverride
30 | 
31 | # Thumbnails
32 | ._*
33 | 
34 | # Files that might appear in the root of a volume
35 | .DocumentRevisions-V100
36 | .fseventsd
37 | .Spotlight-V100
38 | .TemporaryItems
39 | .Trashes
40 | .VolumeIcon.icns
41 | 
42 | # Directories potentially created on remote AFP share
43 | .AppleDB
44 | .AppleDesktop
45 | Network Trash Folder
46 | Temporary Items
47 | .apdisk
48 | 
49 | # Mine
50 | node_modules
51 | 


--------------------------------------------------------------------------------
/lib/east-asian-width.js:
--------------------------------------------------------------------------------
 1 | // UCD East_Asian_Width resources.
 2 | 
 3 | import dehex from './dehex.js'
 4 | 
 5 | /** Get property value for a specific code point, or default if set */
 6 | export default data => {
 7 |   const byCodePoint = {}
 8 | 
 9 |   // Defaults
10 |   for (let codePoint = 0; codePoint < (1 << 20) + (1 << 16); codePoint++) {
11 |     byCodePoint[codePoint] = undefined // unknown
12 |   }
13 | 
14 |   data.forEach(function (row) {
15 |     const codePoints = dehex(row[0])
16 |     const eastAsianWidth = row[1] // "A", "F", "H", "N", "Na" or "W"
17 | 
18 |     codePoints.forEach(function (codePoint) {
19 |       byCodePoint[codePoint] = eastAsianWidth
20 |     })
21 |   })
22 | 
23 |   return codePoint => {
24 |     const value = byCodePoint[codePoint]
25 |     if (value === undefined) {
26 |       throw new Error('Code point has no East_Asian_Width specified: ' + String(codePoint))
27 |     }
28 |     return value
29 |   }
30 | }
31 | 


--------------------------------------------------------------------------------
/lib/word-break.js:
--------------------------------------------------------------------------------
 1 | /** UCD Word_Break resources. */
 2 | 
 3 | import dehex from './dehex.js'
 4 | 
 5 | /** Get Word_Break for a specific code point */
 6 | export default data => {
 7 |   const byCodePoint = {}
 8 | 
 9 |   // Defaults
10 |   for (let codePoint = 0; codePoint < (1 << 20) + (1 << 16); codePoint++) {
11 |     byCodePoint[codePoint] = undefined
12 |   }
13 | 
14 |   data.forEach(function (row) {
15 |     const codePoints = dehex(row[0])
16 |     const wbp = row[1] // E.g. "Numeric", "ALetter"
17 | 
18 |     codePoints.forEach(function (codePoint) {
19 |       if (codePoint in byCodePoint && byCodePoint[codePoint] !== undefined) {
20 |         throw new Error(codePoint)
21 |       }
22 |       byCodePoint[codePoint] = wbp
23 |     })
24 |   })
25 | 
26 |   return codePoint => {
27 |     if (!(codePoint in byCodePoint)) {
28 |       throw new Error('Word_Break unknown for ' + String(codePoint))
29 |     }
30 |     return byCodePoint[codePoint]
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/lib/canonical-combining-class.js:
--------------------------------------------------------------------------------
 1 | // UCD Canonical_Combining_Class resources.
 2 | 
 3 | import dehex from './dehex.js'
 4 | 
 5 | /** Get CCC for a specific code point */
 6 | export default data => {
 7 |   const byCodePoint = {}
 8 | 
 9 |   // Defaults
10 |   const defaultCcc = 0
11 |   for (let codePoint = 0; codePoint < (1 << 20) + (1 << 16); codePoint++) {
12 |     byCodePoint[codePoint] = defaultCcc
13 |   }
14 | 
15 |   data.forEach(function (row) {
16 |     const codePoints = dehex(row[0])
17 |     const ccc = Number(row[1])
18 | 
19 |     codePoints.forEach(function (codePoint) {
20 |       if (codePoint in byCodePoint && byCodePoint[codePoint] !== defaultCcc) {
21 |         throw new Error(codePoint)
22 |       }
23 |       byCodePoint[codePoint] = ccc
24 |     })
25 |   })
26 | 
27 |   return codePoint => {
28 |     if (!(codePoint in byCodePoint)) {
29 |       throw new Error('Canonical_Combining_Class unknown for ' + String(codePoint))
30 |     }
31 |     return byCodePoint[codePoint]
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 qntm
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/lib/normalization-properties.js:
--------------------------------------------------------------------------------
 1 | // UCD normalization property resources.
 2 | 
 3 | import dehex from './dehex.js'
 4 | 
 5 | /** Get property value for a specific code point, or default if set */
 6 | export default data => {
 7 |   const byCodePoint = {}
 8 | 
 9 |   // Defaults
10 |   for (let codePoint = 0; codePoint < (1 << 20) + (1 << 16); codePoint++) {
11 |     byCodePoint[codePoint] = {
12 |       NFD_QC: 'Y',
13 |       NFC_QC: 'Y',
14 |       NFKD_QC: 'Y',
15 |       NFKC_QC: 'Y'
16 |     }
17 |   }
18 | 
19 |   data.forEach(function (row) {
20 |     const codePoints = dehex(row[0])
21 |     const property = row[1] // E.g. "NFD_QC"
22 |     const value = row[2] // E.g. "N"
23 | 
24 |     codePoints.forEach(function (codePoint) {
25 |       byCodePoint[codePoint][property] = value
26 |     })
27 |   })
28 | 
29 |   return (codePoint, property) => {
30 |     const properties = byCodePoint[codePoint]
31 |     if (properties === undefined) {
32 |       throw new Error('Unrecognised code point: ' + String(codePoint))
33 |     }
34 |     const value = properties[property]
35 |     if (value === undefined) {
36 |       throw new Error('Unrecognised property: ' + property)
37 |     }
38 |     return value
39 |   }
40 | }
41 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "safe-code-point",
 3 |   "version": "3.0.4",
 4 |   "description": "Ascertains whether a Unicode code point is 'safe' for the purposes of encoding binary data",
 5 |   "homepage": "https://github.com/qntm/safe-code-point",
 6 |   "repository": {
 7 |     "type": "git",
 8 |     "url": "git://github.com/qntm/safe-code-point.git"
 9 |   },
10 |   "main": "lib/index.js",
11 |   "type": "module",
12 |   "keywords": [
13 |     "base64",
14 |     "base65536",
15 |     "base32768",
16 |     "base2048",
17 |     "base131072",
18 |     "encoding",
19 |     "unicode",
20 |     "text",
21 |     "decoding",
22 |     "binary"
23 |   ],
24 |   "scripts": {
25 |     "jasmine": "jasmine",
26 |     "standard": "standard --fix",
27 |     "tag": "node -e \"require('child_process').spawn('git', ['tag', `v${require('./package.json').version}`], { stdio: 'inherit' })\"",
28 |     "tag-and-publish": "npm run tag && git push --tags && npm publish && npm version patch --no-git-tag-version && git add . && git commit -m \"Bump patch\" && git push",
29 |     "test": "standard && jasmine \"**/*.spec.mjs\""
30 |   },
31 |   "author": "qntm",
32 |   "dependencies": {
33 |     "node-fetch": "^3.0.0"
34 |   },
35 |   "devDependencies": {
36 |     "jasmine": "^5.0.0",
37 |     "standard": "^17.0.0"
38 |   },
39 |   "files": [
40 |     "lib",
41 |     "!lib/**/*.spec.mjs"
42 |   ],
43 |   "license": "MIT"
44 | }
45 | 


--------------------------------------------------------------------------------
/lib/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |   Ascertain whether a Unicode code point is "safe" for use in a binary encoding.
 3 | */
 4 | 
 5 | import * as ucd from './ucd.js'
 6 | import canonicalCombiningClass from './canonical-combining-class.js'
 7 | import eastAsianWidth from './east-asian-width.js'
 8 | import generalCategory from './general-category.js'
 9 | import normalizationProperties from './normalization-properties.js'
10 | import wordBreak from './word-break.js'
11 | 
12 | const quickChecks = [ // Quick_Check properties that must all be 'Y' for a safe code point
13 |   'NFD_QC', // canonical decomposition
14 |   'NFC_QC', // canonical decomposition + canonical composition
15 |   'NFKD_QC', // compatibility decomposition
16 |   'NFKC_QC' // compatibility decomposition + canonical composition
17 | ]
18 | 
19 | // Default `safeCategories`: maps each General_Category to `true` when it is considered safe
20 | const DEFAULT_SAFE_CATEGORIES = {
21 |   Ll: true, // Letter, Lowercase
22 |   Lm: true, // Letter, Modifier
23 |   Lo: true, // Letter, Other
24 |   Lt: true, // Letter, Titlecase
25 |   Lu: true, // Letter, Uppercase
26 |   Me: false, // Mark, Enclosing
27 |   Mn: false, // Mark, Nonspacing
28 |   Mc: false, // Mark, Spacing Combining
29 |   Nd: true, // Number, Decimal Digit
30 |   Nl: true, // Number, Letter
31 |   No: true, // Number, Other
32 |   Cc: false, // Other, Control
33 |   Cf: false, // Other, Format
34 |   Cn: false, // Other, Not Assigned (no characters in the file have this property)
35 |   Co: false, // Other, Private Use
36 |   Cs: false, // Other, Surrogate
37 |   Pe: false, // Punctuation, Close
38 |   Pc: false, // Punctuation, Connector
39 |   Pd: false, // Punctuation, Dash
40 |   Pf: false, // Punctuation, Final quote (may behave like Ps or Pe depending on usage)
41 |   Pi: false, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
42 |   Ps: false, // Punctuation, Open
43 |   Po: false, // Punctuation, Other
44 |   Zl: false, // Separator, Line
45 |   Zp: false, // Separator, Paragraph
46 |   Zs: false, // Separator, Space
47 |   Sc: true, // Symbol, Currency
48 |   Sm: true, // Symbol, Math
49 |   Sk: true, // Symbol, Modifier
50 |   So: true // Symbol, Other
51 | }
52 | 
53 | export default async (version, { safeCategories = DEFAULT_SAFE_CATEGORIES } = {}) => {
54 |   // First load up a bunch of data
55 |   const cccData = await ucd.get(version, 'extracted/DerivedCombiningClass.txt')
56 |   const eawData = await ucd.get(version, 'EastAsianWidth.txt')
57 |   const gcData = await ucd.get(version, 'extracted/DerivedGeneralCategory.txt')
58 |   const npData = await ucd.get(version, 'DerivedNormalizationProps.txt')
59 |   const wbpData = await ucd.get(version, 'auxiliary/WordBreakProperty.txt')
60 | 
61 |   const ccc = canonicalCombiningClass(cccData)
62 |   const eaw = eastAsianWidth(eawData)
63 |   const gc = generalCategory(gcData)
64 |   const np = normalizationProperties(npData)
65 |   const wb = wordBreak(wbpData)
66 | 
67 |   const safeCodePoint = codePoint => {
68 |     const passesQuickChecks = quickChecks.every(property =>
69 |       np(codePoint, property) === 'Y'
70 |     )
71 |     const inSafeGc = safeCategories[gc(codePoint)] === true
72 |     const hasCcc0 = ccc(codePoint) === 0
73 | 
74 |     return passesQuickChecks && inSafeGc && hasCcc0
75 |   }
76 | 
77 |   safeCodePoint.canonicalCombiningClass = ccc
78 |   safeCodePoint.eastAsianWidth = eaw
79 |   safeCodePoint.generalCategory = gc
80 |   safeCodePoint.normalizationProperties = np
81 |   safeCodePoint.wordBreak = wb
82 | 
83 |   return safeCodePoint
84 | }
85 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # safe-code-point
 2 | 
 3 | Package for determining whether the supplied code point is ["safe"](https://qntm.org/safe). This module (well, code very much like it) was used to generate code points for [Base65536](https://github.com/qntm/base65536), [Base32768](https://github.com/qntm/base32768) and [Base2048](https://github.com/qntm/base2048).
 4 | 
 5 | This package supports ES modules only.
 6 | 
 7 | ## Example
 8 | 
 9 | ```js
10 | import SafeCodePoint from 'safe-code-point'
11 | 
12 | const safeCodePoint = await SafeCodePoint('17.0.0')
13 | const numCodePoints = (1 << 16) + (1 << 20)
14 | 
15 | let numSafeCodePoints = 0
16 | for (let codePoint = 0; codePoint < numCodePoints; codePoint++) {
17 |   if (safeCodePoint(codePoint)) {
18 |     numSafeCodePoints++
19 |   }
20 | }
21 | 
22 | console.log(numSafeCodePoints)
23 | ```
24 | 
25 | ## API
26 | 
27 | ### SafeCodePoint(version, options)
28 | 
29 | Returns a promise which resolves to a `safeCodePoint` function for the supplied version of Unicode.
30 | 
31 | Data is fetched from [the Unicode website](http://www.unicode.org/Public) at run time. At the time of writing, supported version strings are a subset of those seen in that directory: `'4.1.0'` to `'17.0.0'`. (Earlier versions do not provide the data in the same consumable structure.)
32 | 
33 | #### options
34 | 
35 | An optional object with the keys acting as additional options. The options are as follows:
36 | - **`safeCategories`**: An object that determines whether a code point's General Category is safe or not. Each key is a category, and its value is `true` if the category is safe and anything else if it is not. If this option is not passed, the defaults shown in the example below are used.
37 | 
38 | Example:
39 | ```js
40 | const safeCodePoint = await SafeCodePoint('17.0.0', {
41 |   safeCategories: {
42 |     Ll: true, // Letter, Lowercase
43 |     Lm: true, // Letter, Modifier
44 |     Lo: true, // Letter, Other
45 |     Lt: true, // Letter, Titlecase
46 |     Lu: true, // Letter, Uppercase
47 |     Me: false, // Mark, Enclosing
48 |     Mn: false, // Mark, Nonspacing
49 |     Mc: false, // Mark, Spacing Combining
50 |     Nd: true, // Number, Decimal Digit
51 |     Nl: true, // Number, Letter
52 |     No: true, // Number, Other
53 |     Cc: false, // Other, Control
54 |     Cf: false, // Other, Format
55 |     Cn: false, // Other, Not Assigned (no characters in the file have this property)
56 |     Co: false, // Other, Private Use
57 |     Cs: false, // Other, Surrogate
58 |     Pe: false, // Punctuation, Close
59 |     Pc: false, // Punctuation, Connector
60 |     Pd: false, // Punctuation, Dash
61 |     Pf: false, // Punctuation, Final quote (may behave like Ps or Pe depending on usage)
62 |     Pi: false, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
63 |     Ps: false, // Punctuation, Open
64 |     Po: false, // Punctuation, Other
65 |     Zl: false, // Separator, Line
66 |     Zp: false, // Separator, Paragraph
67 |     Zs: false, // Separator, Space
68 |     Sc: true, // Symbol, Currency
69 |     Sm: true, // Symbol, Math
70 |     Sk: true, // Symbol, Modifier
71 |     So: true // Symbol, Other
72 |   }
73 | })
74 | ```
75 | 
76 | ### safeCodePoint(codePoint)
77 | 
78 | Returns a Boolean indicating whether the supplied code point is safe (does not belong to an unsafe Unicode General Category, has a canonical combining class of 0 and survives all forms of normalization). `codePoint` should be an integer from `0` to `1114111` inclusive.
79 | 
80 | ### safeCodePoint.generalCategory(codepoint)
81 | 
82 | Returns the Unicode General Category of the supplied code point as a two-character string, *e.g.* `"Lo"` for "Letter, other".
83 | 
84 | ### safeCodePoint.wordBreak(codepoint)
85 | 
86 | Returns the [Word_Break property](https://unicode.org/reports/tr29/#Table_Word_Break_Property_Values) value of the supplied code point, *e.g.* `'Numeric'`, `'ALetter'` or (in most cases) `undefined`.
87 | 


--------------------------------------------------------------------------------
/lib/index.spec.mjs:
--------------------------------------------------------------------------------
  1 | /* eslint-env jasmine */
  2 | 
  3 | // Jasmine can be configured to handle ES modules with .js extension,
  4 | // but naming this file .mjs is easier
  5 | 
  6 | import dehex from './dehex.js'
  7 | import SafeCodePoint from './index.js'
  8 | 
  9 | const numCodePoints = (1 << 16) + (1 << 20)
 10 | const getNumSafeCodePoints = safeCodePoint => {
 11 |   let numSafeCodePoints = 0
 12 |   for (let codePoint = 0; codePoint < numCodePoints; codePoint++) {
 13 |     if (safeCodePoint(codePoint)) {
 14 |       numSafeCodePoints++
 15 |     }
 16 |   }
 17 |   return numSafeCodePoints
 18 | }
 19 | 
 20 | describe('safe-code-point', () => {
 21 |   let scp7
 22 |   let scp8
 23 |   let scp9
 24 |   let scp10
 25 |   let scp11
 26 |   let scp12
 27 |   let scp13
 28 |   let scp17
 29 | 
 30 |   // First load some files
 31 |   beforeAll(async () => {
 32 |     scp7 = await SafeCodePoint('7.0.0')
 33 |   })
 34 |   beforeAll(async () => {
 35 |     scp8 = await SafeCodePoint('8.0.0')
 36 |   })
 37 |   beforeAll(async () => {
 38 |     scp9 = await SafeCodePoint('9.0.0')
 39 |   })
 40 |   beforeAll(async () => {
 41 |     scp10 = await SafeCodePoint('10.0.0')
 42 |   })
 43 |   beforeAll(async () => {
 44 |     scp11 = await SafeCodePoint('11.0.0')
 45 |   })
 46 |   beforeAll(async () => {
 47 |     scp12 = await SafeCodePoint('12.0.0')
 48 |   })
 49 |   beforeAll(async () => {
 50 |     scp13 = await SafeCodePoint('13.0.0')
 51 |   })
 52 |   beforeAll(async () => {
 53 |     scp17 = await SafeCodePoint('17.0.0')
 54 |   })
 55 | 
 56 |   describe('canonicalCombiningClass', () => {
 57 |     it('works', () => {
 58 |       expect(scp8.canonicalCombiningClass(0)).toBe(0)
 59 |       expect(scp8.canonicalCombiningClass(30)).toBe(0)
 60 |       expect(scp8.canonicalCombiningClass(parseInt('0345', 16))).toBe(240)
 61 |     })
 62 |   })
 63 | 
 64 |   describe('dehex', () => {
 65 |     it('works', () => {
 66 |       expect(dehex('0000')).toEqual([0])
 67 |       expect(dehex('0000..0002')).toEqual([0, 1, 2])
 68 |       expect(dehex('000A..000C')).toEqual([10, 11, 12])
 69 |     })
 70 |   })
 71 | 
 72 |   describe('eastAsianWidth', () => {
 73 |     it('works', () => {
 74 |       expect(scp8.eastAsianWidth(0)).toBe('N')
 75 |       expect(scp8.eastAsianWidth(0x001F)).toBe('N')
 76 |       expect(scp8.eastAsianWidth(0x0020)).toBe('Na')
 77 | 
 78 |       expect(scp9.eastAsianWidth(0x00F8)).toBe('A')
 79 |       expect(() => scp9.eastAsianWidth(0xABFF)).toThrow()
 80 | 
 81 |       expect(() => scp10.eastAsianWidth(0xE1000)).toThrow()
 82 |       expect(scp10.eastAsianWidth(0x30000)).toBe('W')
 83 |     })
 84 |   })
 85 | 
 86 |   describe('generalCategory', () => {
 87 |     it('works', () => {
 88 |       expect(scp9.generalCategory(0)).toBe('Cc')
 89 |       expect(scp9.generalCategory(31)).toBe('Cc')
 90 |       expect(scp9.generalCategory(parseInt('055A', 16))).toBe('Po')
 91 |     })
 92 |   })
 93 | 
 94 |   describe('normalizationProperties', () => {
 95 |     it('works', () => {
 96 |       expect(scp10.normalizationProperties(parseInt('037A', 16), 'FC_NFKC')).toBe('0020 03B9')
 97 |       expect(scp10.normalizationProperties(parseInt('E0002', 16), 'NFKC_CF')).toBe('')
 98 |       expect(scp10.normalizationProperties(parseInt('FB1D', 16), 'NFC_QC')).toBe('N')
 99 |       expect(scp10.normalizationProperties(parseInt('10FFFF', 16), 'NFKD_QC')).toBe('Y')
100 |     })
101 |   })
102 | 
103 |   describe('wordBreak', () => {
104 |     it('works', () => {
105 |       expect(() => scp13.wordBreak(-1)).toThrow()
106 |       expect(scp13.wordBreak(parseInt('0000', 16))).toBeUndefined()
107 |       expect(scp13.wordBreak(parseInt('0022', 16))).toBe('Double_Quote')
108 |       expect(scp13.wordBreak(parseInt('0065', 16))).toBe('ALetter')
109 |     })
110 |   })
111 | 
112 |   describe('safeCodePoint', () => {
113 |     it('works', () => {
114 |       expect(scp12(0)).toBe(false)
115 |       expect(scp12(36)).toBe(true)
116 |       expect(scp12(65)).toBe(true)
117 |     })
118 | 
119 |     it('README example', () => {
120 |       expect(getNumSafeCodePoints(scp7)).toBe(93510)
121 |       expect(getNumSafeCodePoints(scp8)).toBe(101064)
122 |       expect(getNumSafeCodePoints(scp9)).toBe(108397)
123 |       expect(getNumSafeCodePoints(scp10)).toBe(116813)
124 |       expect(getNumSafeCodePoints(scp11)).toBe(117422)
125 |       expect(getNumSafeCodePoints(scp12)).toBe(117927)
126 |     })
127 |   })
128 | 
129 |   describe('base65536', () => {
130 |     it('works', () => {
131 |       const safeRange = (min, max) => {
132 |         for (let codePoint = min; codePoint < max; codePoint++) {
133 |           if (scp8.generalCategory(codePoint) !== 'Lo' || !scp8(codePoint)) {
134 |             return false
135 |           }
136 |         }
137 |         return true
138 |       }
139 | 
140 |       const getAllSafeRanges = rangeSize => {
141 |         const allSafeRanges = []
142 |         for (let codePoint = 0; codePoint < (1 << 16) + (1 << 20); codePoint += rangeSize) {
143 |           if (safeRange(codePoint, codePoint + rangeSize)) {
144 |             allSafeRanges.push(codePoint)
145 |           }
146 |         }
147 |         return allSafeRanges
148 |       }
149 | 
150 |       const allSafeRanges = getAllSafeRanges(1 << 8)
151 | 
152 |       const paddingBlockStart = String.fromCodePoint(allSafeRanges.shift())
153 |       expect(paddingBlockStart).toBe('ᔀ')
154 | 
155 |       const blockStarts = allSafeRanges.slice(0, 1 << 8).map(x => String.fromCodePoint(x)).join('')
156 |       expect(blockStarts).toBe(
157 |         '㐀㔀㘀㜀㠀㤀㨀㬀㰀㴀㸀㼀䀀䄀䈀䌀' +
158 |         '䐀䔀䘀䜀䠀䤀䨀䬀䰀一伀倀儀刀匀吀' +
159 |         '唀嘀圀堀夀娀嬀尀崀帀开怀愀戀挀搀' +
160 |         '攀昀最栀椀樀欀氀洀渀漀瀀焀爀猀琀' +
161 |         '甀瘀眀砀礀稀笀簀紀縀缀耀脀舀茀萀' +
162 |         '蔀蘀蜀蠀褀言謀谀贀踀輀退鄀鈀錀鐀' +
163 |         '销阀需頀餀騀鬀鰀鴀鸀ꄀꈀꌀꔀ𐘀𒀀' +
164 |         '𒄀𒈀𓀀𓄀𓈀𓌀𔐀𔔀𖠀𖤀𠀀𠄀𠈀𠌀𠐀𠔀' +
165 |         '𠘀𠜀𠠀𠤀𠨀𠬀𠰀𠴀𠸀𠼀𡀀𡄀𡈀𡌀𡐀𡔀' +
166 |         '𡘀𡜀𡠀𡤀𡨀𡬀𡰀𡴀𡸀𡼀𢀀𢄀𢈀𢌀𢐀𢔀' +
167 |         '𢘀𢜀𢠀𢤀𢨀𢬀𢰀𢴀𢸀𢼀𣀀𣄀𣈀𣌀𣐀𣔀' +
168 |         '𣘀𣜀𣠀𣤀𣨀𣬀𣰀𣴀𣸀𣼀𤀀𤄀𤈀𤌀𤐀𤔀' +
169 |         '𤘀𤜀𤠀𤤀𤨀𤬀𤰀𤴀𤸀𤼀𥀀𥄀𥈀𥌀𥐀𥔀' +
170 |         '𥘀𥜀𥠀𥤀𥨀𥬀𥰀𥴀𥸀𥼀𦀀𦄀𦈀𦌀𦐀𦔀' +
171 |         '𦘀𦜀𦠀𦤀𦨀𦬀𦰀𦴀𦸀𦼀𧀀𧄀𧈀𧌀𧐀𧔀' +
172 |         '𧘀𧜀𧠀𧤀𧨀𧬀𧰀𧴀𧸀𧼀𨀀𨄀𨈀𨌀𨐀𨔀'
173 |       )
174 | 
175 |       // Check East_Asian_Width properties. Each block of 256 characters
176 |       // has the same East_Asian_Width property. 243 of the blocks are 'W' (wide),
177 |       // the other 13 + 1 are 'N' (neutral, which in effect is narrow).
178 |       // This is significant when considering rendering and wrapping.
179 |       const allBlockStarts = [...blockStarts].map(x => x.codePointAt(0))
180 |       const neutralBlockStarts = [...'ᔀꔀ𐘀𒀀𒄀𒈀𓀀𓄀𓈀𓌀𔐀𔔀𖠀𖤀'].map(x => x.codePointAt(0))
181 |       allBlockStarts.forEach(blockStart => {
182 |         for (let i = 0; i < 1 << 8; i++) {
183 |           const codePoint = blockStart + i
184 |           const isInNeutralBlock = neutralBlockStarts
185 |             .some(neutralBlockStart => neutralBlockStart <= codePoint && codePoint < neutralBlockStart + (1 << 8))
186 |           expect(scp8.eastAsianWidth(codePoint)).toBe(isInNeutralBlock ? 'N' : 'W')
187 |         }
188 |       })
189 |     })
190 |   })
191 | 
192 |   describe('base32768', () => {
193 |     it('works', () => {
194 |       const safeRange = function (min, max) {
195 |         for (let codePoint = min; codePoint < max; codePoint++) {
196 |           if (!scp9(codePoint)) {
197 |             return false
198 |           }
199 |         }
200 |         return true
201 |       }
202 | 
203 |       const getAllSafeRanges = rangeSize => {
204 |         const allSafeRanges = []
205 |         for (let codePoint = 0; codePoint < (1 << 16) + (1 << 20); codePoint += rangeSize) {
206 |           if (safeRange(codePoint, codePoint + rangeSize)) {
207 |             allSafeRanges.push(codePoint)
208 |           }
209 |         }
210 |         return allSafeRanges
211 |       }
212 | 
213 |       const rangeSize = 5
214 |       const allSafeRanges = getAllSafeRanges(1 << rangeSize)
215 | 
216 |       const repertoireSizes = []
217 |       for (let i = 15; i > 0; i -= 8) { // Base32768 is a 15-bit encoding of 8-bit binary data
218 |         repertoireSizes.unshift(i - rangeSize)
219 |       }
220 | 
221 |       const repertoireOffsets = repertoireSizes
222 |         .map(x => 1 << x)
223 |         .map((x, i, arr) => x + (i === 0 ? 0 : arr[i - 1])) // cumulative sum
224 |         .map((offset, i, arr) => allSafeRanges
225 |           .slice(i === 0 ? 0 : arr[i - 1], arr[i])
226 |           .map(x => String.fromCodePoint(x))
227 |           .join('')
228 |         )
229 |         .reverse()
230 | 
231 |       expect(repertoireOffsets).toEqual([
232 |         'ҠԀڀڠݠހ߀ကႠᄀᄠᅀᆀᇠሀሠበዠጠᎠᏀᐠᑀᑠᒀᒠᓀᓠᔀᔠᕀᕠᖀᖠᗀᗠᘀᘠᙀᚠᛀកᠠᡀᣀᦀ᧠ᨠᯀᰀᴀ⇠⋀⍀⍠⎀⎠⏀␀─┠╀╠▀■◀◠☀☠♀♠⚀⚠⛀⛠✀✠❀➀➠⠀⠠⡀⡠⢀⢠⣀⣠⤀⤠⥀⥠⦠⨠⩀⪀⪠⫠⬀⬠⭀ⰀⲀⲠⳀⴀⵀ⺠⻀㇀㐀㐠㑀㑠㒀㒠㓀㓠㔀㔠㕀㕠㖀㖠㗀㗠㘀㘠㙀㙠㚀㚠㛀㛠㜀㜠㝀㝠㞀㞠㟀㟠㠀㠠㡀㡠㢀㢠㣀㣠㤀㤠㥀㥠㦀㦠㧀㧠㨀㨠㩀㩠㪀㪠㫀㫠㬀㬠㭀㭠㮀㮠㯀㯠㰀㰠㱀㱠㲀㲠㳀㳠㴀㴠㵀㵠㶀㶠㷀㷠㸀㸠㹀㹠㺀㺠㻀㻠㼀㼠㽀㽠㾀㾠㿀㿠䀀䀠䁀䁠䂀䂠䃀䃠䄀䄠䅀䅠䆀䆠䇀䇠䈀䈠䉀䉠䊀䊠䋀䋠䌀䌠䍀䍠䎀䎠䏀䏠䐀䐠䑀䑠䒀䒠䓀䓠䔀䔠䕀䕠䖀䖠䗀䗠䘀䘠䙀䙠䚀䚠䛀䛠䜀䜠䝀䝠䞀䞠䟀䟠䠀䠠䡀䡠䢀䢠䣀䣠䤀䤠䥀䥠䦀䦠䧀䧠䨀䨠䩀䩠䪀䪠䫀䫠䬀䬠䭀䭠䮀䮠䯀䯠䰀䰠䱀䱠䲀䲠䳀䳠䴀䴠䵀䵠䶀䷀䷠一丠乀习亀亠什仠伀传佀你侀侠俀俠倀倠偀偠傀傠僀僠儀儠兀兠冀冠净几刀删剀剠劀加勀勠匀匠區占厀厠叀叠吀吠呀呠咀咠哀哠唀唠啀啠喀喠嗀嗠嘀嘠噀噠嚀嚠囀因圀圠址坠垀垠埀埠堀堠塀塠墀墠壀壠夀夠奀奠妀妠姀姠娀娠婀婠媀媠嫀嫠嬀嬠孀孠宀宠寀寠尀尠局屠岀岠峀峠崀崠嵀嵠嶀嶠巀巠帀帠幀幠庀庠廀廠开张彀彠往徠忀忠怀怠恀恠悀悠惀惠愀愠慀慠憀憠懀懠戀戠所扠技抠拀拠挀挠捀捠掀掠揀揠搀搠摀摠撀撠擀擠攀攠敀敠斀斠旀无昀映晀晠暀暠曀曠最朠杀杠枀枠柀柠栀栠桀桠梀梠检棠椀椠楀楠榀榠槀槠樀樠橀橠檀檠櫀櫠欀欠歀歠殀殠毀毠氀氠汀池沀沠泀泠洀洠浀浠涀涠淀淠渀渠湀湠満溠滀滠漀漠潀潠澀澠激濠瀀瀠灀灠炀炠烀烠焀焠煀煠熀熠燀燠爀爠牀牠犀犠狀狠猀猠獀獠玀玠珀珠琀琠瑀瑠璀璠瓀瓠甀甠畀畠疀疠痀痠瘀瘠癀癠皀皠盀盠眀眠着睠瞀瞠矀矠砀砠础硠碀碠磀磠礀礠祀祠禀禠秀秠稀稠穀穠窀窠竀章笀笠筀筠简箠節篠簀簠籀籠粀粠糀糠紀素絀絠綀綠緀締縀縠繀繠纀纠绀绠缀缠罀罠羀羠翀翠耀耠聀聠肀肠胀胠脀脠腀腠膀膠臀臠舀舠艀艠芀芠苀苠茀茠荀荠莀莠菀菠萀萠葀葠蒀蒠蓀蓠蔀蔠蕀蕠薀薠藀藠蘀蘠虀虠蚀蚠蛀蛠蜀蜠蝀蝠螀螠蟀蟠蠀蠠血衠袀袠裀裠褀褠襀襠覀覠觀觠言訠詀詠誀誠諀諠謀謠譀譠讀讠诀诠谀谠豀豠貀負賀賠贀贠赀赠趀趠跀跠踀踠蹀蹠躀躠軀軠輀輠轀轠辀辠迀迠退造遀遠邀邠郀郠鄀鄠酀酠醀醠釀釠鈀鈠鉀鉠銀銠鋀鋠錀錠鍀鍠鎀鎠鏀鏠鐀鐠鑀鑠钀钠铀铠销锠镀镠門閠闀闠阀阠陀陠隀隠雀雠需霠靀靠鞀鞠韀韠頀頠顀顠颀颠飀飠餀餠饀饠馀馠駀駠騀騠驀驠骀骠髀髠鬀鬠魀魠鮀鮠鯀鯠鰀鰠鱀鱠鲀鲠鳀鳠鴀鴠鵀鵠鶀鶠鷀鷠鸀鸠鹀鹠麀麠黀黠鼀鼠齀齠龀龠ꀀꀠꁀꁠꂀꂠꃀꃠꄀꄠꅀꅠꆀꆠꇀꇠꈀꈠꉀꉠꊀꊠꋀꋠꌀꌠꍀꍠꎀꎠꏀꏠꐀꐠꑀꑠ꒠ꔀꔠꕀꕠꖀꖠꗀꗠꙀꚠꛀ꜀꜠ꝀꞀꡀ',
233 |         'ƀɀɠʀ'
234 |       ])
235 |     })
236 |   })
237 | 
238 |   describe('base2048', () => {
239 |     it('works', () => {
240 |       const repertoireSizes = []
241 |       for (let i = 11; i > 0; i -= 8) { // Base2048 is an 11-bit encoding of 8-bit binary data
242 |         repertoireSizes.unshift(1 << i)
243 |       }
244 | 
245 |       let codePoint = 0
246 |       const repertoires = repertoireSizes
247 |         .map(repertoireSize => {
248 |           const codePoints = []
249 |           while (codePoints.length < repertoireSize) {
250 |             if (
251 |               scp10(codePoint) &&
252 |               !scp10.generalCategory(codePoint).startsWith('S') &&
253 |               scp10.generalCategory(codePoint) !== 'Lm'
254 |             ) {
255 |               codePoints.push(codePoint)
256 |             }
257 |             codePoint++
258 |           }
259 |           return codePoints
260 |         })
261 |         .map(codePoints => codePoints.map(x => String.fromCodePoint(x)).join(''))
262 |         .reverse()
263 | 
264 |       expect(repertoires).toEqual([
265 |         '89ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÆÐØÞßæðøþĐđĦħıĸŁłŊŋŒœŦŧƀƁƂƃƄƅƆƇƈƉƊƋƌƍƎƏƐƑƒƓƔƕƖƗƘƙƚƛƜƝƞƟƢƣƤƥƦƧƨƩƪƫƬƭƮƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿǀǁǂǃǝǤǥǶǷȜȝȠȡȢȣȤȥȴȵȶȷȸȹȺȻȼȽȾȿɀɁɂɃɄɅɆɇɈɉɊɋɌɍɎɏɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿʀʁʂʃʄʅʆʇʈʉʊʋʌʍʎʏʐʑʒʓʔʕʖʗʘʙʚʛʜʝʞʟʠʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯͰͱͲͳͶͷͻͼͽͿΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρςστυφχψωϏϗϘϙϚϛϜϝϞϟϠϡϢϣϤϥϦϧϨϩϪϫϬϭϮϯϳϷϸϺϻϼϽϾϿЂЄЅІЈЉЊЋЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзиклмнопрстуфхцчшщъыьэюяђєѕіјљњћџѠѡѢѣѤѥѦѧѨѩѪѫѬѭѮѯѰѱѲѳѴѵѸѹѺѻѼѽѾѿҀҁҊҋҌҍҎҏҐґҒғҔҕҖҗҘҙҚқҜҝҞҟҠҡҢңҤҥҦҧҨҩҪҫҬҭҮүҰұҲҳҴҵҶҷҸҹҺһҼҽҾҿӀӃӄӅӆӇӈӉӊӋӌӍӎӏӔӕӘәӠӡӨөӶӷӺӻӼӽӾӿԀԁԂԃԄԅԆԇԈԉԊԋԌԍԎԏԐԑԒԓԔԕԖԗԘԙԚԛԜԝԞԟԠԡԢԣԤԥԦԧԨԩԪԫԬԭԮԯԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆאבגדהוזחטיךכלםמןנסעףפץצקרשתװױײؠءابةتثجحخدذرزسشصضطظعغػؼؽؾؿفقكلمنهوىي٠١٢٣٤٥٦٧٨٩ٮٯٱٲٳٴٹٺٻټٽپٿڀځڂڃڄڅچڇڈډڊڋڌڍڎڏڐڑڒړڔڕږڗژڙښڛڜڝڞڟڠڡڢڣڤڥڦڧڨکڪګڬڭڮگڰڱڲڳڴڵڶڷڸڹںڻڼڽھڿہۃۄۅۆۇۈۉۊۋیۍێۏېۑےەۮۯ۰۱۲۳۴۵۶۷۸۹ۺۻۼۿܐܒܓܔܕܖܗܘܙܚܛܜܝܞܟܠܡܢܣܤܥܦܧܨܩܪܫܬܭܮܯݍݎݏݐݑݒݓݔݕݖݗݘݙݚݛݜݝݞݟݠݡݢݣݤݥݦݧݨݩݪݫݬݭݮݯݰݱݲݳݴݵݶݷݸݹݺݻݼݽݾݿހށނރބޅކއވމފދތލގޏސޑޒޓޔޕޖޗޘޙޚޛޜޝޞޟޠޡޢޣޤޥޱ߀߁߂߃߄߅߆߇߈߉ߊߋߌߍߎߏߐߑߒߓߔߕߖߗߘߙߚߛߜߝߞߟߠߡߢߣߤߥߦߧߨߩߪࠀࠁࠂࠃࠄࠅࠆࠇࠈࠉࠊࠋࠌࠍࠎࠏࠐࠑࠒࠓࠔࠕࡀࡁࡂࡃࡄࡅࡆࡇࡈࡉࡊࡋࡌࡍࡎࡏࡐࡑࡒࡓࡔࡕࡖࡗࡘࡠࡡࡢࡣࡤࡥࡦࡧࡨࡩࡪࢠࢡࢢࢣࢤࢥࢦࢧࢨࢩࢪࢫࢬࢭࢮࢯࢰࢱࢲࢳࢴࢶࢷࢸࢹࢺࢻࢼࢽऄअआइईउऊऋऌऍऎएऐऑऒओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलळवशषसहऽॐॠॡ०१२३४५६७८९ॲॳॴॵॶॷॸॹॺॻॼॽॾॿঀঅআইঈউঊঋঌএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহঽৎৠৡ০১২৩৪৫৬৭৮৯ৰৱ৴৵৶৷৸৹ৼਅਆਇਈਉਊਏਐਓਔਕਖਗਘਙਚਛਜਝਞਟਠਡਢਣਤਥਦਧਨਪਫਬਭਮਯਰਲਵਸਹੜ੦੧੨੩੪੫੬੭੮੯ੲੳੴઅઆઇઈઉઊઋઌઍએઐઑઓઔકખગઘઙચછજઝઞટઠડઢણતથદધનપફબભમયરલળવશષસહઽૐૠૡ૦૧૨૩૪૫૬૭૮૯ૹଅଆଇଈଉଊଋଌଏଐଓଔକଖଗଘଙଚଛଜଝଞଟଠଡଢଣତଥଦଧନପଫବଭମଯରଲଳଵଶଷସହଽୟୠୡ୦୧୨୩୪୫୬୭୮୯ୱ୲୳୴୵୶୷ஃஅஆஇஈஉஊஎஏஐஒஓகஙசஜஞடணதநனபமயரறலளழவஶஷஸஹௐ௦௧௨௩௪௫௬௭௮௯௰௱௲అఆఇఈఉఊఋఌఎఏఐఒఓఔకఖగఘఙచఛజఝఞటఠడఢణతథదధనపఫబభమయరఱలళఴవశషసహఽౘౙౚౠౡ౦౧౨౩౪౫౬౭౮౯౸౹౺౻౼౽౾ಀಅಆಇಈಉಊಋಌಎಏಐಒಓಔಕಖಗಘಙಚಛಜಝಞಟಠಡಢಣತಥದಧನಪಫಬಭಮಯರಱಲಳವಶಷಸಹಽೞೠೡ೦೧೨೩೪೫೬೭೮೯ೱೲഅആഇഈഉഊഋഌഎഏഐഒഓഔകഖഗഘങചഛജഝഞടഠഡഢണതഥദധനഩപഫബഭമയരറലളഴവശഷസഹഺഽൎൔൕൖ൘൙൚൛൜൝൞ൟൠൡ൦൧൨൩൪൫൬൭൮൯൰൱൲൳൴൵൶൷൸ൺൻർൽൾൿඅආඇඈඉඊඋඌඍඎඏඐඑඒඓඔඕඖකඛගඝඞඟචඡජඣඤඥඦටඨඩඪණඬතථදධනඳපඵබභමඹයරලවශෂසහළෆ෦෧෨෩෪෫෬෭෮෯กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะาเแโใไๅ๐๑๒๓๔๕๖๗๘๙ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯະາຽເແໂໃໄ໐໑໒໓໔໕໖໗໘໙ໞໟༀ༠༡༢༣༤༥༦༧༨༩༪༫༬༭༮༯༰༱༲༳ཀཁགངཅཆཇཉཊཋཌཎཏཐདནཔཕབམཙཚཛཝཞཟའཡརལཤཥསཧཨཪཫཬྈ
ྉྊྋྌကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဢဣဤဥဧဨဩဪဿ၀၁၂၃၄၅၆၇၈၉ၐၑၒၓၔၕ',
266 |         '01234567'
267 |       ])
268 |     })
269 |   })
270 | 
271 |   describe('base2e15', () => {
272 |     it('is not safe', () => {
273 |       const repertoires = [
274 |         [0x3480, 0x4DB6],
275 |         [0x4E00, 0x8926],
276 |         [0xAC00, 0xD7A4],
277 |         [0x3400, 0x3480]
278 |       ]
279 |       const badGc = []
280 |       const badCcc = []
281 |       const badNfdQc = []
282 |       const badNfkdQc = []
283 |       repertoires.forEach(repertoire => {
284 |         for (let i = repertoire[0]; i < repertoire[1]; i++) {
285 |           if (scp10.generalCategory(i) !== 'Lo') {
286 |             badGc.push(i)
287 |           }
288 |           if (scp10.canonicalCombiningClass(i) !== 0) {
289 |             badCcc.push(i)
290 |           }
291 |           if (scp10.normalizationProperties(i, 'NFD_QC') !== 'Y') {
292 |             badNfdQc.push(i)
293 |           }
294 |           if (scp10.normalizationProperties(i, 'NFKD_QC') !== 'Y') {
295 |             badNfkdQc.push(i)
296 |           }
297 |         }
298 |       })
299 |       expect(badGc).toEqual([])
300 |       expect(badCcc).toEqual([])
301 |       expect(badNfdQc.length).toBe(11172)
302 |       expect(badNfkdQc.length).toBe(11172)
303 |     })
304 |   })
305 | 
306 |   describe('base32k', () => {
307 |     it('is not safe', () => {
308 |       const lanes = [
309 |         [0x4000, 0xA000],
310 |         [0xB000, 0xD000]
311 |       ]
312 |       const badNfdQc = []
313 |       const badNfkdQc = []
314 |       lanes.forEach(lane => {
315 |         for (let i = lane[0]; i < lane[1]; i++) {
316 |           if (scp10.normalizationProperties(i, 'NFD_QC') !== 'Y') {
317 |             badNfdQc.push(i)
318 |           }
319 |           if (scp10.normalizationProperties(i, 'NFKD_QC') !== 'Y') {
320 |             badNfkdQc.push(i)
321 |           }
322 |         }
323 |       })
324 |       expect(badNfdQc.length).toBe(8192)
325 |       expect(badNfkdQc.length).toBe(8192)
326 |     })
327 |   })
328 | 
329 |   describe('14', () => {
330 |     it('works', async () => {
331 |       const scp14 = await SafeCodePoint('14.0.0')
332 |       const numCodePoints = (1 << 16) + (1 << 20)
333 |       let numAssigned = 0
334 |       let numSafe = 0
335 |       let numSafeLetter = 0
336 |       let numSafeLetterOther = 0
337 |       for (let codePoint = 0; codePoint < numCodePoints; codePoint++) {
338 |         const gc = scp14.generalCategory(codePoint)
339 |         if (gc !== 'Cn') {
340 |           numAssigned++
341 |           if (scp14(codePoint)) {
342 |             numSafe++
343 |             if (gc.startsWith('L')) {
344 |               numSafeLetter++
345 |               if (gc === 'Lo') {
346 |                 numSafeLetterOther++
347 |               }
348 |             }
349 |           }
350 |         }
351 |       }
352 |       expect(numAssigned).toBe(284278)
353 |       expect(numSafe).toBe(124456)
354 |       expect(numSafeLetter).toBe(116231)
355 |       expect(numSafeLetterOther).toBe(113876)
356 |     })
357 |   })
358 | 
359 |   describe('safeCategories', () => {
360 |     it('works', async () => {
361 |       const scp17WithNothingSafe = await SafeCodePoint('17.0.0', { safeCategories: {} })
362 |       const scp17WithEverythingSafe = await SafeCodePoint('17.0.0', { safeCategories: new Proxy({}, { get (_, prop) { return true } }) })
363 | 
364 |       const numSafeCodePoints17 = getNumSafeCodePoints(scp17)
365 |       const numSafeCodePoints17WithNothingSafe = getNumSafeCodePoints(scp17WithNothingSafe)
366 |       const numSafeCodePoints17WithEverythingSafe = getNumSafeCodePoints(scp17WithEverythingSafe)
367 | 
368 |       expect(numSafeCodePoints17WithNothingSafe).toBeLessThan(numSafeCodePoints17)
369 |       expect(numSafeCodePoints17).toBeLessThan(numSafeCodePoints17WithEverythingSafe)
370 |     })
371 |   })
372 | })
373 | 


--------------------------------------------------------------------------------