├── .editorconfig ├── .github ├── dependabot.yml └── workflows │ └── workflow-1.yml ├── .gitattributes ├── lib ├── dehex.js ├── general-category.js ├── ucd.js ├── east-asian-width.js ├── word-break.js ├── canonical-combining-class.js ├── normalization-properties.js ├── index.js └── index.spec.mjs ├── CHANGELOG.md ├── .gitignore ├── LICENSE.txt ├── package.json └── README.md /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | insert_final_newline = true 3 | charset = utf-8 4 | indent_style = space 5 | indent_size = 2 6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: 'npm' 4 | directory: '/' 5 | schedule: 6 | interval: 'monthly' 7 | ignore: 8 | - dependency-name: '*' 9 | update-types: 10 | - 'version-update:semver-minor' 11 | - 'version-update:semver-patch' 12 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /lib/dehex.js: -------------------------------------------------------------------------------- 1 | // Returns an array of code points 2 | // e.g. "0000" becomes `[0]` 3 | // e.g. "0000..000A" becomes `[0, ..., 10]` 4 | export default str => { 5 | const components = str.split('..') 6 | if (components.length !== 1 && components.length !== 2) { 7 | throw new Error('Could not dehex this string') 8 | } 9 | const lower = parseInt(components[0], 16) 10 | const upper = components.length === 2 ? parseInt(components[1], 16) : lower 11 | const codePoints = [] 12 | for (let codePoint = lower; codePoint <= upper; codePoint++) { 13 | codePoints.push(codePoint) 14 | } 15 | return codePoints 16 | } 17 | -------------------------------------------------------------------------------- /.github/workflows/workflow-1.yml: -------------------------------------------------------------------------------- 1 | name: 'Travis CI replacement' 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - '**' 7 | 8 | jobs: 9 | build-job: 10 | runs-on: 'ubuntu-latest' 11 | 12 | strategy: 13 | matrix: 14 | node-version: ['14.x', '16.x', '18.x', '20.x', '22.x', '24.x'] 15 | 16 | steps: 17 | - uses: 'actions/checkout@v2' 18 | 19 | - name: 'Use Node.js ${{ matrix.node-version }}' 20 | uses: 'actions/setup-node@v1' 21 | with: 22 | node-version: '${{ matrix.node-version }}' 23 | 24 | - name: 'Actual npm tasks' 25 | run: | 26 | npm install 27 | npm run test 28 | -------------------------------------------------------------------------------- /lib/general-category.js: -------------------------------------------------------------------------------- 1 | /** UCD General_Category resources. */ 2 | 3 | import dehex from './dehex.js' 4 | 5 | /** Get GC for a specific code point */ 6 | export default data => { 7 | const byCodePoint = {} 8 | 9 | data.forEach(function (row) { 10 | const codePoints = dehex(row[0]) 11 | const gc = row[1] // E.g. "Lo", "Po" 12 | 13 | codePoints.forEach(function (codePoint) { 14 | if (codePoint in byCodePoint) { 15 | throw new Error(codePoint) 16 | } 17 | byCodePoint[codePoint] = gc 18 | }) 19 | }) 20 | 21 | return codePoint => { 22 | if (!(codePoint in byCodePoint)) { 23 | throw new Error('General_Category unknown for ' + String(codePoint)) 24 | } 25 | return byCodePoint[codePoint] 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /lib/ucd.js: -------------------------------------------------------------------------------- 1 | // This module reads in most kinds of Unicode Character Data set text files. 2 | // Comments (marked with a #) are stripped, empty lines are removed. The 3 | // remaining lines are divided up into fields on the semicolon delimiter, and 4 | // the fields have whitespace trimmed. 5 | 6 | import nodeFetch from 'node-fetch' 7 | 8 | export const get = async (version, name) => { 9 | const url = `http://www.unicode.org/Public/${version}/ucd/${name}` 10 | const res = await nodeFetch(url) 11 | if (!res.ok) { 12 | throw Error(`${res.status} while fetching ${url}`) 13 | } 14 | const body = await res.text() 15 | return body 16 | .split(/\r?\n/) 17 | .map(line => line.replace(/^(.*?)(?:#.*)?$/, '$1')) 18 | .filter(line => line !== '') 19 | .map(line => line.split(';').map(field => field.trim())) 20 | } 21 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## 3.x.x 4 | 5 | `safe-code-point` now uses ES modules, not CommonJS modules. 6 | 7 | ## 2.x.x 8 | 9 | The API of `safe-code-point` has completely changed to asynchronously look up Unicode code point data. Code like: 10 | 11 | ```js 12 | const safeCodePoint = require('safe-code-point') 13 | 14 | const a = safeCodePoint(codePoint, '11.0') 15 | const b = safeCodePoint.generalCategory(codePoint, '11.0') 16 | ``` 17 | 18 | should change to something like: 19 | 20 | ```js 21 | const SafeCodePoint = require('safe-code-point') 22 | 23 | SafeCodePoint('11.0.0').then(safeCodePoint => { 24 | const a = safeCodePoint(codePoint) 25 | const b = safeCodePoint.generalCategory(codePoint) 26 | }) 27 | ``` 28 | 29 | `safeCodePoint.supportedVersions` has been removed. 30 | 31 | ## 1.x.x 32 | 33 | Initial release. 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | *.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | 49 | # Mine 50 | node_modules 51 | -------------------------------------------------------------------------------- /lib/east-asian-width.js: -------------------------------------------------------------------------------- 1 | // UCD normalization property resources. 2 | 3 | import dehex from './dehex.js' 4 | 5 | /** Get property value for a specific code point, or default if set */ 6 | export default data => { 7 | const byCodePoint = {} 8 | 9 | // Defaults 10 | for (let codePoint = 0; codePoint < (1 << 20) + (1 << 16); codePoint++) { 11 | byCodePoint[codePoint] = undefined // unknown 12 | } 13 | 14 | data.forEach(function (row) { 15 | const codePoints = dehex(row[0]) 16 | const eastAsianWidth = row[1] // "A", "F", "H", "N", "Na" or "W" 17 | 18 | codePoints.forEach(function (codePoint) { 19 | byCodePoint[codePoint] = eastAsianWidth 20 | }) 21 | }) 22 | 23 | return codePoint => { 24 | const value = byCodePoint[codePoint] 25 | if (value === undefined) { 26 | throw new Error('Code point has no East_Asian_Width specified: ' + String(codePoint)) 27 | } 28 | return value 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /lib/word-break.js: -------------------------------------------------------------------------------- 1 | /** UCD Word_Break resources. */ 2 | 3 | import dehex from './dehex.js' 4 | 5 | /** Get Word_Break for a specific code point */ 6 | export default data => { 7 | const byCodePoint = {} 8 | 9 | // Defaults 10 | for (let codePoint = 0; codePoint < (1 << 20) + (1 << 16); codePoint++) { 11 | byCodePoint[codePoint] = undefined 12 | } 13 | 14 | data.forEach(function (row) { 15 | const codePoints = dehex(row[0]) 16 | const wbp = row[1] // E.g. "Numeric", "ALetter" 17 | 18 | codePoints.forEach(function (codePoint) { 19 | if (codePoint in byCodePoint && byCodePoint[codePoint] !== undefined) { 20 | throw new Error(codePoint) 21 | } 22 | byCodePoint[codePoint] = wbp 23 | }) 24 | }) 25 | 26 | return codePoint => { 27 | if (!(codePoint in byCodePoint)) { 28 | throw new Error('Word_Break unknown for ' + String(codePoint)) 29 | } 30 | return byCodePoint[codePoint] 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /lib/canonical-combining-class.js: -------------------------------------------------------------------------------- 1 | // UCD Canonical_Combining_Class resources. 2 | 3 | import dehex from './dehex.js' 4 | 5 | /** Get CCC for a specific code point */ 6 | export default data => { 7 | const byCodePoint = {} 8 | 9 | // Defaults 10 | const defaultCcc = 0 11 | for (let codePoint = 0; codePoint < (1 << 20) + (1 << 16); codePoint++) { 12 | byCodePoint[codePoint] = defaultCcc 13 | } 14 | 15 | data.forEach(function (row) { 16 | const codePoints = dehex(row[0]) 17 | const ccc = Number(row[1]) 18 | 19 | codePoints.forEach(function (codePoint) { 20 | if (codePoint in byCodePoint && byCodePoint[codePoint] !== defaultCcc) { 21 | throw new Error(codePoint) 22 | } 23 | byCodePoint[codePoint] = ccc 24 | }) 25 | }) 26 | 27 | return codePoint => { 28 | if (!(codePoint in byCodePoint)) { 29 | throw new Error('Canonical_Combining_Class unknown for ' + String(codePoint)) 30 | } 31 | return byCodePoint[codePoint] 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 qntm 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /lib/normalization-properties.js: -------------------------------------------------------------------------------- 1 | // UCD normalization property resources. 2 | 3 | import dehex from './dehex.js' 4 | 5 | /** Get property value for a specific code point, or default if set */ 6 | export default data => { 7 | const byCodePoint = {} 8 | 9 | // Defaults 10 | for (let codePoint = 0; codePoint < (1 << 20) + (1 << 16); codePoint++) { 11 | byCodePoint[codePoint] = { 12 | NFD_QC: 'Y', 13 | NFC_QC: 'Y', 14 | NFKD_QC: 'Y', 15 | NFKC_QC: 'Y' 16 | } 17 | } 18 | 19 | data.forEach(function (row) { 20 | const codePoints = dehex(row[0]) 21 | const property = row[1] // E.g. "NFD_QC" 22 | const value = row[2] // E.g. "N" 23 | 24 | codePoints.forEach(function (codePoint) { 25 | byCodePoint[codePoint][property] = value 26 | }) 27 | }) 28 | 29 | return (codePoint, property) => { 30 | const properties = byCodePoint[codePoint] 31 | if (properties === undefined) { 32 | throw new Error('Unrecognised code point: ' + String(codePoint)) 33 | } 34 | const value = properties[property] 35 | if (value === undefined) { 36 | throw new Error('Unrecognised property: ' + property) 37 | } 38 | return value 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "safe-code-point", 3 | "version": "3.0.4", 4 | "description": "Ascertains whether a Unicode code point is 'safe' for the purposes of encoding binary data", 5 | "homepage": "https://github.com/qntm/safe-code-point", 6 | "repository": { 7 | "type": "git", 8 | "url": "git://github.com/qntm/safe-code-point.git" 9 | }, 10 | "main": "lib/index.js", 11 | "type": "module", 12 | "keywords": [ 13 | "base64", 14 | "base65536", 15 | "base32768", 16 | "base2048", 17 | "base131072", 18 | "encoding", 19 | "unicode", 20 | "text", 21 | "decoding", 22 | "binary" 23 | ], 24 | "scripts": { 25 | "jasmine": "jasmine", 26 | "standard": "standard --fix", 27 | "tag": "node -e \"require('child_process').spawn('git', ['tag', `v${require('./package.json').version}`], { stdio: 'inherit' })\"", 28 | "tag-and-publish": "npm run tag && git push --tags && npm publish && npm version patch --no-git-tag-version && git add . && git commit -m \"Bump patch\" && git push", 29 | "test": "standard && jasmine \"**/*.spec.mjs\"" 30 | }, 31 | "author": "qntm", 32 | "dependencies": { 33 | "node-fetch": "^3.0.0" 34 | }, 35 | "devDependencies": { 36 | "jasmine": "^5.0.0", 37 | "standard": "^17.0.0" 38 | }, 39 | "files": [ 40 | "lib", 41 | "!lib/**/*.spec.mjs" 42 | ], 43 | "license": "MIT" 44 | } 45 | -------------------------------------------------------------------------------- /lib/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | Ascertain whether a Unicode code point is "safe" for use in a binary encoding. 3 | */ 4 | 5 | import * as ucd from './ucd.js' 6 | import canonicalCombiningClass from './canonical-combining-class.js' 7 | import eastAsianWidth from './east-asian-width.js' 8 | import generalCategory from './general-category.js' 9 | import normalizationProperties from './normalization-properties.js' 10 | import wordBreak from './word-break.js' 11 | 12 | const quickChecks = [ 13 | 'NFD_QC', // canonical decomposition 14 | 'NFC_QC', // canonical decomposition + canonical composition 15 | 'NFKD_QC', // compatibility decomposition 16 | 'NFKC_QC' // compatibility decomposition + canonical composition 17 | ] 18 | 19 | // General Categories CONSIDERED SAFE 20 | const DEFAULT_SAFE_CATEGORIES = { 21 | Ll: true, // Letter, Lowercase 22 | Lm: true, // Letter, Modifier 23 | Lo: true, // Letter, Other 24 | Lt: true, // Letter, Titlecase 25 | Lu: true, // Letter, Uppercase 26 | Me: false, // Mark, Enclosing 27 | Mn: false, // Mark, Nonspacing 28 | Mc: false, // Mark, Spacing Combining 29 | Nd: true, // Number, Decimal Digit 30 | Nl: true, // Number, Letter 31 | No: true, // Number, Other 32 | Cc: false, // Other, Control 33 | Cf: false, // Other, Format 34 | Cn: false, // Other, Not Assigned (no characters in the file have this property) 35 | Co: false, // Other, Private Use 36 | Cs: false, // Other, Surrogate 37 | Pe: false, // Punctuation, Close 38 | Pc: false, // Punctuation, Connector 39 | Pd: false, // Punctuation, Dash 40 | Pf: false, // Punctuation, Final quote (may behave like Ps or Pe depending on usage) 41 | Pi: false, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage) 42 | Ps: false, // Punctuation, Open 43 | Po: false, // Punctuation, Other 44 | Zl: false, // Separator, Line 45 | Zp: false, // Separator, Paragraph 46 | Zs: false, // Separator, Space 47 | Sc: true, // Symbol, Currency 48 | Sm: true, // Symbol, Math 49 | Sk: true, // Symbol, Modifier 50 | So: true // Symbol, Other 51 | } 52 | 53 | export default async (version, { safeCategories = DEFAULT_SAFE_CATEGORIES } = {}) => { 54 | // First load up a bunch of data 55 | const cccData = await ucd.get(version, 'extracted/DerivedCombiningClass.txt') 56 | const eawData = await ucd.get(version, 'EastAsianWidth.txt') 57 | const gcData = await ucd.get(version, 'extracted/DerivedGeneralCategory.txt') 58 | const npData = await ucd.get(version, 'DerivedNormalizationProps.txt') 59 | const wbpData = await ucd.get(version, 'auxiliary/WordBreakProperty.txt') 60 | 61 | const ccc = canonicalCombiningClass(cccData) 62 | const eaw = eastAsianWidth(eawData) 63 | const gc = generalCategory(gcData) 64 | const np = normalizationProperties(npData) 65 | const wb = wordBreak(wbpData) 66 | 67 | const safeCodePoint = codePoint => { 68 | const passesQuickChecks = quickChecks.every(property => 69 | np(codePoint, property) === 'Y' 70 | ) 71 | const inSafeGc = safeCategories[gc(codePoint)] === true 72 | const hasCcc0 = ccc(codePoint) === 0 73 | 74 | return passesQuickChecks && inSafeGc && hasCcc0 75 | } 76 | 77 | safeCodePoint.canonicalCombiningClass = ccc 78 | safeCodePoint.eastAsianWidth = eaw 79 | safeCodePoint.generalCategory = gc 80 | safeCodePoint.normalizationProperties = np 81 | safeCodePoint.wordBreak = wb 82 | 83 | return safeCodePoint 84 | } 85 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # safe-code-point 2 | 3 | Package for determining whether the supplied code point is ["safe"](https://qntm.org/safe). This module (well, code very much like it) was used to generate code points for [Base65536](https://github.com/qntm/base65536), [Base32768](https://github.com/qntm/base32768) and [Base2048](https://github.com/qntm/base2048). 4 | 5 | This package supports ES modules only. 6 | 7 | ## Example 8 | 9 | ```js 10 | import SafeCodePoint from 'safe-code-point' 11 | 12 | const safeCodePoint = await SafeCodePoint('17.0.0') 13 | const numCodePoints = (1 << 16) + (1 << 20) 14 | 15 | let numSafeCodePoints = 0 16 | for (let codePoint = 0; codePoint < numCodePoints; codePoint++) { 17 | if (safeCodePoint(codePoint)) { 18 | numSafeCodePoints++ 19 | } 20 | } 21 | 22 | console.log(numSafeCodePoints) 23 | ``` 24 | 25 | ## API 26 | 27 | ### SafeCodePoint(version, options) 28 | 29 | Returns a promise which resolves to a `safeCodePoint` function for the supplied version of Unicode. 30 | 31 | Data is fetched from [the Unicode website](http://www.unicode.org/Public) at run time. At the time of writing, supported version strings are a subset of those seen in that directory: `'4.1.0'` to `'17.0.0'`. (Earlier versions do not provide the data in the same consumable structure.) 32 | 33 | #### options 34 | 35 | An optional object with the keys acting as additional options. The options are as follows: 36 | - **`safeCategories`**: An object that determines whether a codepoint category is safe or not. The keys are the category, and the value is `true` if it is safe, and anything else if it is not safe. If not passed, the default parameters are shown in the example below. 37 | 38 | Example: 39 | ```js 40 | const safeCodePoint = await SafeCodePoint('17.0.0', { 41 | safeCategories: { 42 | Ll: true, // Letter, Lowercase 43 | Lm: true, // Letter, Modifier 44 | Lo: true, // Letter, Other 45 | Lt: true, // Letter, Titlecase 46 | Lu: true, // Letter, Uppercase 47 | Me: false, // Mark, Enclosing 48 | Mn: false, // Mark, Nonspacing 49 | Mc: false, // Mark, Spacing Combining 50 | Nd: true, // Number, Decimal Digit 51 | Nl: true, // Number, Letter 52 | No: true, // Number, Other 53 | Cc: false, // Other, Control 54 | Cf: false, // Other, Format 55 | Cn: false, // Other, Not Assigned (no characters in the file have this property) 56 | Co: false, // Other, Private Use 57 | Cs: false, // Other, Surrogate 58 | Pe: false, // Punctuation, Close 59 | Pc: false, // Punctuation, Connector 60 | Pd: false, // Punctuation, Dash 61 | Pf: false, // Punctuation, Final quote (may behave like Ps or Pe depending on usage) 62 | Pi: false, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage) 63 | Ps: false, // Punctuation, Open 64 | Po: false, // Punctuation, Other 65 | Zl: false, // Separator, Line 66 | Zp: false, // Separator, Paragraph 67 | Zs: false, // Separator, Space 68 | Sc: true, // Symbol, Currency 69 | Sm: true, // Symbol, Math 70 | Sk: true, // Symbol, Modifier 71 | So: true // Symbol, Other 72 | } 73 | }) 74 | ``` 75 | 76 | ### safeCodePoint(codePoint) 77 | 78 | Returns a Boolean indicating whether the supplied code point is safe (is not a member of any unsafe Unicode General Categories, has a canonical combining class of 0 and survives all forms of normalization). `codePoint` should be an integer from `0` to `1114111` inclusive. 79 | 80 | ### safeCodePoint.generalCategory(codepoint) 81 | 82 | Returns the Unicode General Category of the supplied code point as a two-character string, *e.g.* `"Lo"` for "Letter, other". 83 | 84 | ### safeCodePoint.wordBreak(codepoint) 85 | 86 | Returns the [Word_Break property](https://unicode.org/reports/tr29/#Table_Word_Break_Property_Values) value of the supplied code point, *e.g.* `'Numeric'`, `'ALetter'` or (in most cases) `undefined`. 87 | -------------------------------------------------------------------------------- /lib/index.spec.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-env jasmine */ 2 | 3 | // Jasmine can be configured to handle ES modules with .js extension, 4 | // but naming this file .mjs is easier 5 | 6 | import dehex from './dehex.js' 7 | import SafeCodePoint from './index.js' 8 | 9 | const numCodePoints = (1 << 16) + (1 << 20) 10 | const getNumSafeCodePoints = safeCodePoint => { 11 | let numSafeCodePoints = 0 12 | for (let codePoint = 0; codePoint < numCodePoints; codePoint++) { 13 | if (safeCodePoint(codePoint)) { 14 | numSafeCodePoints++ 15 | } 16 | } 17 | return numSafeCodePoints 18 | } 19 | 20 | describe('safe-code-point', () => { 21 | let scp7 22 | let scp8 23 | let scp9 24 | let scp10 25 | let scp11 26 | let scp12 27 | let scp13 28 | let scp17 29 | 30 | // First load some files 31 | beforeAll(async () => { 32 | scp7 = await SafeCodePoint('7.0.0') 33 | }) 34 | beforeAll(async () => { 35 | scp8 = await SafeCodePoint('8.0.0') 36 | }) 37 | beforeAll(async () => { 38 | scp9 = await SafeCodePoint('9.0.0') 39 | }) 40 | beforeAll(async () => { 41 | scp10 = await SafeCodePoint('10.0.0') 42 | }) 43 | beforeAll(async () => { 44 | scp11 = await SafeCodePoint('11.0.0') 45 | }) 46 | beforeAll(async () => { 47 | scp12 = await SafeCodePoint('12.0.0') 48 | }) 49 | beforeAll(async () => { 50 | scp13 = await SafeCodePoint('13.0.0') 51 | }) 52 | beforeAll(async () => { 53 | scp17 = await SafeCodePoint('17.0.0') 54 | }) 55 | 56 | describe('canonicalCombiningClass', () => { 57 | it('works', () => { 58 | expect(scp8.canonicalCombiningClass(0)).toBe(0) 59 | expect(scp8.canonicalCombiningClass(30)).toBe(0) 60 | expect(scp8.canonicalCombiningClass(parseInt('0345', 16))).toBe(240) 61 | }) 62 | }) 63 | 64 | describe('dehex', () => { 65 | it('works', () => { 66 | expect(dehex('0000')).toEqual([0]) 67 | expect(dehex('0000..0002')).toEqual([0, 1, 2]) 68 | expect(dehex('000A..000C')).toEqual([10, 11, 12]) 69 | }) 70 | }) 71 | 72 | describe('eastAsianWidth', () => { 73 | it('works', () => { 74 | expect(scp8.eastAsianWidth(0)).toBe('N') 75 | expect(scp8.eastAsianWidth(0x001F)).toBe('N') 76 | expect(scp8.eastAsianWidth(0x0020)).toBe('Na') 77 | 78 | expect(scp9.eastAsianWidth(0x00F8)).toBe('A') 79 | expect(() => scp9.eastAsianWidth(0xABFF)).toThrow() 80 | 81 | expect(() => scp10.eastAsianWidth(0xE1000)).toThrow() 82 | expect(scp10.eastAsianWidth(0x30000)).toBe('W') 83 | }) 84 | }) 85 | 86 | describe('generalCategory', () => { 87 | it('works', () => { 88 | expect(scp9.generalCategory(0)).toBe('Cc') 89 | expect(scp9.generalCategory(31)).toBe('Cc') 90 | expect(scp9.generalCategory(parseInt('055A', 16))).toBe('Po') 91 | }) 92 | }) 93 | 94 | describe('normalizationProperties', () => { 95 | it('works', () => { 96 | expect(scp10.normalizationProperties(parseInt('037A', 16), 'FC_NFKC')).toBe('0020 03B9') 97 | expect(scp10.normalizationProperties(parseInt('E0002', 16), 'NFKC_CF')).toBe('') 98 | expect(scp10.normalizationProperties(parseInt('FB1D', 16), 'NFC_QC')).toBe('N') 99 | expect(scp10.normalizationProperties(parseInt('10FFFF', 16), 'NFKD_QC')).toBe('Y') 100 | }) 101 | }) 102 | 103 | describe('wordBreak', () => { 104 | it('works', () => { 105 | expect(() => scp13.wordBreak(-1)).toThrow() 106 | expect(scp13.wordBreak(parseInt('0000', 16))).toBeUndefined() 107 | expect(scp13.wordBreak(parseInt('0022', 16))).toBe('Double_Quote') 108 | expect(scp13.wordBreak(parseInt('0065', 16))).toBe('ALetter') 109 | }) 110 | }) 111 | 112 | describe('safeCodePoint', () => { 113 | it('works', () => { 114 | expect(scp12(0)).toBe(false) 115 | expect(scp12(36)).toBe(true) 116 | expect(scp12(65)).toBe(true) 117 | }) 118 | 119 | it('README example', () => { 120 | expect(getNumSafeCodePoints(scp7)).toBe(93510) 121 | expect(getNumSafeCodePoints(scp8)).toBe(101064) 122 | expect(getNumSafeCodePoints(scp9)).toBe(108397) 123 | expect(getNumSafeCodePoints(scp10)).toBe(116813) 124 | expect(getNumSafeCodePoints(scp11)).toBe(117422) 125 | expect(getNumSafeCodePoints(scp12)).toBe(117927) 126 | }) 127 | }) 128 | 129 | describe('base65536', () => { 130 | it('works', () => { 131 | const safeRange = (min, max) => { 132 | for (let codePoint = min; codePoint < max; codePoint++) { 133 | if (scp8.generalCategory(codePoint) !== 'Lo' || !scp8(codePoint)) { 134 | return false 135 | } 136 | } 137 | return true 138 | } 139 | 140 | const getAllSafeRanges = rangeSize => { 141 | const allSafeRanges = [] 142 | for (let codePoint = 0; codePoint < (1 << 16) + (1 << 20); codePoint += rangeSize) { 143 | if (safeRange(codePoint, codePoint + rangeSize)) { 144 | allSafeRanges.push(codePoint) 145 | } 146 | } 147 | return allSafeRanges 148 | } 149 | 150 | const allSafeRanges = getAllSafeRanges(1 << 8) 151 | 152 | const paddingBlockStart = String.fromCodePoint(allSafeRanges.shift()) 153 | expect(paddingBlockStart).toBe('ᔀ') 154 | 155 | const blockStarts = allSafeRanges.slice(0, 1 << 8).map(x => String.fromCodePoint(x)).join('') 156 | expect(blockStarts).toBe( 157 | '㐀㔀㘀㜀㠀㤀㨀㬀㰀㴀㸀㼀䀀䄀䈀䌀' + 158 | '䐀䔀䘀䜀䠀䤀䨀䬀䰀一伀倀儀刀匀吀' + 159 | '唀嘀圀堀夀娀嬀尀崀帀开怀愀戀挀搀' + 160 | '攀昀最栀椀樀欀氀洀渀漀瀀焀爀猀琀' + 161 | '甀瘀眀砀礀稀笀簀紀縀缀耀脀舀茀萀' + 162 | '蔀蘀蜀蠀褀言謀谀贀踀輀退鄀鈀錀鐀' + 163 | '销阀需頀餀騀鬀鰀鴀鸀ꄀꈀꌀꔀ𐘀𒀀' + 164 | '𒄀𒈀𓀀𓄀𓈀𓌀𔐀𔔀𖠀𖤀𠀀𠄀𠈀𠌀𠐀𠔀' + 165 | '𠘀𠜀𠠀𠤀𠨀𠬀𠰀𠴀𠸀𠼀𡀀𡄀𡈀𡌀𡐀𡔀' + 166 | '𡘀𡜀𡠀𡤀𡨀𡬀𡰀𡴀𡸀𡼀𢀀𢄀𢈀𢌀𢐀𢔀' + 167 | '𢘀𢜀𢠀𢤀𢨀𢬀𢰀𢴀𢸀𢼀𣀀𣄀𣈀𣌀𣐀𣔀' + 168 | '𣘀𣜀𣠀𣤀𣨀𣬀𣰀𣴀𣸀𣼀𤀀𤄀𤈀𤌀𤐀𤔀' + 169 | '𤘀𤜀𤠀𤤀𤨀𤬀𤰀𤴀𤸀𤼀𥀀𥄀𥈀𥌀𥐀𥔀' + 170 | '𥘀𥜀𥠀𥤀𥨀𥬀𥰀𥴀𥸀𥼀𦀀𦄀𦈀𦌀𦐀𦔀' + 171 | '𦘀𦜀𦠀𦤀𦨀𦬀𦰀𦴀𦸀𦼀𧀀𧄀𧈀𧌀𧐀𧔀' + 172 | '𧘀𧜀𧠀𧤀𧨀𧬀𧰀𧴀𧸀𧼀𨀀𨄀𨈀𨌀𨐀𨔀' 173 | ) 174 | 175 | // Check East_Asian_Width properties. Each block of 256 characters 176 | // has the same East_Asian_Width property. 243 of the blocks are 'W' (wide), 177 | // the other 13 + 1 are 'N' (neutral, which in effect is narrow). 178 | // This is significant when considering rendering and wrapping. 179 | const allBlockStarts = [...blockStarts].map(x => x.codePointAt(0)) 180 | const neutralBlockStarts = [...'ᔀꔀ𐘀𒀀𒄀𒈀𓀀𓄀𓈀𓌀𔐀𔔀𖠀𖤀'].map(x => x.codePointAt(0)) 181 | allBlockStarts.forEach(blockStart => { 182 | for (let i = 0; i < 1 << 8; i++) { 183 | const codePoint = blockStart + i 184 | const isInNeutralBlock = neutralBlockStarts 185 | .some(neutralBlockStart => neutralBlockStart <= codePoint && codePoint < neutralBlockStart + (1 << 8)) 186 | expect(scp8.eastAsianWidth(codePoint)).toBe(isInNeutralBlock ? 'N' : 'W') 187 | } 188 | }) 189 | }) 190 | }) 191 | 192 | describe('base32768', () => { 193 | it('works', () => { 194 | const safeRange = function (min, max) { 195 | for (let codePoint = min; codePoint < max; codePoint++) { 196 | if (!scp9(codePoint)) { 197 | return false 198 | } 199 | } 200 | return true 201 | } 202 | 203 | const getAllSafeRanges = rangeSize => { 204 | const allSafeRanges = [] 205 | for (let codePoint = 0; codePoint < (1 << 16) + (1 << 20); codePoint += rangeSize) { 206 | if (safeRange(codePoint, codePoint + rangeSize)) { 207 | allSafeRanges.push(codePoint) 208 | } 209 | } 210 | return allSafeRanges 211 | } 212 | 213 | const rangeSize = 5 214 | const allSafeRanges = getAllSafeRanges(1 << rangeSize) 215 | 216 | const repertoireSizes = [] 217 | for (let i = 15; i > 0; i -= 8) { // Base32768 is a 15-bit encoding of 8-bit binary data 218 | repertoireSizes.unshift(i - rangeSize) 219 | } 220 | 221 | const repertoireOffsets = repertoireSizes 222 | .map(x => 1 << x) 223 | .map((x, i, arr) => x + (i === 0 ? 0 : arr[i - 1])) // cumulative sum 224 | .map((offset, i, arr) => allSafeRanges 225 | .slice(i === 0 ? 0 : arr[i - 1], arr[i]) 226 | .map(x => String.fromCodePoint(x)) 227 | .join('') 228 | ) 229 | .reverse() 230 | 231 | expect(repertoireOffsets).toEqual([ 232 | 'ҠԀڀڠݠހ߀ကႠᄀᄠᅀᆀᇠሀሠበዠጠᎠᏀᐠᑀᑠᒀᒠᓀᓠᔀᔠᕀᕠᖀᖠᗀᗠᘀᘠᙀᚠᛀកᠠᡀᣀᦀ᧠ᨠᯀᰀᴀ⇠⋀⍀⍠⎀⎠⏀␀─┠╀╠▀■◀◠☀☠♀♠⚀⚠⛀⛠✀✠❀➀➠⠀⠠⡀⡠⢀⢠⣀⣠⤀⤠⥀⥠⦠⨠⩀⪀⪠⫠⬀⬠⭀ⰀⲀⲠⳀⴀⵀ⺠⻀㇀㐀㐠㑀㑠㒀㒠㓀㓠㔀㔠㕀㕠㖀㖠㗀㗠㘀㘠㙀㙠㚀㚠㛀㛠㜀㜠㝀㝠㞀㞠㟀㟠㠀㠠㡀㡠㢀㢠㣀㣠㤀㤠㥀㥠㦀㦠㧀㧠㨀㨠㩀㩠㪀㪠㫀㫠㬀㬠㭀㭠㮀㮠㯀㯠㰀㰠㱀㱠㲀㲠㳀㳠㴀㴠㵀㵠㶀㶠㷀㷠㸀㸠㹀㹠㺀㺠㻀㻠㼀㼠㽀㽠㾀㾠㿀㿠䀀䀠䁀䁠䂀䂠䃀䃠䄀䄠䅀䅠䆀䆠䇀䇠䈀䈠䉀䉠䊀䊠䋀䋠䌀䌠䍀䍠䎀䎠䏀䏠䐀䐠䑀䑠䒀䒠䓀䓠䔀䔠䕀䕠䖀䖠䗀䗠䘀䘠䙀䙠䚀䚠䛀䛠䜀䜠䝀䝠䞀䞠䟀䟠䠀䠠䡀䡠䢀䢠䣀䣠䤀䤠䥀䥠䦀䦠䧀䧠䨀䨠䩀䩠䪀䪠䫀䫠䬀䬠䭀䭠䮀䮠䯀䯠䰀䰠䱀䱠䲀䲠䳀䳠䴀䴠䵀䵠䶀䷀䷠一丠乀习亀亠什仠伀传佀你侀侠俀俠倀倠偀偠傀傠僀僠儀儠兀兠冀冠净几刀删剀剠劀加勀勠匀匠區占厀厠叀叠吀吠呀呠咀咠哀哠唀唠啀啠喀喠嗀嗠嘀嘠噀噠嚀嚠囀因圀圠址坠垀垠埀埠堀堠塀塠墀墠壀壠夀夠奀奠妀妠姀姠娀娠婀婠媀媠嫀嫠嬀嬠孀孠宀宠寀寠尀尠局屠岀岠峀峠崀崠嵀嵠嶀嶠巀巠帀帠幀幠庀庠廀廠开张彀彠往徠忀忠怀怠恀恠悀悠惀惠愀愠慀慠憀憠懀懠戀戠所扠技抠拀拠挀挠捀捠掀掠揀揠搀搠摀摠撀撠擀擠攀攠敀敠斀斠旀无昀映晀晠暀暠曀曠最朠杀杠枀枠柀柠栀栠桀桠梀梠检棠椀椠楀楠榀榠槀槠樀樠橀橠檀檠櫀櫠欀欠歀歠殀殠毀毠氀氠汀池沀沠泀泠洀洠浀浠涀涠淀淠渀渠湀湠満溠滀滠漀漠潀潠澀澠激濠瀀瀠灀灠炀炠烀烠焀焠煀煠熀熠燀燠爀爠牀牠犀犠狀狠猀猠獀獠玀玠珀珠琀琠瑀瑠璀璠瓀瓠甀甠畀畠疀疠痀痠瘀瘠癀癠皀皠盀盠眀眠着睠瞀瞠矀矠砀砠础硠碀碠磀磠礀礠祀祠禀禠秀秠稀稠穀穠窀窠竀章笀笠筀筠简箠節篠簀簠籀籠粀粠糀糠紀素絀絠綀綠緀締縀縠繀繠纀纠绀绠缀缠罀罠羀羠翀翠耀耠聀聠肀肠胀胠脀脠腀腠膀膠臀臠舀舠艀艠芀芠苀苠茀茠荀荠莀莠菀菠萀萠葀葠蒀蒠蓀蓠蔀蔠蕀蕠薀薠藀藠蘀蘠虀虠蚀蚠蛀蛠蜀蜠蝀蝠螀螠蟀蟠蠀蠠血衠袀袠裀裠褀褠襀襠覀覠觀觠言訠詀詠誀誠諀諠謀謠譀譠讀讠诀诠谀谠豀豠貀負賀賠贀贠赀赠趀趠跀跠踀踠蹀蹠躀躠軀軠輀輠轀轠辀辠迀迠退造遀遠邀邠郀郠鄀鄠酀酠醀醠釀釠鈀鈠鉀鉠銀銠鋀鋠錀錠鍀鍠鎀鎠鏀鏠鐀鐠鑀鑠钀钠铀铠销锠镀镠門閠闀闠阀阠陀陠隀隠雀雠需霠靀靠鞀鞠韀韠頀頠顀顠颀颠飀飠餀餠饀饠馀馠駀駠騀騠驀驠骀骠髀髠鬀鬠魀魠鮀鮠鯀鯠鰀鰠鱀鱠鲀鲠鳀鳠鴀鴠鵀鵠鶀鶠鷀鷠鸀鸠鹀鹠麀麠黀黠鼀鼠齀齠龀龠ꀀꀠꁀꁠꂀꂠꃀꃠꄀꄠꅀꅠꆀꆠꇀꇠꈀꈠꉀꉠꊀꊠꋀꋠꌀꌠꍀꍠꎀꎠꏀꏠꐀꐠꑀꑠ꒠ꔀꔠꕀꕠꖀꖠꗀꗠꙀꚠꛀ꜀꜠ꝀꞀꡀ', 233 | 'ƀɀɠʀ' 234 | ]) 235 | }) 236 | }) 237 | 238 | describe('base2048', () => { 239 | it('works', () => { 240 | const repertoireSizes = [] 241 | for (let i = 11; i > 0; i -= 8) { // Base2048 is an 11-bit encoding of 8-bit binary data 242 | repertoireSizes.unshift(1 << i) 243 | } 244 | 245 | let codePoint = 0 246 | const repertoires = repertoireSizes 247 | .map(repertoireSize => { 248 | const codePoints = [] 249 | while (codePoints.length < repertoireSize) { 250 | if ( 251 | scp10(codePoint) && 252 | !scp10.generalCategory(codePoint).startsWith('S') && 253 | scp10.generalCategory(codePoint) !== 'Lm' 254 | ) { 255 | codePoints.push(codePoint) 256 | } 257 | codePoint++ 258 | } 259 | return codePoints 260 | }) 261 | .map(codePoints => codePoints.map(x => String.fromCodePoint(x)).join('')) 262 | .reverse() 263 | 264 | expect(repertoires).toEqual([ 265 | '89ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÆÐØÞßæðøþĐđĦħıĸŁłŊŋŒœŦŧƀƁƂƃƄƅƆƇƈƉƊƋƌƍƎƏƐƑƒƓƔƕƖƗƘƙƚƛƜƝƞƟƢƣƤƥƦƧƨƩƪƫƬƭƮƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿǀǁǂǃǝǤǥǶǷȜȝȠȡȢȣȤȥȴȵȶȷȸȹȺȻȼȽȾȿɀɁɂɃɄɅɆɇɈɉɊɋɌɍɎɏɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿʀʁʂʃʄʅʆʇʈʉʊʋʌʍʎʏʐʑʒʓʔʕʖʗʘʙʚʛʜʝʞʟʠʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯͰͱͲͳͶͷͻͼͽͿΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρςστυφχψωϏϗϘϙϚϛϜϝϞϟϠϡϢϣϤϥϦϧϨϩϪϫϬϭϮϯϳϷϸϺϻϼϽϾϿЂЄЅІЈЉЊЋЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзиклмнопрстуфхцчшщъыьэюяђєѕіјљњћџѠѡѢѣѤѥѦѧѨѩѪѫѬѭѮѯѰѱѲѳѴѵѸѹѺѻѼѽѾѿҀҁҊҋҌҍҎҏҐґҒғҔҕҖҗҘҙҚқҜҝҞҟҠҡҢңҤҥҦҧҨҩҪҫҬҭҮүҰұҲҳҴҵҶҷҸҹҺһҼҽҾҿӀӃӄӅӆӇӈӉӊӋӌӍӎӏӔӕӘәӠӡӨөӶӷӺӻӼӽӾӿԀԁԂԃԄԅԆԇԈԉԊԋԌԍԎԏԐԑԒԓԔԕԖԗԘԙԚԛԜԝԞԟԠԡԢԣԤԥԦԧԨԩԪԫԬԭԮԯԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆאבגדהוזחטיךכלםמןנסעףפץצקרשתװױײؠءابةتثجحخدذرزسشصضطظعغػؼؽؾؿفقكلمنهوىي٠١٢٣٤٥٦٧٨٩ٮٯٱٲٳٴٹٺٻټٽپٿڀځڂڃڄڅچڇڈډڊڋڌڍڎڏڐڑڒړڔڕږڗژڙښڛڜڝڞڟڠڡڢڣڤڥڦڧڨکڪګڬڭڮگڰڱڲڳڴڵڶڷڸڹںڻڼڽھڿہۃۄۅۆۇۈۉۊۋیۍێۏېۑےەۮۯ۰۱۲۳۴۵۶۷۸۹ۺۻۼۿܐܒܓܔܕܖܗܘܙܚܛܜܝܞܟܠܡܢܣܤܥܦܧܨܩܪܫܬܭܮܯݍݎݏݐݑݒݓݔݕݖݗݘݙݚݛݜݝݞݟݠݡݢݣݤݥݦݧݨݩݪݫݬݭݮݯݰݱݲݳݴݵݶݷݸݹݺݻݼݽݾݿހށނރބޅކއވމފދތލގޏސޑޒޓޔޕޖޗޘޙޚޛޜޝޞޟޠޡޢޣޤޥޱ߀߁߂߃߄߅߆߇߈߉ߊߋߌߍߎߏߐߑߒߓߔߕߖߗߘߙߚߛߜߝߞߟߠߡߢߣߤߥߦߧߨߩߪࠀࠁࠂࠃࠄࠅࠆࠇࠈࠉࠊࠋࠌࠍࠎࠏࠐࠑࠒࠓࠔࠕࡀࡁࡂࡃࡄࡅࡆࡇࡈࡉࡊࡋࡌࡍࡎࡏࡐࡑࡒࡓࡔࡕࡖࡗࡘࡠࡡࡢࡣࡤࡥࡦࡧࡨࡩࡪࢠࢡࢢࢣࢤࢥࢦࢧࢨࢩࢪࢫࢬࢭࢮࢯࢰࢱࢲࢳࢴࢶࢷࢸࢹࢺࢻࢼࢽऄअआइईउऊऋऌऍऎएऐऑऒओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलळवशषसहऽॐॠॡ०१२३४५६७८९ॲॳॴॵॶॷॸॹॺॻॼॽॾॿঀঅআইঈউঊঋঌএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহঽৎৠৡ০১২৩৪৫৬৭৮৯ৰৱ৴৵৶৷৸৹ৼਅਆਇਈਉਊਏਐਓਔਕਖਗਘਙਚਛਜਝਞਟਠਡਢਣਤਥਦਧਨਪਫਬਭਮਯਰਲਵਸਹੜ੦੧੨੩੪੫੬੭੮੯ੲੳੴઅઆઇઈઉઊઋઌઍએઐઑઓઔકખગઘઙચછજઝઞટઠડઢણતથદધનપફબભમયરલળવશષસહઽૐૠૡ૦૧૨૩૪૫૬૭૮૯ૹଅଆଇଈଉଊଋଌଏଐଓଔକଖଗଘଙଚଛଜଝଞଟଠଡଢଣତଥଦଧନପଫବଭମଯରଲଳଵଶଷସହଽୟୠୡ୦୧୨୩୪୫୬୭୮୯ୱ୲୳୴୵୶୷ஃஅஆஇஈஉஊஎஏஐஒஓகஙசஜஞடணதநனபமயரறலளழவஶஷஸஹௐ௦௧௨௩௪௫௬௭௮௯௰௱௲అఆఇఈఉఊఋఌఎఏఐఒఓఔకఖగఘఙచఛజఝఞటఠడఢణతథదధనపఫబభమయరఱలళఴవశషసహఽౘౙౚౠౡ౦౧౨౩౪౫౬౭౮౯౸౹౺౻౼౽౾ಀಅಆಇಈಉಊಋಌಎಏಐಒಓಔಕಖಗಘಙಚಛಜಝಞಟಠಡಢಣತಥದಧನಪಫಬಭಮಯರಱಲಳವಶಷಸಹಽೞೠೡ೦೧೨೩೪೫೬೭೮೯ೱೲഅആഇഈഉഊഋഌഎഏഐഒഓഔകഖഗഘങചഛജഝഞടഠഡഢണതഥദധനഩപഫബഭമയരറലളഴവശഷസഹഺഽൎൔൕൖ൘൙൚൛൜൝൞ൟൠൡ൦൧൨൩൪൫൬൭൮൯൰൱൲൳൴൵൶൷൸ൺൻർൽൾൿඅආඇඈඉඊඋඌඍඎඏඐඑඒඓඔඕඖකඛගඝඞඟචඡජඣඤඥඦටඨඩඪණඬතථදධනඳපඵබභමඹයරලවශෂසහළෆ෦෧෨෩෪෫෬෭෮෯กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะาเแโใไๅ๐๑๒๓๔๕๖๗๘๙ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯະາຽເແໂໃໄ໐໑໒໓໔໕໖໗໘໙ໞໟༀ༠༡༢༣༤༥༦༧༨༩༪༫༬༭༮༯༰༱༲༳ཀཁགངཅཆཇཉཊཋཌཎཏཐདནཔཕབམཙཚཛཝཞཟའཡརལཤཥསཧཨཪཫཬྈྉྊྋྌကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဢဣဤဥဧဨဩဪဿ၀၁၂၃၄၅၆၇၈၉ၐၑၒၓၔၕ', 266 | '01234567' 267 | ]) 268 | }) 269 | }) 270 | 271 | describe('base2e15', () => { 272 | it('is not safe', () => { 273 | const repertoires = [ 274 | [0x3480, 0x4DB6], 275 | [0x4E00, 0x8926], 276 | [0xAC00, 0xD7A4], 277 | [0x3400, 0x3480] 278 | ] 279 | const badGc = [] 280 | const badCcc = [] 281 | const badNfdQc = [] 282 | const badNfkdQc = [] 283 | repertoires.forEach(repertoire => { 284 | for (let i = repertoire[0]; i < repertoire[1]; i++) { 285 | if (scp10.generalCategory(i) !== 'Lo') { 286 | badGc.push(i) 287 | } 288 | if (scp10.canonicalCombiningClass(i) !== 0) { 289 | badCcc.push(i) 290 | } 291 | if (scp10.normalizationProperties(i, 'NFD_QC') !== 'Y') { 292 | badNfdQc.push(i) 293 | } 294 | if (scp10.normalizationProperties(i, 'NFKD_QC') !== 'Y') { 295 | badNfkdQc.push(i) 296 | } 297 | } 298 | }) 299 | expect(badGc).toEqual([]) 300 | expect(badCcc).toEqual([]) 301 | expect(badNfdQc.length).toBe(11172) 302 | expect(badNfkdQc.length).toBe(11172) 303 | }) 304 | }) 305 | 306 | describe('base32k', () => { 307 | it('is not safe', () => { 308 | const lanes = [ 309 | [0x4000, 0xA000], 310 | [0xB000, 0xD000] 311 | ] 312 | const badNfdQc = [] 313 | const badNfkdQc = [] 314 | lanes.forEach(lane => { 315 | for (let i = lane[0]; i < lane[1]; i++) { 316 | if (scp10.normalizationProperties(i, 'NFD_QC') !== 'Y') { 317 | badNfdQc.push(i) 318 | } 319 | if (scp10.normalizationProperties(i, 'NFKD_QC') !== 'Y') { 320 | badNfkdQc.push(i) 321 | } 322 | } 323 | }) 324 | expect(badNfdQc.length).toBe(8192) 325 | expect(badNfkdQc.length).toBe(8192) 326 | }) 327 | }) 328 | 329 | describe('14', () => { 330 | it('works', async () => { 331 | const scp14 = await SafeCodePoint('14.0.0') 332 | const numCodePoints = (1 << 16) + (1 << 20) 333 | let numAssigned = 0 334 | let numSafe = 0 335 | let numSafeLetter = 0 336 | let numSafeLetterOther = 0 337 | for (let codePoint = 0; codePoint < numCodePoints; codePoint++) { 338 | const gc = scp14.generalCategory(codePoint) 339 | if (gc !== 'Cn') { 340 | numAssigned++ 341 | if (scp14(codePoint)) { 342 | numSafe++ 343 | if (gc.startsWith('L')) { 344 | numSafeLetter++ 345 | if (gc === 'Lo') { 346 | numSafeLetterOther++ 347 | } 348 | } 349 | } 350 | } 351 | } 352 | expect(numAssigned).toBe(284278) 353 | expect(numSafe).toBe(124456) 354 | expect(numSafeLetter).toBe(116231) 355 | expect(numSafeLetterOther).toBe(113876) 356 | }) 357 | }) 358 | 359 | describe('safeCategories', () => { 360 | it('works', async () => { 361 | const scp17WithNothingSafe = await SafeCodePoint('17.0.0', { safeCategories: {} }) 362 | const scp17WithEverythingSafe = await SafeCodePoint('17.0.0', { safeCategories: new Proxy({}, { get (_, prop) { return true } }) }) 363 | 364 | const numSafeCodePoints17 = getNumSafeCodePoints(scp17) 365 | const numSafeCodePoints17WithNothingSafe = getNumSafeCodePoints(scp17WithNothingSafe) 366 | const numSafeCodePoints17WithEverythingSafe = getNumSafeCodePoints(scp17WithEverythingSafe) 367 | 368 | expect(numSafeCodePoints17WithNothingSafe).toBeLessThan(numSafeCodePoints17) 369 | expect(numSafeCodePoints17).toBeLessThan(numSafeCodePoints17WithEverythingSafe) 370 | }) 371 | }) 372 | }) 373 | --------------------------------------------------------------------------------