├── .eslintignore ├── .eslintrc.json ├── .gitignore ├── .gitreview ├── .mailmap ├── AUTHORS.txt ├── CONTRIBUTING.md ├── Gruntfile.js ├── History.md ├── LICENSE-template.txt ├── LICENSE.txt ├── README.md ├── build ├── .eslintrc.json ├── banner.txt ├── footer.txt ├── moduleUtils.js ├── modules.json └── tasks │ ├── set-dev.js │ └── set-meta.js ├── jsdoc.json ├── package-lock.json ├── package.json ├── src ├── .eslintrc.json ├── generated │ ├── unicodejs.derivedbidiclasses.js │ ├── unicodejs.derivedcoreproperties.js │ ├── unicodejs.derivedgeneralcategories.js │ ├── unicodejs.emojiproperties.js │ ├── unicodejs.graphemebreakproperties.js │ └── unicodejs.wordbreakproperties.js ├── unicodejs.characterclass.js ├── unicodejs.graphemebreak.js ├── unicodejs.js ├── unicodejs.textstring.js └── unicodejs.wordbreak.js ├── tests ├── .eslintrc.json ├── generated │ ├── unicodejs.graphemebreak.testdata.js │ └── unicodejs.wordbreak.testdata.js ├── index.html ├── unicodejs.characterclass.test.js ├── unicodejs.graphemebreak.test.js ├── unicodejs.namespace.js ├── unicodejs.test.js └── unicodejs.wordbreak.test.js └── tools ├── .eslintrc.json ├── strongDir.js.html ├── strongDir.php.html ├── unicodejs-properties.js └── unicodejs-tests.js /.eslintignore: -------------------------------------------------------------------------------- 1 | /dist 2 | /docs 3 | /coverage 4 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "extends": [ 4 | "wikimedia/server" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /dist 2 | /docs 3 | /coverage 4 | node_modules 5 | .eslintcache 6 | -------------------------------------------------------------------------------- /.gitreview: 
-------------------------------------------------------------------------------- 1 | [gerrit] 2 | host=gerrit.wikimedia.org 3 | port=29418 4 | project=unicodejs.git 5 | defaultbranch=master 6 | defaultrebase=0 7 | -------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | David Chan 2 | David Chan 3 | Roan Kattouw 4 | Roan Kattouw 5 | Timo Tijhof 6 | Timo Tijhof 7 | Timo Tijhof 8 | -------------------------------------------------------------------------------- /AUTHORS.txt: -------------------------------------------------------------------------------- 1 | Principal Authors (major contributors) 2 | 3 | Ed Sanders 4 | David Chan 5 | 6 | Patch Contributors (minor contributors, alphabetically) 7 | 8 | Antoine Musso 9 | Chad 10 | James D. Forrester 11 | Kunal Mehta 12 | Mehmet Coskun 13 | Paladox 14 | Ricordisamoa 15 | Roan Kattouw 16 | Timo Tijhof 17 | Umherirrender 18 | Zach -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribute to UnicodeJS 2 | 3 | ## Release process 4 | 5 | 1. Create or reset your `release` branch to the latest head of the repository 6 | ``` 7 | git remote update && git checkout -B release -t origin/HEAD 8 | ``` 9 | 10 | 2. Ensure build and tests pass locally. 11 | NOTE: This does not require privileges and should be run in isolation. 12 | ``` 13 | npm ci && npm test 14 | ``` 15 | 16 | 3. Prepare the release commit 17 | - Add release notes to a new section on top of [History.md](./History.md). 18 | ``` 19 | git log --format='* %s (%aN)' --no-merges --reverse v$(node -e 'console.log(require("./package.json").version);')...HEAD | sort | grep -vE '^\* (build|docs?|tests?):' 20 | ``` 21 | - Set the next release version in [package.json](./package.json). 
22 | - Review and stage your commit: 23 | ``` 24 | git add -p 25 | ``` 26 | - Save your commit and push for review. 27 | ``` 28 | git commit -m "Tag vX.Y.Z" 29 | git review 30 | ``` 31 | 32 | After the release commit has been merged by CI, perform the actual release: 33 | 34 | 1. Update and reset your `release` branch, confirm it is at your merged commit. 35 | ``` 36 | git remote update && git checkout -B release -t origin/HEAD 37 | # … 38 | git show 39 | # Tag vX.Y.Z 40 | # … 41 | ``` 42 | 43 | 3. Create a signed tag and push it to the Git server: 44 | ``` 45 | git tag -s "vX.Y.Z" 46 | git push --tags 47 | ``` 48 | 49 | 4. Run the build and review the release file (e.g. proper release version header 50 | in the header, and not a development build). 51 | NOTE: This does not require privileges and should be run in isolation. 52 | ``` 53 | npm run build 54 | # … 55 | head dist/unicodejs.js 56 | # UnicodeJS v12.0.0 57 | # … 58 | ``` 59 | 60 | 5. Publish to npm: 61 | ``` 62 | npm publish 63 | ``` -------------------------------------------------------------------------------- /Gruntfile.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | module.exports = function ( grunt ) { 4 | const modules = require( './build/modules.json' ), 5 | moduleUtils = require( './build/moduleUtils.js' ), 6 | srcFiles = moduleUtils.makeBuildList( modules, [ 'unicodejs' ] ).scripts, 7 | testFiles = moduleUtils.makeBuildList( modules, [ 'unicodejs.tests' ] ).scripts; 8 | 9 | grunt.loadNpmTasks( 'grunt-contrib-clean' ); 10 | grunt.loadNpmTasks( 'grunt-contrib-concat' ); 11 | grunt.loadNpmTasks( 'grunt-contrib-copy' ); 12 | grunt.loadNpmTasks( 'grunt-eslint' ); 13 | grunt.loadNpmTasks( 'grunt-exec' ); 14 | grunt.loadNpmTasks( 'grunt-karma' ); 15 | grunt.loadTasks( 'build/tasks' ); 16 | 17 | grunt.initConfig( { 18 | clean: { 19 | dist: [ 'dist', 'coverage' ] 20 | }, 21 | copy: { 22 | dist: { 23 | src: [ 24 | 'AUTHORS.txt', 25 | 
'History.md', 26 | 'README.md' 27 | ], 28 | dest: 'dist/' 29 | }, 30 | licence: { 31 | // This substitues the current year into the license file's copyright statement' 32 | options: { process: grunt.template.process }, 33 | src: 'LICENSE-template.txt', 34 | dest: 'LICENSE.txt' 35 | } 36 | }, 37 | concat: { 38 | all: { 39 | options: { 40 | banner: grunt.file.read( 'build/banner.txt' ), 41 | footer: grunt.file.read( 'build/footer.txt' ) 42 | }, 43 | dest: 'dist/unicodejs.js', 44 | src: srcFiles 45 | } 46 | }, 47 | exec: { 48 | cmd: 'node tools/unicodejs-properties.js && node tools/unicodejs-tests.js' 49 | }, 50 | eslint: { 51 | options: { 52 | cache: true, 53 | fix: grunt.option( 'fix' ) 54 | }, 55 | all: '.' 56 | }, 57 | karma: { 58 | options: { 59 | files: testFiles, 60 | frameworks: [ 'qunit' ], 61 | reporters: [ 'dots', 'coverage' ], 62 | singleRun: true, 63 | autoWatch: false, 64 | preprocessors: { 65 | 'src/*.js': [ 'coverage' ] 66 | }, 67 | customLaunchers: { 68 | ChromeCustom: { 69 | base: 'ChromeHeadless', 70 | // Chrome requires --no-sandbox in Docker/CI. 71 | flags: process.env.CHROMIUM_FLAGS ? 72 | process.env.CHROMIUM_FLAGS.split( ' ' ) : 73 | undefined 74 | } 75 | }, 76 | coverageReporter: { 77 | dir: 'coverage/', 78 | subdir: '.', 79 | reporters: [ 80 | { type: 'clover' }, 81 | { type: 'html' }, 82 | { type: 'text-summary' } 83 | ], 84 | check: { global: { 85 | functions: 100, 86 | statements: 100, 87 | branches: 100, 88 | lines: 100 89 | } } 90 | } 91 | }, 92 | firefox: { 93 | browsers: [ 'FirefoxHeadless' ] 94 | }, 95 | chrome: { 96 | browsers: [ 'ChromeCustom' ] 97 | } 98 | } 99 | } ); 100 | 101 | grunt.registerTask( 'lint', [ 'eslint' ] ); 102 | grunt.registerTask( 'update', [ 'exec' ] ); 103 | // Workaround for T280935, and T240955. 104 | // Firefox 68esr is incompatible with Docker. 105 | // TODO: Try this again when Firefox 84esr reaches our CI images. 106 | grunt.registerTask( 'unit', ( process.env.ZUUL_PIPELINE ? 
107 | [ 'karma:chrome' ] : 108 | [ 'karma:chrome', 'karma:firefox' ] 109 | ) ); 110 | grunt.registerTask( '_build', [ 'clean', 'concat', 'copy' ] ); 111 | grunt.registerTask( 'build', [ 'set-meta', '_build' ] ); 112 | grunt.registerTask( 'test', [ 'set-meta', 'set-dev', '_build', 'lint', 'unit' ] ); 113 | }; 114 | -------------------------------------------------------------------------------- /History.md: -------------------------------------------------------------------------------- 1 | # UnicodeJS Release History 2 | 3 | ## v15.0.0 / 2025-06-08 4 | * Update to Unicode 15.0.0 (Ed Sanders) 5 | 6 | ## v13.0.3 / 2021-04-23 7 | * Always check prevCodepoint is non-null (Ed Sanders) 8 | 9 | ## v13.0.2 / 2021-01-14 10 | * Use "this" instead of "wordbreak" to make wordbreak extendable (Mehmet Coskun) 11 | 12 | ## v13.0.1 / 2020-10-08 13 | * Use next/prevCodepoint for checking for sot/eot (Ed Sanders) 14 | * Use local wordbreak cached variable consistently (Ed Sanders) 15 | * Set code coverage requirement to 100% with one inline ignore (Ed Sanders) 16 | * Wordbreak: Drop rule WB14 (Ed Sanders) 17 | 18 | ## v13.0.0 / 2020-05-27 19 | * Update data to Unicode 13.0.0 (Ed Sanders) 20 | * Update data to Unicode 12.1.0 (Ed Sanders) 21 | * Rewrite grapheme break without regexes (Ed Sanders) 22 | 23 | ## v12.0.0 / 2019-05-29 24 | We now number our releases based on Unicode codepoint releases. 25 | 26 | * [BREAKING CHANGE] Go back to storing strings as code units (Ed Sanders) 27 | * Update to Unicode 9.0.0 (Ed Sanders) 28 | * Update to Unicode 10.0.0 (James D. Forrester) 29 | * Update to Unicode 11.0.0 (Ed Sanders) 30 | * Update to Unicode 12.0.0 (Ed Sanders) 31 | * Add official grapheme break tests (Ed Sanders) 32 | 33 | ## v0.2.2 / 2018-09-22 34 | * Add UMD wrapper (Ed Sanders) 35 | 36 | ## v0.2.1 / 2018-02-11 37 | * Bump copyright year (James D. 
Forrester) 38 | 39 | ## v0.2.0 / 2017-11-14 40 | * Build automated wordbreak tests from official test data (Ed Sanders) 41 | 42 | ## v0.1.6 / 2016-12-09 43 | * Update to Unicode 8 (Ed Sanders) 44 | * Remove duplicated check for surrogates (Ed Sanders) 45 | 46 | ## v0.1.5 / 2015-07-02 47 | * Duck typing test for isBreak (David Chan) 48 | * Update generated data for Unicode 8.0.0 (David Chan) 49 | * Strong directionality support (David Chan) 50 | 51 | ## v0.1.4 / 2015-03-18 52 | * Add isBreak surrogate pair support for code unit strings (David Chan) 53 | 54 | ## v0.1.3 / 2015-02-04 55 | * Bump copyright notices to 2015 (James D. Forrester) 56 | * Word character class regex (David Chan) 57 | 58 | ## v0.1.2 / 2014-12-04 59 | * Fix lots of spelling mistakes and typos (Ed Sanders) 60 | * Update to Unicode 7.0.0 (Ed Sanders) 61 | 62 | ## v0.1.1 / 2014-08-12 63 | * readme: Document release process (James D. Forrester) 64 | 65 | ## v0.1.0 / 2014-08-12 66 | * build: Make into a built library with test infrastructure (James D. Forrester) 67 | * Initial import of UnicodeJS namespace from VisualEditor (James D. 
Forrester) 68 | -------------------------------------------------------------------------------- /LICENSE-template.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013–<%= grunt.template.today("yyyy") %> UnicodeJS team and others under the terms 2 | of The MIT License (MIT), as follows: 3 | 4 | This software consists of voluntary contributions made by many 5 | individuals (AUTHORS.txt) For exact contribution history, see the 6 | revision history and logs, available at https://gerrit.wikimedia.org 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining 9 | a copy of this software and associated documentation files (the 10 | "Software"), to deal in the Software without restriction, including 11 | without limitation the rights to use, copy, modify, merge, publish, 12 | distribute, sublicense, and/or sell copies of the Software, and to 13 | permit persons to whom the Software is furnished to do so, subject to 14 | the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be 17 | included in all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 23 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
26 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013–2025 UnicodeJS team and others under the terms 2 | of The MIT License (MIT), as follows: 3 | 4 | This software consists of voluntary contributions made by many 5 | individuals (AUTHORS.txt) For exact contribution history, see the 6 | revision history and logs, available at https://gerrit.wikimedia.org 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining 9 | a copy of this software and associated documentation files (the 10 | "Software"), to deal in the Software without restriction, including 11 | without limitation the rights to use, copy, modify, merge, publish, 12 | distribute, sublicense, and/or sell copies of the Software, and to 13 | permit persons to whom the Software is furnished to do so, subject to 14 | the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be 17 | included in all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 23 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![NPM version](https://badge.fury.io/js/unicodejs.svg)](https://badge.fury.io/js/unicodejs) 2 | 3 | UnicodeJS 4 | ================= 5 | 6 | UnicodeJS is a JavaScript library for working with the Unicode standard. 7 | 8 | Quick start 9 | ---------- 10 | 11 | This library is available as an [npm](https://npmjs.org/) package! Install it right away: 12 |
13 | npm install unicodejs
14 | 
15 | 16 | Or clone the repo, `git clone https://gerrit.wikimedia.org/r/unicodejs`. 17 | 18 | Documentation 19 | ---------- 20 | 21 | The library provides methods for detecting **Word Boundaries** and **Grapheme Cluster Boundaries** as defined in the *[Text Segmentation specification](https://unicode.org/reports/tr29/)*. 22 | 23 | Full documentation is published at https://doc.wikimedia.org/unicodejs/master/. 24 | 25 | Versioning 26 | ---------- 27 | 28 | We use the Semantic Versioning guidelines as much as possible; major versions reflect the version of Unicode we target. 29 | 30 | Releases will be numbered in the following format: 31 | 32 | `<major>.<minor>.<patch>` 33 | 34 | For more information on SemVer, please visit http://semver.org/. 35 | 36 | Bug tracker 37 | ----------- 38 | 39 | Found a bug? Please report it in the [issue tracker](https://phabricator.wikimedia.org/maniphest/task/edit/form/1/?project=Utilities-UnicodeJS)! 40 | -------------------------------------------------------------------------------- /build/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "extends": "wikimedia/server" 4 | } 5 | -------------------------------------------------------------------------------- /build/banner.txt: -------------------------------------------------------------------------------- 1 | /*! 2 | * UnicodeJS v<%= build.version %> 3 | * https://www.mediawiki.org/wiki/UnicodeJS 4 | * 5 | * Copyright 2013-<%= build.year %> UnicodeJS Team and other contributors. 6 | * Released under the MIT license 7 | * https://unicodejs.mit-license.org/ 8 | */ 9 | ( function ( root, factory ) { 10 | if ( typeof define === 'function' && define.amd ) { 11 | // AMD. Register as an anonymous module. 
12 | define( factory ); 13 | } else if ( typeof exports === 'object' && typeof exports.nodeName !== 'string' ) { 14 | // CommonJS 15 | module.exports = factory(); 16 | } else { 17 | // Browser globals 18 | root.unicodeJS = factory(); 19 | } 20 | }( this, function () { 21 | var unicodeJS = {}; 22 | -------------------------------------------------------------------------------- /build/footer.txt: -------------------------------------------------------------------------------- 1 | return unicodeJS; 2 | } ) ); 3 | -------------------------------------------------------------------------------- /build/moduleUtils.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Utility methods for interpreting the modules.json manifest. 3 | * 4 | * Code shared with the OOjs UI project 5 | */ 6 | 7 | 'use strict'; 8 | 9 | const hasOwn = Object.hasOwnProperty; 10 | 11 | module.exports = { 12 | /** 13 | * Expand an array of file paths and variant-objects into 14 | * a flattened list by variant. 15 | * 16 | * input = [ 17 | * 'foo.js', 18 | * 'bar.js', 19 | * { default: 'baz-fallback.js', svg: 'baz-svg.js', }. 20 | * 'quux.js' 21 | * ] 22 | * 23 | * output = { 24 | * default: [ 25 | * 'foo.js', 26 | * 'bar.js' 27 | * 'baz-fallback.js' 28 | * 'quux.js' 29 | * ], 30 | * svg: [ 31 | * 'foo.js', 32 | * 'bar.js' 33 | * 'baz-svg.js' 34 | * 'quux.js' 35 | * ] 36 | * ] 37 | * 38 | * @param {Array} resources List of expandable resources 39 | * @return {Array} Flat list of file paths 40 | */ 41 | expandResources: function ( resources ) { 42 | // Figure out what the different css targets will be, 43 | // we need this to be shared between the recess task 44 | // (which will compile the less code) and the concat task 45 | // (which will prepend intro.css without it being stripped 46 | // like recess would). 
47 | const targets = { default: [] }; 48 | resources.forEach( ( filepath ) => { 49 | if ( typeof filepath !== 'object' ) { 50 | filepath = { default: filepath }; 51 | } 52 | // Fetch copy of buffer before filepath/variant loop, otherwise 53 | // it can incorrectly include the default file in a non-default variant. 54 | const buffer = targets.default.slice(); 55 | for ( const variant in filepath ) { 56 | if ( !targets[ variant ] ) { 57 | targets[ variant ] = buffer.slice(); 58 | } 59 | targets[ variant ].push( filepath[ variant ] ); 60 | } 61 | 62 | } ); 63 | return targets; 64 | }, 65 | 66 | /** 67 | * Create a build list 68 | * 69 | * @param {Array} modules List of modules and their dependencies 70 | * @param {Array} targets List of target modules to load including any dependencies 71 | * @return {Object} An object containing arrays of the scripts and styles 72 | */ 73 | makeBuildList: function ( modules, targets ) { 74 | /** 75 | * Expands an array of arrays of file paths with dependencies into an ordered 76 | * lit of dependencies stemming from one or more given top-level modules. 77 | * 78 | * @param {Array} load List of targets to return and their dependencies 79 | * @param {Array} [list] Extant flat list of file paths to extend 80 | * @return {Array} Flat list of file paths 81 | */ 82 | function buildDependencyList( load, list = [] ) { 83 | load.forEach( ( module ) => { 84 | if ( !hasOwn.call( modules, module ) ) { 85 | throw new Error( 'Dependency ' + module + ' not found' ); 86 | } 87 | 88 | // Add in any dependencies 89 | if ( modules[ module ].dependencies ) { 90 | buildDependencyList( modules[ module ].dependencies, list ); 91 | } 92 | 93 | // Append target load module to the end of the current list 94 | if ( !list.includes( module ) ) { 95 | list.push( module ); 96 | } 97 | } ); 98 | 99 | return list; 100 | } 101 | 102 | /** 103 | * Given a list of targets, returns an object splitting the scripts 104 | * and styles. 
105 | * 106 | * @param {Array} buildlist List of targets to work through 107 | * @param {Object} [filelist] Object to extend 108 | * @return {Object} Object of two arrays listing the file paths 109 | */ 110 | function expandBuildList( buildlist, filelist = {} ) { 111 | filelist.scripts = filelist.scripts || []; 112 | filelist.styles = filelist.styles || []; 113 | 114 | for ( const build in buildlist ) { 115 | const moduleName = buildlist[ build ]; 116 | 117 | for ( const script in modules[ moduleName ].scripts ) { 118 | if ( !modules[ moduleName ].scripts[ script ].debug ) { 119 | filelist.scripts.push( modules[ moduleName ].scripts[ script ] ); 120 | } 121 | } 122 | 123 | for ( const style in modules[ moduleName ].styles ) { 124 | if ( !modules[ moduleName ].styles[ style ].debug ) { 125 | filelist.styles.push( modules[ moduleName ].styles[ style ] ); 126 | } 127 | } 128 | } 129 | return filelist; 130 | } 131 | 132 | return expandBuildList( buildDependencyList( targets ) ); 133 | } 134 | }; 135 | -------------------------------------------------------------------------------- /build/modules.json: -------------------------------------------------------------------------------- 1 | { 2 | "unicodejs": { 3 | "scripts": [ 4 | "src/unicodejs.js", 5 | "src/generated/unicodejs.derivedbidiclasses.js", 6 | "src/generated/unicodejs.derivedcoreproperties.js", 7 | "src/generated/unicodejs.derivedgeneralcategories.js", 8 | "src/unicodejs.characterclass.js", 9 | "src/unicodejs.textstring.js", 10 | "src/generated/unicodejs.emojiproperties.js", 11 | "src/generated/unicodejs.graphemebreakproperties.js", 12 | "src/unicodejs.graphemebreak.js", 13 | "src/generated/unicodejs.wordbreakproperties.js", 14 | "src/unicodejs.wordbreak.js" 15 | ] 16 | }, 17 | "unicodejs.namespace": { 18 | "scripts": [ 19 | "tests/unicodejs.namespace.js" 20 | ] 21 | }, 22 | "unicodejs.tests": { 23 | "scripts": [ 24 | "tests/unicodejs.test.js", 25 | "tests/unicodejs.characterclass.test.js", 26 | 
"tests/generated/unicodejs.graphemebreak.testdata.js", 27 | "tests/unicodejs.graphemebreak.test.js", 28 | "tests/generated/unicodejs.wordbreak.testdata.js", 29 | "tests/unicodejs.wordbreak.test.js" 30 | ], 31 | "dependencies": [ 32 | "unicodejs.namespace", 33 | "unicodejs" 34 | ] 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /build/tasks/set-dev.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Mark development build. 3 | * 4 | * E.g. output produced from `npm test`, instead of `npm run build`. 5 | */ 6 | 7 | 'use strict'; 8 | 9 | module.exports = function ( grunt ) { 10 | grunt.registerTask( 'set-dev', () => { 11 | grunt.config.set( 'build.version', grunt.config( 'build.version' ) + '-dev' ); 12 | } ); 13 | }; 14 | -------------------------------------------------------------------------------- /build/tasks/set-meta.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Set build metadata. 
3 | */ 4 | 5 | 'use strict'; 6 | 7 | module.exports = function ( grunt ) { 8 | 9 | grunt.registerTask( 'set-meta', () => { 10 | const cp = require( 'child_process' ); 11 | 12 | // Support reproducible builds from only the source code 13 | // https://reproducible-builds.org/docs/source-date-epoch/ 14 | let releaseEpoch; 15 | try { 16 | releaseEpoch = process.env.SOURCE_DATE_EPOCH || cp.execSync( 'git log -s --format=%at -1' ); 17 | } catch ( e ) { 18 | grunt.log.err( e ); 19 | return false; 20 | } 21 | grunt.config.set( 'build.year', new Date( releaseEpoch * 1000 ).getUTCFullYear() ); 22 | grunt.config.set( 'build.version', require( '../../package.json' ).version ); 23 | } ); 24 | 25 | }; 26 | -------------------------------------------------------------------------------- /jsdoc.json: -------------------------------------------------------------------------------- 1 | { 2 | "opts": { 3 | "destination": "docs/js", 4 | "package": "package.json", 5 | "pedantic": true, 6 | "readme": "README.md", 7 | "recurse": true, 8 | "template": "node_modules/jsdoc-wmf-theme" 9 | }, 10 | "plugins": [ 11 | "plugins/markdown" 12 | ], 13 | "source": { 14 | "include": [ "src" ] 15 | }, 16 | "templates": { 17 | "cleverLinks": true, 18 | "default": { 19 | "useLongnameInNav": true 20 | }, 21 | "wmf": { 22 | "linkMap": { 23 | "Array": "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array", 24 | "Promise": "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise", 25 | "Set": "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set" 26 | } 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "unicodejs", 3 | "version": "15.0.0", 4 | "description": "A library for working with the Unicode standard", 5 | "keywords": [ 6 | "unicode" 7 | 
], 8 | "homepage": "https://www.mediawiki.org/wiki/UnicodeJS", 9 | "repository": { 10 | "type": "git", 11 | "url": "https://gerrit.wikimedia.org/g/unicodejs" 12 | }, 13 | "license": "MIT", 14 | "files": [ 15 | "dist/" 16 | ], 17 | "scripts": { 18 | "build": "grunt build", 19 | "test": "grunt test", 20 | "doc": "jsdoc -c jsdoc.json -p" 21 | }, 22 | "devDependencies": { 23 | "eslint-config-wikimedia": "0.30.0", 24 | "eslint-plugin-html": "8.1.3", 25 | "fs-extra": "11.3.0", 26 | "grunt": "1.6.1", 27 | "grunt-contrib-clean": "2.0.1", 28 | "grunt-contrib-concat": "2.1.0", 29 | "grunt-contrib-copy": "1.0.0", 30 | "grunt-eslint": "24.3.0", 31 | "grunt-exec": "3.0.0", 32 | "grunt-karma": "4.0.2", 33 | "jsdoc": "4.0.4", 34 | "jsdoc-wmf-theme": "1.1.0", 35 | "karma": "6.4.4", 36 | "karma-chrome-launcher": "3.2.0", 37 | "karma-coverage": "2.2.1", 38 | "karma-firefox-launcher": "2.1.3", 39 | "karma-qunit": "4.2.1", 40 | "qunit": "2.24.1" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "extends": [ 4 | "wikimedia/common", 5 | "wikimedia/language/es2017" 6 | ], 7 | "env": { 8 | "shared-node-browser": true 9 | }, 10 | "globals": { 11 | "unicodeJS": "writable" 12 | }, 13 | "rules": { 14 | "max-len": "off" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/generated/unicodejs.derivedbidiclasses.js: -------------------------------------------------------------------------------- 1 | // This file is GENERATED by tools/unicodejs-properties.js 2 | // DO NOT EDIT 3 | unicodeJS.derivedbidiclasses = { 4 | // partial extraction only 5 | L: [ [ 0x0041, 0x005A ], [ 0x0061, 0x007A ], 0x00AA, 0x00B5, 0x00BA, [ 0x00C0, 0x00D6 ], [ 0x00D8, 0x00F6 ], [ 0x00F8, 0x02B8 ], [ 0x02BB, 0x02C1 ], 0x02D0, 0x02D1, [ 0x02E0, 0x02E4 ], 0x02EE, [ 0x0370, 0x0373 ], 0x0376, 
0x0377, [ 0x037A, 0x037D ], 0x037F, 0x0386, [ 0x0388, 0x038A ], 0x038C, [ 0x038E, 0x03A1 ], [ 0x03A3, 0x03F5 ], [ 0x03F7, 0x0482 ], [ 0x048A, 0x052F ], [ 0x0531, 0x0556 ], [ 0x0559, 0x0589 ], [ 0x0903, 0x0939 ], 0x093B, [ 0x093D, 0x0940 ], [ 0x0949, 0x094C ], [ 0x094E, 0x0950 ], [ 0x0958, 0x0961 ], [ 0x0964, 0x0980 ], 0x0982, 0x0983, [ 0x0985, 0x098C ], 0x098F, 0x0990, [ 0x0993, 0x09A8 ], [ 0x09AA, 0x09B0 ], 0x09B2, [ 0x09B6, 0x09B9 ], [ 0x09BD, 0x09C0 ], 0x09C7, 0x09C8, 0x09CB, 0x09CC, 0x09CE, 0x09D7, 0x09DC, 0x09DD, [ 0x09DF, 0x09E1 ], [ 0x09E6, 0x09F1 ], [ 0x09F4, 0x09FA ], 0x09FC, 0x09FD, 0x0A03, [ 0x0A05, 0x0A0A ], 0x0A0F, 0x0A10, [ 0x0A13, 0x0A28 ], [ 0x0A2A, 0x0A30 ], 0x0A32, 0x0A33, 0x0A35, 0x0A36, 0x0A38, 0x0A39, [ 0x0A3E, 0x0A40 ], [ 0x0A59, 0x0A5C ], 0x0A5E, [ 0x0A66, 0x0A6F ], [ 0x0A72, 0x0A74 ], 0x0A76, 0x0A83, [ 0x0A85, 0x0A8D ], [ 0x0A8F, 0x0A91 ], [ 0x0A93, 0x0AA8 ], [ 0x0AAA, 0x0AB0 ], 0x0AB2, 0x0AB3, [ 0x0AB5, 0x0AB9 ], [ 0x0ABD, 0x0AC0 ], 0x0AC9, 0x0ACB, 0x0ACC, 0x0AD0, 0x0AE0, 0x0AE1, [ 0x0AE6, 0x0AF0 ], 0x0AF9, 0x0B02, 0x0B03, [ 0x0B05, 0x0B0C ], 0x0B0F, 0x0B10, [ 0x0B13, 0x0B28 ], [ 0x0B2A, 0x0B30 ], 0x0B32, 0x0B33, [ 0x0B35, 0x0B39 ], 0x0B3D, 0x0B3E, 0x0B40, 0x0B47, 0x0B48, 0x0B4B, 0x0B4C, 0x0B57, 0x0B5C, 0x0B5D, [ 0x0B5F, 0x0B61 ], [ 0x0B66, 0x0B77 ], 0x0B83, [ 0x0B85, 0x0B8A ], [ 0x0B8E, 0x0B90 ], [ 0x0B92, 0x0B95 ], 0x0B99, 0x0B9A, 0x0B9C, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, [ 0x0BA8, 0x0BAA ], [ 0x0BAE, 0x0BB9 ], 0x0BBE, 0x0BBF, 0x0BC1, 0x0BC2, [ 0x0BC6, 0x0BC8 ], [ 0x0BCA, 0x0BCC ], 0x0BD0, 0x0BD7, [ 0x0BE6, 0x0BF2 ], [ 0x0C01, 0x0C03 ], [ 0x0C05, 0x0C0C ], [ 0x0C0E, 0x0C10 ], [ 0x0C12, 0x0C28 ], [ 0x0C2A, 0x0C39 ], 0x0C3D, [ 0x0C41, 0x0C44 ], [ 0x0C58, 0x0C5A ], 0x0C5D, 0x0C60, 0x0C61, [ 0x0C66, 0x0C6F ], 0x0C77, 0x0C7F, 0x0C80, [ 0x0C82, 0x0C8C ], [ 0x0C8E, 0x0C90 ], [ 0x0C92, 0x0CA8 ], [ 0x0CAA, 0x0CB3 ], [ 0x0CB5, 0x0CB9 ], [ 0x0CBD, 0x0CC4 ], [ 0x0CC6, 0x0CC8 ], 0x0CCA, 0x0CCB, 0x0CD5, 0x0CD6, 0x0CDD, 0x0CDE, 0x0CE0, 0x0CE1, [ 0x0CE6, 
0x0CEF ], [ 0x0CF1, 0x0CF3 ], [ 0x0D02, 0x0D0C ], [ 0x0D0E, 0x0D10 ], [ 0x0D12, 0x0D3A ], [ 0x0D3D, 0x0D40 ], [ 0x0D46, 0x0D48 ], [ 0x0D4A, 0x0D4C ], 0x0D4E, 0x0D4F, [ 0x0D54, 0x0D61 ], [ 0x0D66, 0x0D7F ], 0x0D82, 0x0D83, [ 0x0D85, 0x0D96 ], [ 0x0D9A, 0x0DB1 ], [ 0x0DB3, 0x0DBB ], 0x0DBD, [ 0x0DC0, 0x0DC6 ], [ 0x0DCF, 0x0DD1 ], [ 0x0DD8, 0x0DDF ], [ 0x0DE6, 0x0DEF ], [ 0x0DF2, 0x0DF4 ], [ 0x0E01, 0x0E30 ], 0x0E32, 0x0E33, [ 0x0E40, 0x0E46 ], [ 0x0E4F, 0x0E5B ], 0x0E81, 0x0E82, 0x0E84, [ 0x0E86, 0x0E8A ], [ 0x0E8C, 0x0EA3 ], 0x0EA5, [ 0x0EA7, 0x0EB0 ], 0x0EB2, 0x0EB3, 0x0EBD, [ 0x0EC0, 0x0EC4 ], 0x0EC6, [ 0x0ED0, 0x0ED9 ], [ 0x0EDC, 0x0EDF ], [ 0x0F00, 0x0F17 ], [ 0x0F1A, 0x0F34 ], 0x0F36, 0x0F38, [ 0x0F3E, 0x0F47 ], [ 0x0F49, 0x0F6C ], 0x0F7F, 0x0F85, [ 0x0F88, 0x0F8C ], [ 0x0FBE, 0x0FC5 ], [ 0x0FC7, 0x0FCC ], [ 0x0FCE, 0x0FDA ], [ 0x1000, 0x102C ], 0x1031, 0x1038, 0x103B, 0x103C, [ 0x103F, 0x1057 ], [ 0x105A, 0x105D ], [ 0x1061, 0x1070 ], [ 0x1075, 0x1081 ], 0x1083, 0x1084, [ 0x1087, 0x108C ], [ 0x108E, 0x109C ], [ 0x109E, 0x10C5 ], 0x10C7, 0x10CD, [ 0x10D0, 0x1248 ], [ 0x124A, 0x124D ], [ 0x1250, 0x1256 ], 0x1258, [ 0x125A, 0x125D ], [ 0x1260, 0x1288 ], [ 0x128A, 0x128D ], [ 0x1290, 0x12B0 ], [ 0x12B2, 0x12B5 ], [ 0x12B8, 0x12BE ], 0x12C0, [ 0x12C2, 0x12C5 ], [ 0x12C8, 0x12D6 ], [ 0x12D8, 0x1310 ], [ 0x1312, 0x1315 ], [ 0x1318, 0x135A ], [ 0x1360, 0x137C ], [ 0x1380, 0x138F ], [ 0x13A0, 0x13F5 ], [ 0x13F8, 0x13FD ], [ 0x1401, 0x167F ], [ 0x1681, 0x169A ], [ 0x16A0, 0x16F8 ], [ 0x1700, 0x1711 ], 0x1715, [ 0x171F, 0x1731 ], [ 0x1734, 0x1736 ], [ 0x1740, 0x1751 ], [ 0x1760, 0x176C ], [ 0x176E, 0x1770 ], [ 0x1780, 0x17B3 ], 0x17B6, [ 0x17BE, 0x17C5 ], 0x17C7, 0x17C8, [ 0x17D4, 0x17DA ], 0x17DC, [ 0x17E0, 0x17E9 ], [ 0x1810, 0x1819 ], [ 0x1820, 0x1878 ], [ 0x1880, 0x1884 ], [ 0x1887, 0x18A8 ], 0x18AA, [ 0x18B0, 0x18F5 ], [ 0x1900, 0x191E ], [ 0x1923, 0x1926 ], [ 0x1929, 0x192B ], 0x1930, 0x1931, [ 0x1933, 0x1938 ], [ 0x1946, 0x196D ], [ 0x1970, 0x1974 ], [ 0x1980, 
0x19AB ], [ 0x19B0, 0x19C9 ], [ 0x19D0, 0x19DA ], [ 0x1A00, 0x1A16 ], 0x1A19, 0x1A1A, [ 0x1A1E, 0x1A55 ], 0x1A57, 0x1A61, 0x1A63, 0x1A64, [ 0x1A6D, 0x1A72 ], [ 0x1A80, 0x1A89 ], [ 0x1A90, 0x1A99 ], [ 0x1AA0, 0x1AAD ], [ 0x1B04, 0x1B33 ], 0x1B35, 0x1B3B, [ 0x1B3D, 0x1B41 ], [ 0x1B43, 0x1B4C ], [ 0x1B50, 0x1B6A ], [ 0x1B74, 0x1B7E ], [ 0x1B82, 0x1BA1 ], 0x1BA6, 0x1BA7, 0x1BAA, [ 0x1BAE, 0x1BE5 ], 0x1BE7, [ 0x1BEA, 0x1BEC ], 0x1BEE, 0x1BF2, 0x1BF3, [ 0x1BFC, 0x1C2B ], 0x1C34, 0x1C35, [ 0x1C3B, 0x1C49 ], [ 0x1C4D, 0x1C88 ], [ 0x1C90, 0x1CBA ], [ 0x1CBD, 0x1CC7 ], 0x1CD3, 0x1CE1, [ 0x1CE9, 0x1CEC ], [ 0x1CEE, 0x1CF3 ], [ 0x1CF5, 0x1CF7 ], 0x1CFA, [ 0x1D00, 0x1DBF ], [ 0x1E00, 0x1F15 ], [ 0x1F18, 0x1F1D ], [ 0x1F20, 0x1F45 ], [ 0x1F48, 0x1F4D ], [ 0x1F50, 0x1F57 ], 0x1F59, 0x1F5B, 0x1F5D, [ 0x1F5F, 0x1F7D ], [ 0x1F80, 0x1FB4 ], [ 0x1FB6, 0x1FBC ], 0x1FBE, [ 0x1FC2, 0x1FC4 ], [ 0x1FC6, 0x1FCC ], [ 0x1FD0, 0x1FD3 ], [ 0x1FD6, 0x1FDB ], [ 0x1FE0, 0x1FEC ], [ 0x1FF2, 0x1FF4 ], [ 0x1FF6, 0x1FFC ], 0x200E, 0x2071, 0x207F, [ 0x2090, 0x209C ], 0x2102, 0x2107, [ 0x210A, 0x2113 ], 0x2115, [ 0x2119, 0x211D ], 0x2124, 0x2126, 0x2128, [ 0x212A, 0x212D ], [ 0x212F, 0x2139 ], [ 0x213C, 0x213F ], [ 0x2145, 0x2149 ], 0x214E, 0x214F, [ 0x2160, 0x2188 ], [ 0x2336, 0x237A ], 0x2395, [ 0x249C, 0x24E9 ], 0x26AC, [ 0x2800, 0x28FF ], [ 0x2C00, 0x2CE4 ], [ 0x2CEB, 0x2CEE ], 0x2CF2, 0x2CF3, [ 0x2D00, 0x2D25 ], 0x2D27, 0x2D2D, [ 0x2D30, 0x2D67 ], 0x2D6F, 0x2D70, [ 0x2D80, 0x2D96 ], [ 0x2DA0, 0x2DA6 ], [ 0x2DA8, 0x2DAE ], [ 0x2DB0, 0x2DB6 ], [ 0x2DB8, 0x2DBE ], [ 0x2DC0, 0x2DC6 ], [ 0x2DC8, 0x2DCE ], [ 0x2DD0, 0x2DD6 ], [ 0x2DD8, 0x2DDE ], [ 0x3005, 0x3007 ], [ 0x3021, 0x3029 ], 0x302E, 0x302F, [ 0x3031, 0x3035 ], [ 0x3038, 0x303C ], [ 0x3041, 0x3096 ], [ 0x309D, 0x309F ], [ 0x30A1, 0x30FA ], [ 0x30FC, 0x30FF ], [ 0x3105, 0x312F ], [ 0x3131, 0x318E ], [ 0x3190, 0x31BF ], [ 0x31F0, 0x321C ], [ 0x3220, 0x324F ], [ 0x3260, 0x327B ], [ 0x327F, 0x32B0 ], [ 0x32C0, 0x32CB ], [ 0x32D0, 0x3376 ], [ 0x337B, 
0x33DD ], [ 0x33E0, 0x33FE ], [ 0x3400, 0x4DBF ], [ 0x4E00, 0xA48C ], [ 0xA4D0, 0xA60C ], [ 0xA610, 0xA62B ], [ 0xA640, 0xA66E ], [ 0xA680, 0xA69D ], [ 0xA6A0, 0xA6EF ], [ 0xA6F2, 0xA6F7 ], [ 0xA722, 0xA787 ], [ 0xA789, 0xA7CA ], 0xA7D0, 0xA7D1, 0xA7D3, [ 0xA7D5, 0xA7D9 ], [ 0xA7F2, 0xA801 ], [ 0xA803, 0xA805 ], [ 0xA807, 0xA80A ], [ 0xA80C, 0xA824 ], 0xA827, [ 0xA830, 0xA837 ], [ 0xA840, 0xA873 ], [ 0xA880, 0xA8C3 ], [ 0xA8CE, 0xA8D9 ], [ 0xA8F2, 0xA8FE ], [ 0xA900, 0xA925 ], [ 0xA92E, 0xA946 ], 0xA952, 0xA953, [ 0xA95F, 0xA97C ], [ 0xA983, 0xA9B2 ], 0xA9B4, 0xA9B5, 0xA9BA, 0xA9BB, [ 0xA9BE, 0xA9CD ], [ 0xA9CF, 0xA9D9 ], [ 0xA9DE, 0xA9E4 ], [ 0xA9E6, 0xA9FE ], [ 0xAA00, 0xAA28 ], 0xAA2F, 0xAA30, 0xAA33, 0xAA34, [ 0xAA40, 0xAA42 ], [ 0xAA44, 0xAA4B ], 0xAA4D, [ 0xAA50, 0xAA59 ], [ 0xAA5C, 0xAA7B ], [ 0xAA7D, 0xAAAF ], 0xAAB1, 0xAAB5, 0xAAB6, [ 0xAAB9, 0xAABD ], 0xAAC0, 0xAAC2, [ 0xAADB, 0xAAEB ], [ 0xAAEE, 0xAAF5 ], [ 0xAB01, 0xAB06 ], [ 0xAB09, 0xAB0E ], [ 0xAB11, 0xAB16 ], [ 0xAB20, 0xAB26 ], [ 0xAB28, 0xAB2E ], [ 0xAB30, 0xAB69 ], [ 0xAB70, 0xABE4 ], 0xABE6, 0xABE7, [ 0xABE9, 0xABEC ], [ 0xABF0, 0xABF9 ], [ 0xAC00, 0xD7A3 ], [ 0xD7B0, 0xD7C6 ], [ 0xD7CB, 0xD7FB ], [ 0xE000, 0xFA6D ], [ 0xFA70, 0xFAD9 ], [ 0xFB00, 0xFB06 ], [ 0xFB13, 0xFB17 ], [ 0xFF21, 0xFF3A ], [ 0xFF41, 0xFF5A ], [ 0xFF66, 0xFFBE ], [ 0xFFC2, 0xFFC7 ], [ 0xFFCA, 0xFFCF ], [ 0xFFD2, 0xFFD7 ], [ 0xFFDA, 0xFFDC ], [ 0x10000, 0x1000B ], [ 0x1000D, 0x10026 ], [ 0x10028, 0x1003A ], 0x1003C, 0x1003D, [ 0x1003F, 0x1004D ], [ 0x10050, 0x1005D ], [ 0x10080, 0x100FA ], 0x10100, 0x10102, [ 0x10107, 0x10133 ], [ 0x10137, 0x1013F ], 0x1018D, 0x1018E, [ 0x101D0, 0x101FC ], [ 0x10280, 0x1029C ], [ 0x102A0, 0x102D0 ], [ 0x10300, 0x10323 ], [ 0x1032D, 0x1034A ], [ 0x10350, 0x10375 ], [ 0x10380, 0x1039D ], [ 0x1039F, 0x103C3 ], [ 0x103C8, 0x103D5 ], [ 0x10400, 0x1049D ], [ 0x104A0, 0x104A9 ], [ 0x104B0, 0x104D3 ], [ 0x104D8, 0x104FB ], [ 0x10500, 0x10527 ], [ 0x10530, 0x10563 ], [ 0x1056F, 0x1057A ], [ 0x1057C, 
0x1058A ], [ 0x1058C, 0x10592 ], 0x10594, 0x10595, [ 0x10597, 0x105A1 ], [ 0x105A3, 0x105B1 ], [ 0x105B3, 0x105B9 ], 0x105BB, 0x105BC, [ 0x10600, 0x10736 ], [ 0x10740, 0x10755 ], [ 0x10760, 0x10767 ], [ 0x10780, 0x10785 ], [ 0x10787, 0x107B0 ], [ 0x107B2, 0x107BA ], 0x11000, [ 0x11002, 0x11037 ], [ 0x11047, 0x1104D ], [ 0x11066, 0x1106F ], 0x11071, 0x11072, 0x11075, [ 0x11082, 0x110B2 ], 0x110B7, 0x110B8, [ 0x110BB, 0x110C1 ], 0x110CD, [ 0x110D0, 0x110E8 ], [ 0x110F0, 0x110F9 ], [ 0x11103, 0x11126 ], 0x1112C, [ 0x11136, 0x11147 ], [ 0x11150, 0x11172 ], [ 0x11174, 0x11176 ], [ 0x11182, 0x111B5 ], [ 0x111BF, 0x111C8 ], 0x111CD, 0x111CE, [ 0x111D0, 0x111DF ], [ 0x111E1, 0x111F4 ], [ 0x11200, 0x11211 ], [ 0x11213, 0x1122E ], 0x11232, 0x11233, 0x11235, [ 0x11238, 0x1123D ], 0x1123F, 0x11240, [ 0x11280, 0x11286 ], 0x11288, [ 0x1128A, 0x1128D ], [ 0x1128F, 0x1129D ], [ 0x1129F, 0x112A9 ], [ 0x112B0, 0x112DE ], [ 0x112E0, 0x112E2 ], [ 0x112F0, 0x112F9 ], 0x11302, 0x11303, [ 0x11305, 0x1130C ], 0x1130F, 0x11310, [ 0x11313, 0x11328 ], [ 0x1132A, 0x11330 ], 0x11332, 0x11333, [ 0x11335, 0x11339 ], [ 0x1133D, 0x1133F ], [ 0x11341, 0x11344 ], 0x11347, 0x11348, [ 0x1134B, 0x1134D ], 0x11350, 0x11357, [ 0x1135D, 0x11363 ], [ 0x11400, 0x11437 ], 0x11440, 0x11441, 0x11445, [ 0x11447, 0x1145B ], 0x1145D, [ 0x1145F, 0x11461 ], [ 0x11480, 0x114B2 ], 0x114B9, [ 0x114BB, 0x114BE ], 0x114C1, [ 0x114C4, 0x114C7 ], [ 0x114D0, 0x114D9 ], [ 0x11580, 0x115B1 ], [ 0x115B8, 0x115BB ], 0x115BE, [ 0x115C1, 0x115DB ], [ 0x11600, 0x11632 ], 0x1163B, 0x1163C, 0x1163E, [ 0x11641, 0x11644 ], [ 0x11650, 0x11659 ], [ 0x11680, 0x116AA ], 0x116AC, 0x116AE, 0x116AF, 0x116B6, 0x116B8, 0x116B9, [ 0x116C0, 0x116C9 ], [ 0x11700, 0x1171A ], 0x11720, 0x11721, 0x11726, [ 0x11730, 0x11746 ], [ 0x11800, 0x1182E ], 0x11838, 0x1183B, [ 0x118A0, 0x118F2 ], [ 0x118FF, 0x11906 ], 0x11909, [ 0x1190C, 0x11913 ], 0x11915, 0x11916, [ 0x11918, 0x11935 ], 0x11937, 0x11938, 0x1193D, [ 0x1193F, 0x11942 ], [ 0x11944, 0x11946 ], [ 
0x11950, 0x11959 ], [ 0x119A0, 0x119A7 ], [ 0x119AA, 0x119D3 ], [ 0x119DC, 0x119DF ], [ 0x119E1, 0x119E4 ], 0x11A00, 0x11A07, 0x11A08, [ 0x11A0B, 0x11A32 ], 0x11A39, 0x11A3A, [ 0x11A3F, 0x11A46 ], 0x11A50, 0x11A57, 0x11A58, [ 0x11A5C, 0x11A89 ], 0x11A97, [ 0x11A9A, 0x11AA2 ], [ 0x11AB0, 0x11AF8 ], [ 0x11B00, 0x11B09 ], [ 0x11C00, 0x11C08 ], [ 0x11C0A, 0x11C2F ], [ 0x11C3E, 0x11C45 ], [ 0x11C50, 0x11C6C ], [ 0x11C70, 0x11C8F ], 0x11CA9, 0x11CB1, 0x11CB4, [ 0x11D00, 0x11D06 ], 0x11D08, 0x11D09, [ 0x11D0B, 0x11D30 ], 0x11D46, [ 0x11D50, 0x11D59 ], [ 0x11D60, 0x11D65 ], 0x11D67, 0x11D68, [ 0x11D6A, 0x11D8E ], 0x11D93, 0x11D94, 0x11D96, 0x11D98, [ 0x11DA0, 0x11DA9 ], [ 0x11EE0, 0x11EF2 ], [ 0x11EF5, 0x11EF8 ], [ 0x11F02, 0x11F10 ], [ 0x11F12, 0x11F35 ], 0x11F3E, 0x11F3F, 0x11F41, [ 0x11F43, 0x11F59 ], 0x11FB0, [ 0x11FC0, 0x11FD4 ], [ 0x11FFF, 0x12399 ], [ 0x12400, 0x1246E ], [ 0x12470, 0x12474 ], [ 0x12480, 0x12543 ], [ 0x12F90, 0x12FF2 ], [ 0x13000, 0x1343F ], [ 0x13441, 0x13446 ], [ 0x14400, 0x14646 ], [ 0x16800, 0x16A38 ], [ 0x16A40, 0x16A5E ], [ 0x16A60, 0x16A69 ], [ 0x16A6E, 0x16ABE ], [ 0x16AC0, 0x16AC9 ], [ 0x16AD0, 0x16AED ], 0x16AF5, [ 0x16B00, 0x16B2F ], [ 0x16B37, 0x16B45 ], [ 0x16B50, 0x16B59 ], [ 0x16B5B, 0x16B61 ], [ 0x16B63, 0x16B77 ], [ 0x16B7D, 0x16B8F ], [ 0x16E40, 0x16E9A ], [ 0x16F00, 0x16F4A ], [ 0x16F50, 0x16F87 ], [ 0x16F93, 0x16F9F ], 0x16FE0, 0x16FE1, 0x16FE3, 0x16FF0, 0x16FF1, [ 0x17000, 0x187F7 ], [ 0x18800, 0x18CD5 ], [ 0x18D00, 0x18D08 ], [ 0x1AFF0, 0x1AFF3 ], [ 0x1AFF5, 0x1AFFB ], 0x1AFFD, 0x1AFFE, [ 0x1B000, 0x1B122 ], 0x1B132, [ 0x1B150, 0x1B152 ], 0x1B155, [ 0x1B164, 0x1B167 ], [ 0x1B170, 0x1B2FB ], [ 0x1BC00, 0x1BC6A ], [ 0x1BC70, 0x1BC7C ], [ 0x1BC80, 0x1BC88 ], [ 0x1BC90, 0x1BC99 ], 0x1BC9C, 0x1BC9F, [ 0x1CF50, 0x1CFC3 ], [ 0x1D000, 0x1D0F5 ], [ 0x1D100, 0x1D126 ], [ 0x1D129, 0x1D166 ], [ 0x1D16A, 0x1D172 ], 0x1D183, 0x1D184, [ 0x1D18C, 0x1D1A9 ], [ 0x1D1AE, 0x1D1E8 ], [ 0x1D2C0, 0x1D2D3 ], [ 0x1D2E0, 0x1D2F3 ], [ 0x1D360, 0x1D378 ], 
[ 0x1D400, 0x1D454 ], [ 0x1D456, 0x1D49C ], 0x1D49E, 0x1D49F, 0x1D4A2, 0x1D4A5, 0x1D4A6, [ 0x1D4A9, 0x1D4AC ], [ 0x1D4AE, 0x1D4B9 ], 0x1D4BB, [ 0x1D4BD, 0x1D4C3 ], [ 0x1D4C5, 0x1D505 ], [ 0x1D507, 0x1D50A ], [ 0x1D50D, 0x1D514 ], [ 0x1D516, 0x1D51C ], [ 0x1D51E, 0x1D539 ], [ 0x1D53B, 0x1D53E ], [ 0x1D540, 0x1D544 ], 0x1D546, [ 0x1D54A, 0x1D550 ], [ 0x1D552, 0x1D6A5 ], [ 0x1D6A8, 0x1D6DA ], [ 0x1D6DC, 0x1D714 ], [ 0x1D716, 0x1D74E ], [ 0x1D750, 0x1D788 ], [ 0x1D78A, 0x1D7C2 ], [ 0x1D7C4, 0x1D7CB ], [ 0x1D800, 0x1D9FF ], [ 0x1DA37, 0x1DA3A ], [ 0x1DA6D, 0x1DA74 ], [ 0x1DA76, 0x1DA83 ], [ 0x1DA85, 0x1DA8B ], [ 0x1DF00, 0x1DF1E ], [ 0x1DF25, 0x1DF2A ], [ 0x1E030, 0x1E06D ], [ 0x1E100, 0x1E12C ], [ 0x1E137, 0x1E13D ], [ 0x1E140, 0x1E149 ], 0x1E14E, 0x1E14F, [ 0x1E290, 0x1E2AD ], [ 0x1E2C0, 0x1E2EB ], [ 0x1E2F0, 0x1E2F9 ], [ 0x1E4D0, 0x1E4EB ], [ 0x1E4F0, 0x1E4F9 ], [ 0x1E7E0, 0x1E7E6 ], [ 0x1E7E8, 0x1E7EB ], 0x1E7ED, 0x1E7EE, [ 0x1E7F0, 0x1E7FE ], [ 0x1F110, 0x1F12E ], [ 0x1F130, 0x1F169 ], [ 0x1F170, 0x1F1AC ], [ 0x1F1E6, 0x1F202 ], [ 0x1F210, 0x1F23B ], [ 0x1F240, 0x1F248 ], 0x1F250, 0x1F251, [ 0x20000, 0x2A6DF ], [ 0x2A700, 0x2B739 ], [ 0x2B740, 0x2B81D ], [ 0x2B820, 0x2CEA1 ], [ 0x2CEB0, 0x2EBE0 ], [ 0x2F800, 0x2FA1D ], [ 0x30000, 0x3134A ], [ 0x31350, 0x323AF ], [ 0xF0000, 0xFFFFD ], [ 0x100000, 0x10FFFD ] ], 6 | R: [ 0x05BE, 0x05C0, 0x05C3, 0x05C6, [ 0x05D0, 0x05EA ], [ 0x05EF, 0x05F4 ], [ 0x07C0, 0x07EA ], 0x07F4, 0x07F5, 0x07FA, [ 0x07FE, 0x0815 ], 0x081A, 0x0824, 0x0828, [ 0x0830, 0x083E ], [ 0x0840, 0x0858 ], 0x085E, 0x200F, 0xFB1D, [ 0xFB1F, 0xFB28 ], [ 0xFB2A, 0xFB36 ], [ 0xFB38, 0xFB3C ], 0xFB3E, 0xFB40, 0xFB41, 0xFB43, 0xFB44, [ 0xFB46, 0xFB4F ], [ 0x10800, 0x10805 ], 0x10808, [ 0x1080A, 0x10835 ], 0x10837, 0x10838, 0x1083C, [ 0x1083F, 0x10855 ], [ 0x10857, 0x1089E ], [ 0x108A7, 0x108AF ], [ 0x108E0, 0x108F2 ], 0x108F4, 0x108F5, [ 0x108FB, 0x1091B ], [ 0x10920, 0x10939 ], 0x1093F, [ 0x10980, 0x109B7 ], [ 0x109BC, 0x109CF ], [ 0x109D2, 0x10A00 ], [ 0x10A10, 
0x10A13 ], [ 0x10A15, 0x10A17 ], [ 0x10A19, 0x10A35 ], [ 0x10A40, 0x10A48 ], [ 0x10A50, 0x10A58 ], [ 0x10A60, 0x10A9F ], [ 0x10AC0, 0x10AE4 ], [ 0x10AEB, 0x10AF6 ], [ 0x10B00, 0x10B35 ], [ 0x10B40, 0x10B55 ], [ 0x10B58, 0x10B72 ], [ 0x10B78, 0x10B91 ], [ 0x10B99, 0x10B9C ], [ 0x10BA9, 0x10BAF ], [ 0x10C00, 0x10C48 ], [ 0x10C80, 0x10CB2 ], [ 0x10CC0, 0x10CF2 ], [ 0x10CFA, 0x10CFF ], [ 0x10E80, 0x10EA9 ], 0x10EAD, 0x10EB0, 0x10EB1, [ 0x10F00, 0x10F27 ], [ 0x10F70, 0x10F81 ], [ 0x10F86, 0x10F89 ], [ 0x10FB0, 0x10FCB ], [ 0x10FE0, 0x10FF6 ], [ 0x1E800, 0x1E8C4 ], [ 0x1E8C7, 0x1E8CF ], [ 0x1E900, 0x1E943 ], 0x1E94B, [ 0x1E950, 0x1E959 ], 0x1E95E, 0x1E95F ], 7 | AL: [ 0x0608, 0x060B, 0x060D, [ 0x061B, 0x064A ], [ 0x066D, 0x066F ], [ 0x0671, 0x06D5 ], 0x06E5, 0x06E6, 0x06EE, 0x06EF, [ 0x06FA, 0x070D ], 0x070F, 0x0710, [ 0x0712, 0x072F ], [ 0x074D, 0x07A5 ], 0x07B1, [ 0x0860, 0x086A ], [ 0x0870, 0x088E ], [ 0x08A0, 0x08C9 ], [ 0xFB50, 0xFBC2 ], [ 0xFBD3, 0xFD3D ], [ 0xFD50, 0xFD8F ], [ 0xFD92, 0xFDC7 ], [ 0xFDF0, 0xFDFC ], [ 0xFE70, 0xFE74 ], [ 0xFE76, 0xFEFC ], [ 0x10D00, 0x10D23 ], [ 0x10F30, 0x10F45 ], [ 0x10F51, 0x10F59 ], [ 0x1EC71, 0x1ECB4 ], [ 0x1ED01, 0x1ED3D ], [ 0x1EE00, 0x1EE03 ], [ 0x1EE05, 0x1EE1F ], 0x1EE21, 0x1EE22, 0x1EE24, 0x1EE27, [ 0x1EE29, 0x1EE32 ], [ 0x1EE34, 0x1EE37 ], 0x1EE39, 0x1EE3B, 0x1EE42, 0x1EE47, 0x1EE49, 0x1EE4B, [ 0x1EE4D, 0x1EE4F ], 0x1EE51, 0x1EE52, 0x1EE54, 0x1EE57, 0x1EE59, 0x1EE5B, 0x1EE5D, 0x1EE5F, 0x1EE61, 0x1EE62, 0x1EE64, [ 0x1EE67, 0x1EE6A ], [ 0x1EE6C, 0x1EE72 ], [ 0x1EE74, 0x1EE77 ], [ 0x1EE79, 0x1EE7C ], 0x1EE7E, [ 0x1EE80, 0x1EE89 ], [ 0x1EE8B, 0x1EE9B ], [ 0x1EEA1, 0x1EEA3 ], [ 0x1EEA5, 0x1EEA9 ], [ 0x1EEAB, 0x1EEBB ] ] 8 | }; 9 | -------------------------------------------------------------------------------- /src/generated/unicodejs.derivedcoreproperties.js: -------------------------------------------------------------------------------- 1 | // This file is GENERATED by tools/unicodejs-properties.js 2 | // DO NOT EDIT 3 | 
unicodeJS.derivedcoreproperties = { 4 | // partial extraction only 5 | Alphabetic: [ [ 0x0041, 0x005A ], [ 0x0061, 0x007A ], 0x00AA, 0x00B5, 0x00BA, [ 0x00C0, 0x00D6 ], [ 0x00D8, 0x00F6 ], [ 0x00F8, 0x02C1 ], [ 0x02C6, 0x02D1 ], [ 0x02E0, 0x02E4 ], 0x02EC, 0x02EE, 0x0345, [ 0x0370, 0x0374 ], 0x0376, 0x0377, [ 0x037A, 0x037D ], 0x037F, 0x0386, [ 0x0388, 0x038A ], 0x038C, [ 0x038E, 0x03A1 ], [ 0x03A3, 0x03F5 ], [ 0x03F7, 0x0481 ], [ 0x048A, 0x052F ], [ 0x0531, 0x0556 ], 0x0559, [ 0x0560, 0x0588 ], [ 0x05B0, 0x05BD ], 0x05BF, 0x05C1, 0x05C2, 0x05C4, 0x05C5, 0x05C7, [ 0x05D0, 0x05EA ], [ 0x05EF, 0x05F2 ], [ 0x0610, 0x061A ], [ 0x0620, 0x0657 ], [ 0x0659, 0x065F ], [ 0x066E, 0x06D3 ], [ 0x06D5, 0x06DC ], [ 0x06E1, 0x06E8 ], [ 0x06ED, 0x06EF ], [ 0x06FA, 0x06FC ], 0x06FF, [ 0x0710, 0x073F ], [ 0x074D, 0x07B1 ], [ 0x07CA, 0x07EA ], 0x07F4, 0x07F5, 0x07FA, [ 0x0800, 0x0817 ], [ 0x081A, 0x082C ], [ 0x0840, 0x0858 ], [ 0x0860, 0x086A ], [ 0x0870, 0x0887 ], [ 0x0889, 0x088E ], [ 0x08A0, 0x08C9 ], [ 0x08D4, 0x08DF ], [ 0x08E3, 0x08E9 ], [ 0x08F0, 0x093B ], [ 0x093D, 0x094C ], [ 0x094E, 0x0950 ], [ 0x0955, 0x0963 ], [ 0x0971, 0x0983 ], [ 0x0985, 0x098C ], 0x098F, 0x0990, [ 0x0993, 0x09A8 ], [ 0x09AA, 0x09B0 ], 0x09B2, [ 0x09B6, 0x09B9 ], [ 0x09BD, 0x09C4 ], 0x09C7, 0x09C8, 0x09CB, 0x09CC, 0x09CE, 0x09D7, 0x09DC, 0x09DD, [ 0x09DF, 0x09E3 ], 0x09F0, 0x09F1, 0x09FC, [ 0x0A01, 0x0A03 ], [ 0x0A05, 0x0A0A ], 0x0A0F, 0x0A10, [ 0x0A13, 0x0A28 ], [ 0x0A2A, 0x0A30 ], 0x0A32, 0x0A33, 0x0A35, 0x0A36, 0x0A38, 0x0A39, [ 0x0A3E, 0x0A42 ], 0x0A47, 0x0A48, 0x0A4B, 0x0A4C, 0x0A51, [ 0x0A59, 0x0A5C ], 0x0A5E, [ 0x0A70, 0x0A75 ], [ 0x0A81, 0x0A83 ], [ 0x0A85, 0x0A8D ], [ 0x0A8F, 0x0A91 ], [ 0x0A93, 0x0AA8 ], [ 0x0AAA, 0x0AB0 ], 0x0AB2, 0x0AB3, [ 0x0AB5, 0x0AB9 ], [ 0x0ABD, 0x0AC5 ], [ 0x0AC7, 0x0AC9 ], 0x0ACB, 0x0ACC, 0x0AD0, [ 0x0AE0, 0x0AE3 ], [ 0x0AF9, 0x0AFC ], [ 0x0B01, 0x0B03 ], [ 0x0B05, 0x0B0C ], 0x0B0F, 0x0B10, [ 0x0B13, 0x0B28 ], [ 0x0B2A, 0x0B30 ], 0x0B32, 0x0B33, [ 0x0B35, 0x0B39 ], [ 
0x0B3D, 0x0B44 ], 0x0B47, 0x0B48, 0x0B4B, 0x0B4C, 0x0B56, 0x0B57, 0x0B5C, 0x0B5D, [ 0x0B5F, 0x0B63 ], 0x0B71, 0x0B82, 0x0B83, [ 0x0B85, 0x0B8A ], [ 0x0B8E, 0x0B90 ], [ 0x0B92, 0x0B95 ], 0x0B99, 0x0B9A, 0x0B9C, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, [ 0x0BA8, 0x0BAA ], [ 0x0BAE, 0x0BB9 ], [ 0x0BBE, 0x0BC2 ], [ 0x0BC6, 0x0BC8 ], [ 0x0BCA, 0x0BCC ], 0x0BD0, 0x0BD7, [ 0x0C00, 0x0C0C ], [ 0x0C0E, 0x0C10 ], [ 0x0C12, 0x0C28 ], [ 0x0C2A, 0x0C39 ], [ 0x0C3D, 0x0C44 ], [ 0x0C46, 0x0C48 ], [ 0x0C4A, 0x0C4C ], 0x0C55, 0x0C56, [ 0x0C58, 0x0C5A ], 0x0C5D, [ 0x0C60, 0x0C63 ], [ 0x0C80, 0x0C83 ], [ 0x0C85, 0x0C8C ], [ 0x0C8E, 0x0C90 ], [ 0x0C92, 0x0CA8 ], [ 0x0CAA, 0x0CB3 ], [ 0x0CB5, 0x0CB9 ], [ 0x0CBD, 0x0CC4 ], [ 0x0CC6, 0x0CC8 ], [ 0x0CCA, 0x0CCC ], 0x0CD5, 0x0CD6, 0x0CDD, 0x0CDE, [ 0x0CE0, 0x0CE3 ], [ 0x0CF1, 0x0CF3 ], [ 0x0D00, 0x0D0C ], [ 0x0D0E, 0x0D10 ], [ 0x0D12, 0x0D3A ], [ 0x0D3D, 0x0D44 ], [ 0x0D46, 0x0D48 ], [ 0x0D4A, 0x0D4C ], 0x0D4E, [ 0x0D54, 0x0D57 ], [ 0x0D5F, 0x0D63 ], [ 0x0D7A, 0x0D7F ], [ 0x0D81, 0x0D83 ], [ 0x0D85, 0x0D96 ], [ 0x0D9A, 0x0DB1 ], [ 0x0DB3, 0x0DBB ], 0x0DBD, [ 0x0DC0, 0x0DC6 ], [ 0x0DCF, 0x0DD4 ], 0x0DD6, [ 0x0DD8, 0x0DDF ], 0x0DF2, 0x0DF3, [ 0x0E01, 0x0E3A ], [ 0x0E40, 0x0E46 ], 0x0E4D, 0x0E81, 0x0E82, 0x0E84, [ 0x0E86, 0x0E8A ], [ 0x0E8C, 0x0EA3 ], 0x0EA5, [ 0x0EA7, 0x0EB9 ], [ 0x0EBB, 0x0EBD ], [ 0x0EC0, 0x0EC4 ], 0x0EC6, 0x0ECD, [ 0x0EDC, 0x0EDF ], 0x0F00, [ 0x0F40, 0x0F47 ], [ 0x0F49, 0x0F6C ], [ 0x0F71, 0x0F83 ], [ 0x0F88, 0x0F97 ], [ 0x0F99, 0x0FBC ], [ 0x1000, 0x1036 ], 0x1038, [ 0x103B, 0x103F ], [ 0x1050, 0x108F ], [ 0x109A, 0x109D ], [ 0x10A0, 0x10C5 ], 0x10C7, 0x10CD, [ 0x10D0, 0x10FA ], [ 0x10FC, 0x1248 ], [ 0x124A, 0x124D ], [ 0x1250, 0x1256 ], 0x1258, [ 0x125A, 0x125D ], [ 0x1260, 0x1288 ], [ 0x128A, 0x128D ], [ 0x1290, 0x12B0 ], [ 0x12B2, 0x12B5 ], [ 0x12B8, 0x12BE ], 0x12C0, [ 0x12C2, 0x12C5 ], [ 0x12C8, 0x12D6 ], [ 0x12D8, 0x1310 ], [ 0x1312, 0x1315 ], [ 0x1318, 0x135A ], [ 0x1380, 0x138F ], [ 0x13A0, 0x13F5 ], [ 0x13F8, 0x13FD ], [ 
0x1401, 0x166C ], [ 0x166F, 0x167F ], [ 0x1681, 0x169A ], [ 0x16A0, 0x16EA ], [ 0x16EE, 0x16F8 ], [ 0x1700, 0x1713 ], [ 0x171F, 0x1733 ], [ 0x1740, 0x1753 ], [ 0x1760, 0x176C ], [ 0x176E, 0x1770 ], 0x1772, 0x1773, [ 0x1780, 0x17B3 ], [ 0x17B6, 0x17C8 ], 0x17D7, 0x17DC, [ 0x1820, 0x1878 ], [ 0x1880, 0x18AA ], [ 0x18B0, 0x18F5 ], [ 0x1900, 0x191E ], [ 0x1920, 0x192B ], [ 0x1930, 0x1938 ], [ 0x1950, 0x196D ], [ 0x1970, 0x1974 ], [ 0x1980, 0x19AB ], [ 0x19B0, 0x19C9 ], [ 0x1A00, 0x1A1B ], [ 0x1A20, 0x1A5E ], [ 0x1A61, 0x1A74 ], 0x1AA7, 0x1ABF, 0x1AC0, [ 0x1ACC, 0x1ACE ], [ 0x1B00, 0x1B33 ], [ 0x1B35, 0x1B43 ], [ 0x1B45, 0x1B4C ], [ 0x1B80, 0x1BA9 ], [ 0x1BAC, 0x1BAF ], [ 0x1BBA, 0x1BE5 ], [ 0x1BE7, 0x1BF1 ], [ 0x1C00, 0x1C36 ], [ 0x1C4D, 0x1C4F ], [ 0x1C5A, 0x1C7D ], [ 0x1C80, 0x1C88 ], [ 0x1C90, 0x1CBA ], [ 0x1CBD, 0x1CBF ], [ 0x1CE9, 0x1CEC ], [ 0x1CEE, 0x1CF3 ], 0x1CF5, 0x1CF6, 0x1CFA, [ 0x1D00, 0x1DBF ], [ 0x1DE7, 0x1DF4 ], [ 0x1E00, 0x1F15 ], [ 0x1F18, 0x1F1D ], [ 0x1F20, 0x1F45 ], [ 0x1F48, 0x1F4D ], [ 0x1F50, 0x1F57 ], 0x1F59, 0x1F5B, 0x1F5D, [ 0x1F5F, 0x1F7D ], [ 0x1F80, 0x1FB4 ], [ 0x1FB6, 0x1FBC ], 0x1FBE, [ 0x1FC2, 0x1FC4 ], [ 0x1FC6, 0x1FCC ], [ 0x1FD0, 0x1FD3 ], [ 0x1FD6, 0x1FDB ], [ 0x1FE0, 0x1FEC ], [ 0x1FF2, 0x1FF4 ], [ 0x1FF6, 0x1FFC ], 0x2071, 0x207F, [ 0x2090, 0x209C ], 0x2102, 0x2107, [ 0x210A, 0x2113 ], 0x2115, [ 0x2119, 0x211D ], 0x2124, 0x2126, 0x2128, [ 0x212A, 0x212D ], [ 0x212F, 0x2139 ], [ 0x213C, 0x213F ], [ 0x2145, 0x2149 ], 0x214E, [ 0x2160, 0x2188 ], [ 0x24B6, 0x24E9 ], [ 0x2C00, 0x2CE4 ], [ 0x2CEB, 0x2CEE ], 0x2CF2, 0x2CF3, [ 0x2D00, 0x2D25 ], 0x2D27, 0x2D2D, [ 0x2D30, 0x2D67 ], 0x2D6F, [ 0x2D80, 0x2D96 ], [ 0x2DA0, 0x2DA6 ], [ 0x2DA8, 0x2DAE ], [ 0x2DB0, 0x2DB6 ], [ 0x2DB8, 0x2DBE ], [ 0x2DC0, 0x2DC6 ], [ 0x2DC8, 0x2DCE ], [ 0x2DD0, 0x2DD6 ], [ 0x2DD8, 0x2DDE ], [ 0x2DE0, 0x2DFF ], 0x2E2F, [ 0x3005, 0x3007 ], [ 0x3021, 0x3029 ], [ 0x3031, 0x3035 ], [ 0x3038, 0x303C ], [ 0x3041, 0x3096 ], [ 0x309D, 0x309F ], [ 0x30A1, 0x30FA ], [ 0x30FC, 
0x30FF ], [ 0x3105, 0x312F ], [ 0x3131, 0x318E ], [ 0x31A0, 0x31BF ], [ 0x31F0, 0x31FF ], [ 0x3400, 0x4DBF ], [ 0x4E00, 0xA48C ], [ 0xA4D0, 0xA4FD ], [ 0xA500, 0xA60C ], [ 0xA610, 0xA61F ], 0xA62A, 0xA62B, [ 0xA640, 0xA66E ], [ 0xA674, 0xA67B ], [ 0xA67F, 0xA6EF ], [ 0xA717, 0xA71F ], [ 0xA722, 0xA788 ], [ 0xA78B, 0xA7CA ], 0xA7D0, 0xA7D1, 0xA7D3, [ 0xA7D5, 0xA7D9 ], [ 0xA7F2, 0xA805 ], [ 0xA807, 0xA827 ], [ 0xA840, 0xA873 ], [ 0xA880, 0xA8C3 ], 0xA8C5, [ 0xA8F2, 0xA8F7 ], 0xA8FB, [ 0xA8FD, 0xA8FF ], [ 0xA90A, 0xA92A ], [ 0xA930, 0xA952 ], [ 0xA960, 0xA97C ], [ 0xA980, 0xA9B2 ], [ 0xA9B4, 0xA9BF ], 0xA9CF, [ 0xA9E0, 0xA9EF ], [ 0xA9FA, 0xA9FE ], [ 0xAA00, 0xAA36 ], [ 0xAA40, 0xAA4D ], [ 0xAA60, 0xAA76 ], [ 0xAA7A, 0xAABE ], 0xAAC0, 0xAAC2, [ 0xAADB, 0xAADD ], [ 0xAAE0, 0xAAEF ], [ 0xAAF2, 0xAAF5 ], [ 0xAB01, 0xAB06 ], [ 0xAB09, 0xAB0E ], [ 0xAB11, 0xAB16 ], [ 0xAB20, 0xAB26 ], [ 0xAB28, 0xAB2E ], [ 0xAB30, 0xAB5A ], [ 0xAB5C, 0xAB69 ], [ 0xAB70, 0xABEA ], [ 0xAC00, 0xD7A3 ], [ 0xD7B0, 0xD7C6 ], [ 0xD7CB, 0xD7FB ], [ 0xF900, 0xFA6D ], [ 0xFA70, 0xFAD9 ], [ 0xFB00, 0xFB06 ], [ 0xFB13, 0xFB17 ], [ 0xFB1D, 0xFB28 ], [ 0xFB2A, 0xFB36 ], [ 0xFB38, 0xFB3C ], 0xFB3E, 0xFB40, 0xFB41, 0xFB43, 0xFB44, [ 0xFB46, 0xFBB1 ], [ 0xFBD3, 0xFD3D ], [ 0xFD50, 0xFD8F ], [ 0xFD92, 0xFDC7 ], [ 0xFDF0, 0xFDFB ], [ 0xFE70, 0xFE74 ], [ 0xFE76, 0xFEFC ], [ 0xFF21, 0xFF3A ], [ 0xFF41, 0xFF5A ], [ 0xFF66, 0xFFBE ], [ 0xFFC2, 0xFFC7 ], [ 0xFFCA, 0xFFCF ], [ 0xFFD2, 0xFFD7 ], [ 0xFFDA, 0xFFDC ], [ 0x10000, 0x1000B ], [ 0x1000D, 0x10026 ], [ 0x10028, 0x1003A ], 0x1003C, 0x1003D, [ 0x1003F, 0x1004D ], [ 0x10050, 0x1005D ], [ 0x10080, 0x100FA ], [ 0x10140, 0x10174 ], [ 0x10280, 0x1029C ], [ 0x102A0, 0x102D0 ], [ 0x10300, 0x1031F ], [ 0x1032D, 0x1034A ], [ 0x10350, 0x1037A ], [ 0x10380, 0x1039D ], [ 0x103A0, 0x103C3 ], [ 0x103C8, 0x103CF ], [ 0x103D1, 0x103D5 ], [ 0x10400, 0x1049D ], [ 0x104B0, 0x104D3 ], [ 0x104D8, 0x104FB ], [ 0x10500, 0x10527 ], [ 0x10530, 0x10563 ], [ 0x10570, 0x1057A ], [ 
0x1057C, 0x1058A ], [ 0x1058C, 0x10592 ], 0x10594, 0x10595, [ 0x10597, 0x105A1 ], [ 0x105A3, 0x105B1 ], [ 0x105B3, 0x105B9 ], 0x105BB, 0x105BC, [ 0x10600, 0x10736 ], [ 0x10740, 0x10755 ], [ 0x10760, 0x10767 ], [ 0x10780, 0x10785 ], [ 0x10787, 0x107B0 ], [ 0x107B2, 0x107BA ], [ 0x10800, 0x10805 ], 0x10808, [ 0x1080A, 0x10835 ], 0x10837, 0x10838, 0x1083C, [ 0x1083F, 0x10855 ], [ 0x10860, 0x10876 ], [ 0x10880, 0x1089E ], [ 0x108E0, 0x108F2 ], 0x108F4, 0x108F5, [ 0x10900, 0x10915 ], [ 0x10920, 0x10939 ], [ 0x10980, 0x109B7 ], 0x109BE, 0x109BF, [ 0x10A00, 0x10A03 ], 0x10A05, 0x10A06, [ 0x10A0C, 0x10A13 ], [ 0x10A15, 0x10A17 ], [ 0x10A19, 0x10A35 ], [ 0x10A60, 0x10A7C ], [ 0x10A80, 0x10A9C ], [ 0x10AC0, 0x10AC7 ], [ 0x10AC9, 0x10AE4 ], [ 0x10B00, 0x10B35 ], [ 0x10B40, 0x10B55 ], [ 0x10B60, 0x10B72 ], [ 0x10B80, 0x10B91 ], [ 0x10C00, 0x10C48 ], [ 0x10C80, 0x10CB2 ], [ 0x10CC0, 0x10CF2 ], [ 0x10D00, 0x10D27 ], [ 0x10E80, 0x10EA9 ], 0x10EAB, 0x10EAC, 0x10EB0, 0x10EB1, [ 0x10F00, 0x10F1C ], 0x10F27, [ 0x10F30, 0x10F45 ], [ 0x10F70, 0x10F81 ], [ 0x10FB0, 0x10FC4 ], [ 0x10FE0, 0x10FF6 ], [ 0x11000, 0x11045 ], [ 0x11071, 0x11075 ], [ 0x11080, 0x110B8 ], 0x110C2, [ 0x110D0, 0x110E8 ], [ 0x11100, 0x11132 ], [ 0x11144, 0x11147 ], [ 0x11150, 0x11172 ], 0x11176, [ 0x11180, 0x111BF ], [ 0x111C1, 0x111C4 ], 0x111CE, 0x111CF, 0x111DA, 0x111DC, [ 0x11200, 0x11211 ], [ 0x11213, 0x11234 ], 0x11237, [ 0x1123E, 0x11241 ], [ 0x11280, 0x11286 ], 0x11288, [ 0x1128A, 0x1128D ], [ 0x1128F, 0x1129D ], [ 0x1129F, 0x112A8 ], [ 0x112B0, 0x112E8 ], [ 0x11300, 0x11303 ], [ 0x11305, 0x1130C ], 0x1130F, 0x11310, [ 0x11313, 0x11328 ], [ 0x1132A, 0x11330 ], 0x11332, 0x11333, [ 0x11335, 0x11339 ], [ 0x1133D, 0x11344 ], 0x11347, 0x11348, 0x1134B, 0x1134C, 0x11350, 0x11357, [ 0x1135D, 0x11363 ], [ 0x11400, 0x11441 ], [ 0x11443, 0x11445 ], [ 0x11447, 0x1144A ], [ 0x1145F, 0x11461 ], [ 0x11480, 0x114C1 ], 0x114C4, 0x114C5, 0x114C7, [ 0x11580, 0x115B5 ], [ 0x115B8, 0x115BE ], [ 0x115D8, 0x115DD ], [ 0x11600, 
0x1163E ], 0x11640, 0x11644, [ 0x11680, 0x116B5 ], 0x116B8, [ 0x11700, 0x1171A ], [ 0x1171D, 0x1172A ], [ 0x11740, 0x11746 ], [ 0x11800, 0x11838 ], [ 0x118A0, 0x118DF ], [ 0x118FF, 0x11906 ], 0x11909, [ 0x1190C, 0x11913 ], 0x11915, 0x11916, [ 0x11918, 0x11935 ], 0x11937, 0x11938, 0x1193B, 0x1193C, [ 0x1193F, 0x11942 ], [ 0x119A0, 0x119A7 ], [ 0x119AA, 0x119D7 ], [ 0x119DA, 0x119DF ], 0x119E1, 0x119E3, 0x119E4, [ 0x11A00, 0x11A32 ], [ 0x11A35, 0x11A3E ], [ 0x11A50, 0x11A97 ], 0x11A9D, [ 0x11AB0, 0x11AF8 ], [ 0x11C00, 0x11C08 ], [ 0x11C0A, 0x11C36 ], [ 0x11C38, 0x11C3E ], 0x11C40, [ 0x11C72, 0x11C8F ], [ 0x11C92, 0x11CA7 ], [ 0x11CA9, 0x11CB6 ], [ 0x11D00, 0x11D06 ], 0x11D08, 0x11D09, [ 0x11D0B, 0x11D36 ], 0x11D3A, 0x11D3C, 0x11D3D, [ 0x11D3F, 0x11D41 ], 0x11D43, 0x11D46, 0x11D47, [ 0x11D60, 0x11D65 ], 0x11D67, 0x11D68, [ 0x11D6A, 0x11D8E ], 0x11D90, 0x11D91, [ 0x11D93, 0x11D96 ], 0x11D98, [ 0x11EE0, 0x11EF6 ], [ 0x11F00, 0x11F10 ], [ 0x11F12, 0x11F3A ], [ 0x11F3E, 0x11F40 ], 0x11FB0, [ 0x12000, 0x12399 ], [ 0x12400, 0x1246E ], [ 0x12480, 0x12543 ], [ 0x12F90, 0x12FF0 ], [ 0x13000, 0x1342F ], [ 0x13441, 0x13446 ], [ 0x14400, 0x14646 ], [ 0x16800, 0x16A38 ], [ 0x16A40, 0x16A5E ], [ 0x16A70, 0x16ABE ], [ 0x16AD0, 0x16AED ], [ 0x16B00, 0x16B2F ], [ 0x16B40, 0x16B43 ], [ 0x16B63, 0x16B77 ], [ 0x16B7D, 0x16B8F ], [ 0x16E40, 0x16E7F ], [ 0x16F00, 0x16F4A ], [ 0x16F4F, 0x16F87 ], [ 0x16F8F, 0x16F9F ], 0x16FE0, 0x16FE1, 0x16FE3, 0x16FF0, 0x16FF1, [ 0x17000, 0x187F7 ], [ 0x18800, 0x18CD5 ], [ 0x18D00, 0x18D08 ], [ 0x1AFF0, 0x1AFF3 ], [ 0x1AFF5, 0x1AFFB ], 0x1AFFD, 0x1AFFE, [ 0x1B000, 0x1B122 ], 0x1B132, [ 0x1B150, 0x1B152 ], 0x1B155, [ 0x1B164, 0x1B167 ], [ 0x1B170, 0x1B2FB ], [ 0x1BC00, 0x1BC6A ], [ 0x1BC70, 0x1BC7C ], [ 0x1BC80, 0x1BC88 ], [ 0x1BC90, 0x1BC99 ], 0x1BC9E, [ 0x1D400, 0x1D454 ], [ 0x1D456, 0x1D49C ], 0x1D49E, 0x1D49F, 0x1D4A2, 0x1D4A5, 0x1D4A6, [ 0x1D4A9, 0x1D4AC ], [ 0x1D4AE, 0x1D4B9 ], 0x1D4BB, [ 0x1D4BD, 0x1D4C3 ], [ 0x1D4C5, 0x1D505 ], [ 0x1D507, 0x1D50A ], 
[ 0x1D50D, 0x1D514 ], [ 0x1D516, 0x1D51C ], [ 0x1D51E, 0x1D539 ], [ 0x1D53B, 0x1D53E ], [ 0x1D540, 0x1D544 ], 0x1D546, [ 0x1D54A, 0x1D550 ], [ 0x1D552, 0x1D6A5 ], [ 0x1D6A8, 0x1D6C0 ], [ 0x1D6C2, 0x1D6DA ], [ 0x1D6DC, 0x1D6FA ], [ 0x1D6FC, 0x1D714 ], [ 0x1D716, 0x1D734 ], [ 0x1D736, 0x1D74E ], [ 0x1D750, 0x1D76E ], [ 0x1D770, 0x1D788 ], [ 0x1D78A, 0x1D7A8 ], [ 0x1D7AA, 0x1D7C2 ], [ 0x1D7C4, 0x1D7CB ], [ 0x1DF00, 0x1DF1E ], [ 0x1DF25, 0x1DF2A ], [ 0x1E000, 0x1E006 ], [ 0x1E008, 0x1E018 ], [ 0x1E01B, 0x1E021 ], 0x1E023, 0x1E024, [ 0x1E026, 0x1E02A ], [ 0x1E030, 0x1E06D ], 0x1E08F, [ 0x1E100, 0x1E12C ], [ 0x1E137, 0x1E13D ], 0x1E14E, [ 0x1E290, 0x1E2AD ], [ 0x1E2C0, 0x1E2EB ], [ 0x1E4D0, 0x1E4EB ], [ 0x1E7E0, 0x1E7E6 ], [ 0x1E7E8, 0x1E7EB ], 0x1E7ED, 0x1E7EE, [ 0x1E7F0, 0x1E7FE ], [ 0x1E800, 0x1E8C4 ], [ 0x1E900, 0x1E943 ], 0x1E947, 0x1E94B, [ 0x1EE00, 0x1EE03 ], [ 0x1EE05, 0x1EE1F ], 0x1EE21, 0x1EE22, 0x1EE24, 0x1EE27, [ 0x1EE29, 0x1EE32 ], [ 0x1EE34, 0x1EE37 ], 0x1EE39, 0x1EE3B, 0x1EE42, 0x1EE47, 0x1EE49, 0x1EE4B, [ 0x1EE4D, 0x1EE4F ], 0x1EE51, 0x1EE52, 0x1EE54, 0x1EE57, 0x1EE59, 0x1EE5B, 0x1EE5D, 0x1EE5F, 0x1EE61, 0x1EE62, 0x1EE64, [ 0x1EE67, 0x1EE6A ], [ 0x1EE6C, 0x1EE72 ], [ 0x1EE74, 0x1EE77 ], [ 0x1EE79, 0x1EE7C ], 0x1EE7E, [ 0x1EE80, 0x1EE89 ], [ 0x1EE8B, 0x1EE9B ], [ 0x1EEA1, 0x1EEA3 ], [ 0x1EEA5, 0x1EEA9 ], [ 0x1EEAB, 0x1EEBB ], [ 0x1F130, 0x1F149 ], [ 0x1F150, 0x1F169 ], [ 0x1F170, 0x1F189 ], [ 0x20000, 0x2A6DF ], [ 0x2A700, 0x2B739 ], [ 0x2B740, 0x2B81D ], [ 0x2B820, 0x2CEA1 ], [ 0x2CEB0, 0x2EBE0 ], [ 0x2F800, 0x2FA1D ], [ 0x30000, 0x3134A ], [ 0x31350, 0x323AF ] ] 6 | }; 7 | -------------------------------------------------------------------------------- /src/generated/unicodejs.derivedgeneralcategories.js: -------------------------------------------------------------------------------- 1 | // This file is GENERATED by tools/unicodejs-properties.js 2 | // DO NOT EDIT 3 | unicodeJS.derivedgeneralcategories = { 4 | // partial extraction only 5 | M: [ [ 
0x0300, 0x036F ], [ 0x0483, 0x0489 ], [ 0x0591, 0x05BD ], 0x05BF, 0x05C1, 0x05C2, 0x05C4, 0x05C5, 0x05C7, [ 0x0610, 0x061A ], [ 0x064B, 0x065F ], 0x0670, [ 0x06D6, 0x06DC ], [ 0x06DF, 0x06E4 ], 0x06E7, 0x06E8, [ 0x06EA, 0x06ED ], 0x0711, [ 0x0730, 0x074A ], [ 0x07A6, 0x07B0 ], [ 0x07EB, 0x07F3 ], 0x07FD, [ 0x0816, 0x0819 ], [ 0x081B, 0x0823 ], [ 0x0825, 0x0827 ], [ 0x0829, 0x082D ], [ 0x0859, 0x085B ], [ 0x0898, 0x089F ], [ 0x08CA, 0x08E1 ], [ 0x08E3, 0x0903 ], [ 0x093A, 0x093C ], [ 0x093E, 0x094F ], [ 0x0951, 0x0957 ], 0x0962, 0x0963, [ 0x0981, 0x0983 ], 0x09BC, [ 0x09BE, 0x09C4 ], 0x09C7, 0x09C8, [ 0x09CB, 0x09CD ], 0x09D7, 0x09E2, 0x09E3, 0x09FE, [ 0x0A01, 0x0A03 ], 0x0A3C, [ 0x0A3E, 0x0A42 ], 0x0A47, 0x0A48, [ 0x0A4B, 0x0A4D ], 0x0A51, 0x0A70, 0x0A71, 0x0A75, [ 0x0A81, 0x0A83 ], 0x0ABC, [ 0x0ABE, 0x0AC5 ], [ 0x0AC7, 0x0AC9 ], [ 0x0ACB, 0x0ACD ], 0x0AE2, 0x0AE3, [ 0x0AFA, 0x0AFF ], [ 0x0B01, 0x0B03 ], 0x0B3C, [ 0x0B3E, 0x0B44 ], 0x0B47, 0x0B48, [ 0x0B4B, 0x0B4D ], [ 0x0B55, 0x0B57 ], 0x0B62, 0x0B63, 0x0B82, [ 0x0BBE, 0x0BC2 ], [ 0x0BC6, 0x0BC8 ], [ 0x0BCA, 0x0BCD ], 0x0BD7, [ 0x0C00, 0x0C04 ], 0x0C3C, [ 0x0C3E, 0x0C44 ], [ 0x0C46, 0x0C48 ], [ 0x0C4A, 0x0C4D ], 0x0C55, 0x0C56, 0x0C62, 0x0C63, [ 0x0C81, 0x0C83 ], 0x0CBC, [ 0x0CBE, 0x0CC4 ], [ 0x0CC6, 0x0CC8 ], [ 0x0CCA, 0x0CCD ], 0x0CD5, 0x0CD6, 0x0CE2, 0x0CE3, 0x0CF3, [ 0x0D00, 0x0D03 ], 0x0D3B, 0x0D3C, [ 0x0D3E, 0x0D44 ], [ 0x0D46, 0x0D48 ], [ 0x0D4A, 0x0D4D ], 0x0D57, 0x0D62, 0x0D63, [ 0x0D81, 0x0D83 ], 0x0DCA, [ 0x0DCF, 0x0DD4 ], 0x0DD6, [ 0x0DD8, 0x0DDF ], 0x0DF2, 0x0DF3, 0x0E31, [ 0x0E34, 0x0E3A ], [ 0x0E47, 0x0E4E ], 0x0EB1, [ 0x0EB4, 0x0EBC ], [ 0x0EC8, 0x0ECE ], 0x0F18, 0x0F19, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F, [ 0x0F71, 0x0F84 ], 0x0F86, 0x0F87, [ 0x0F8D, 0x0F97 ], [ 0x0F99, 0x0FBC ], 0x0FC6, [ 0x102B, 0x103E ], [ 0x1056, 0x1059 ], [ 0x105E, 0x1060 ], [ 0x1062, 0x1064 ], [ 0x1067, 0x106D ], [ 0x1071, 0x1074 ], [ 0x1082, 0x108D ], 0x108F, [ 0x109A, 0x109D ], [ 0x135D, 0x135F ], [ 0x1712, 0x1715 ], [ 
0x1732, 0x1734 ], 0x1752, 0x1753, 0x1772, 0x1773, [ 0x17B4, 0x17D3 ], 0x17DD, [ 0x180B, 0x180D ], 0x180F, 0x1885, 0x1886, 0x18A9, [ 0x1920, 0x192B ], [ 0x1930, 0x193B ], [ 0x1A17, 0x1A1B ], [ 0x1A55, 0x1A5E ], [ 0x1A60, 0x1A7C ], 0x1A7F, [ 0x1AB0, 0x1ACE ], [ 0x1B00, 0x1B04 ], [ 0x1B34, 0x1B44 ], [ 0x1B6B, 0x1B73 ], [ 0x1B80, 0x1B82 ], [ 0x1BA1, 0x1BAD ], [ 0x1BE6, 0x1BF3 ], [ 0x1C24, 0x1C37 ], [ 0x1CD0, 0x1CD2 ], [ 0x1CD4, 0x1CE8 ], 0x1CED, 0x1CF4, [ 0x1CF7, 0x1CF9 ], [ 0x1DC0, 0x1DFF ], [ 0x20D0, 0x20F0 ], [ 0x2CEF, 0x2CF1 ], 0x2D7F, [ 0x2DE0, 0x2DFF ], [ 0x302A, 0x302F ], 0x3099, 0x309A, [ 0xA66F, 0xA672 ], [ 0xA674, 0xA67D ], 0xA69E, 0xA69F, 0xA6F0, 0xA6F1, 0xA802, 0xA806, 0xA80B, [ 0xA823, 0xA827 ], 0xA82C, 0xA880, 0xA881, [ 0xA8B4, 0xA8C5 ], [ 0xA8E0, 0xA8F1 ], 0xA8FF, [ 0xA926, 0xA92D ], [ 0xA947, 0xA953 ], [ 0xA980, 0xA983 ], [ 0xA9B3, 0xA9C0 ], 0xA9E5, [ 0xAA29, 0xAA36 ], 0xAA43, 0xAA4C, 0xAA4D, [ 0xAA7B, 0xAA7D ], 0xAAB0, [ 0xAAB2, 0xAAB4 ], 0xAAB7, 0xAAB8, 0xAABE, 0xAABF, 0xAAC1, [ 0xAAEB, 0xAAEF ], 0xAAF5, 0xAAF6, [ 0xABE3, 0xABEA ], 0xABEC, 0xABED, 0xFB1E, [ 0xFE00, 0xFE0F ], [ 0xFE20, 0xFE2F ], 0x101FD, 0x102E0, [ 0x10376, 0x1037A ], [ 0x10A01, 0x10A03 ], 0x10A05, 0x10A06, [ 0x10A0C, 0x10A0F ], [ 0x10A38, 0x10A3A ], 0x10A3F, 0x10AE5, 0x10AE6, [ 0x10D24, 0x10D27 ], 0x10EAB, 0x10EAC, [ 0x10EFD, 0x10EFF ], [ 0x10F46, 0x10F50 ], [ 0x10F82, 0x10F85 ], [ 0x11000, 0x11002 ], [ 0x11038, 0x11046 ], 0x11070, 0x11073, 0x11074, [ 0x1107F, 0x11082 ], [ 0x110B0, 0x110BA ], 0x110C2, [ 0x11100, 0x11102 ], [ 0x11127, 0x11134 ], 0x11145, 0x11146, 0x11173, [ 0x11180, 0x11182 ], [ 0x111B3, 0x111C0 ], [ 0x111C9, 0x111CC ], 0x111CE, 0x111CF, [ 0x1122C, 0x11237 ], 0x1123E, 0x11241, [ 0x112DF, 0x112EA ], [ 0x11300, 0x11303 ], 0x1133B, 0x1133C, [ 0x1133E, 0x11344 ], 0x11347, 0x11348, [ 0x1134B, 0x1134D ], 0x11357, 0x11362, 0x11363, [ 0x11366, 0x1136C ], [ 0x11370, 0x11374 ], [ 0x11435, 0x11446 ], 0x1145E, [ 0x114B0, 0x114C3 ], [ 0x115AF, 0x115B5 ], [ 0x115B8, 0x115C0 ], 
0x115DC, 0x115DD, [ 0x11630, 0x11640 ], [ 0x116AB, 0x116B7 ], [ 0x1171D, 0x1172B ], [ 0x1182C, 0x1183A ], [ 0x11930, 0x11935 ], 0x11937, 0x11938, [ 0x1193B, 0x1193E ], 0x11940, 0x11942, 0x11943, [ 0x119D1, 0x119D7 ], [ 0x119DA, 0x119E0 ], 0x119E4, [ 0x11A01, 0x11A0A ], [ 0x11A33, 0x11A39 ], [ 0x11A3B, 0x11A3E ], 0x11A47, [ 0x11A51, 0x11A5B ], [ 0x11A8A, 0x11A99 ], [ 0x11C2F, 0x11C36 ], [ 0x11C38, 0x11C3F ], [ 0x11C92, 0x11CA7 ], [ 0x11CA9, 0x11CB6 ], [ 0x11D31, 0x11D36 ], 0x11D3A, 0x11D3C, 0x11D3D, [ 0x11D3F, 0x11D45 ], 0x11D47, [ 0x11D8A, 0x11D8E ], 0x11D90, 0x11D91, [ 0x11D93, 0x11D97 ], [ 0x11EF3, 0x11EF6 ], 0x11F00, 0x11F01, 0x11F03, [ 0x11F34, 0x11F3A ], [ 0x11F3E, 0x11F42 ], 0x13440, [ 0x13447, 0x13455 ], [ 0x16AF0, 0x16AF4 ], [ 0x16B30, 0x16B36 ], 0x16F4F, [ 0x16F51, 0x16F87 ], [ 0x16F8F, 0x16F92 ], 0x16FE4, 0x16FF0, 0x16FF1, 0x1BC9D, 0x1BC9E, [ 0x1CF00, 0x1CF2D ], [ 0x1CF30, 0x1CF46 ], [ 0x1D165, 0x1D169 ], [ 0x1D16D, 0x1D172 ], [ 0x1D17B, 0x1D182 ], [ 0x1D185, 0x1D18B ], [ 0x1D1AA, 0x1D1AD ], [ 0x1D242, 0x1D244 ], [ 0x1DA00, 0x1DA36 ], [ 0x1DA3B, 0x1DA6C ], 0x1DA75, 0x1DA84, [ 0x1DA9B, 0x1DA9F ], [ 0x1DAA1, 0x1DAAF ], [ 0x1E000, 0x1E006 ], [ 0x1E008, 0x1E018 ], [ 0x1E01B, 0x1E021 ], 0x1E023, 0x1E024, [ 0x1E026, 0x1E02A ], 0x1E08F, [ 0x1E130, 0x1E136 ], 0x1E2AE, [ 0x1E2EC, 0x1E2EF ], [ 0x1E4EC, 0x1E4EF ], [ 0x1E8D0, 0x1E8D6 ], [ 0x1E944, 0x1E94A ], [ 0xE0100, 0xE01EF ] ], 6 | Pc: [ 0x005F, 0x203F, 0x2040, 0x2054, 0xFE33, 0xFE34, [ 0xFE4D, 0xFE4F ], 0xFF3F ] 7 | }; 8 | -------------------------------------------------------------------------------- /src/generated/unicodejs.emojiproperties.js: -------------------------------------------------------------------------------- 1 | // This file is GENERATED by tools/unicodejs-properties.js 2 | // DO NOT EDIT 3 | unicodeJS.emojiproperties = { 4 | // partial extraction only 5 | ExtendedPictographic: [ 0x00A9, 0x00AE, 0x203C, 0x2049, 0x2122, 0x2139, [ 0x2194, 0x2199 ], 0x21A9, 0x21AA, 0x231A, 0x231B, 0x2328, 0x2388, 
0x23CF, [ 0x23E9, 0x23F3 ], [ 0x23F8, 0x23FA ], 0x24C2, 0x25AA, 0x25AB, 0x25B6, 0x25C0, [ 0x25FB, 0x25FE ], [ 0x2600, 0x2605 ], [ 0x2607, 0x2612 ], [ 0x2614, 0x2685 ], [ 0x2690, 0x2705 ], [ 0x2708, 0x2712 ], 0x2714, 0x2716, 0x271D, 0x2721, 0x2728, 0x2733, 0x2734, 0x2744, 0x2747, 0x274C, 0x274E, [ 0x2753, 0x2755 ], 0x2757, [ 0x2763, 0x2767 ], [ 0x2795, 0x2797 ], 0x27A1, 0x27B0, 0x27BF, 0x2934, 0x2935, [ 0x2B05, 0x2B07 ], 0x2B1B, 0x2B1C, 0x2B50, 0x2B55, 0x3030, 0x303D, 0x3297, 0x3299, [ 0x1F000, 0x1F0FF ], [ 0x1F10D, 0x1F10F ], 0x1F12F, [ 0x1F16C, 0x1F171 ], 0x1F17E, 0x1F17F, 0x1F18E, [ 0x1F191, 0x1F19A ], [ 0x1F1AD, 0x1F1E5 ], [ 0x1F201, 0x1F20F ], 0x1F21A, 0x1F22F, [ 0x1F232, 0x1F23A ], [ 0x1F23C, 0x1F23F ], [ 0x1F249, 0x1F3FA ], [ 0x1F400, 0x1F53D ], [ 0x1F546, 0x1F64F ], [ 0x1F680, 0x1F6FF ], [ 0x1F774, 0x1F77F ], [ 0x1F7D5, 0x1F7FF ], [ 0x1F80C, 0x1F80F ], [ 0x1F848, 0x1F84F ], [ 0x1F85A, 0x1F85F ], [ 0x1F888, 0x1F88F ], [ 0x1F8AE, 0x1F8FF ], [ 0x1F90C, 0x1F93A ], [ 0x1F93C, 0x1F945 ], [ 0x1F947, 0x1FAFF ], [ 0x1FC00, 0x1FFFD ] ] 6 | }; 7 | -------------------------------------------------------------------------------- /src/generated/unicodejs.graphemebreakproperties.js: -------------------------------------------------------------------------------- 1 | // This file is GENERATED by tools/unicodejs-properties.js 2 | // DO NOT EDIT 3 | unicodeJS.graphemebreakproperties = { 4 | Prepend: [ [ 0x0600, 0x0605 ], 0x06DD, 0x070F, 0x0890, 0x0891, 0x08E2, 0x0D4E, 0x110BD, 0x110CD, 0x111C2, 0x111C3, 0x1193F, 0x11941, 0x11A3A, [ 0x11A84, 0x11A89 ], 0x11D46, 0x11F02 ], 5 | CR: [ 0x000D ], 6 | LF: [ 0x000A ], 7 | Control: [ [ 0x0000, 0x0009 ], 0x000B, 0x000C, [ 0x000E, 0x001F ], [ 0x007F, 0x009F ], 0x00AD, 0x061C, 0x180E, 0x200B, 0x200E, 0x200F, [ 0x2028, 0x202E ], [ 0x2060, 0x206F ], 0xFEFF, [ 0xFFF0, 0xFFFB ], [ 0x13430, 0x1343F ], [ 0x1BCA0, 0x1BCA3 ], [ 0x1D173, 0x1D17A ], [ 0xE0000, 0xE001F ], [ 0xE0080, 0xE00FF ], [ 0xE01F0, 0xE0FFF ] ], 8 | Extend: [ [ 0x0300, 0x036F 
], [ 0x0483, 0x0489 ], [ 0x0591, 0x05BD ], 0x05BF, 0x05C1, 0x05C2, 0x05C4, 0x05C5, 0x05C7, [ 0x0610, 0x061A ], [ 0x064B, 0x065F ], 0x0670, [ 0x06D6, 0x06DC ], [ 0x06DF, 0x06E4 ], 0x06E7, 0x06E8, [ 0x06EA, 0x06ED ], 0x0711, [ 0x0730, 0x074A ], [ 0x07A6, 0x07B0 ], [ 0x07EB, 0x07F3 ], 0x07FD, [ 0x0816, 0x0819 ], [ 0x081B, 0x0823 ], [ 0x0825, 0x0827 ], [ 0x0829, 0x082D ], [ 0x0859, 0x085B ], [ 0x0898, 0x089F ], [ 0x08CA, 0x08E1 ], [ 0x08E3, 0x0902 ], 0x093A, 0x093C, [ 0x0941, 0x0948 ], 0x094D, [ 0x0951, 0x0957 ], 0x0962, 0x0963, 0x0981, 0x09BC, 0x09BE, [ 0x09C1, 0x09C4 ], 0x09CD, 0x09D7, 0x09E2, 0x09E3, 0x09FE, 0x0A01, 0x0A02, 0x0A3C, 0x0A41, 0x0A42, 0x0A47, 0x0A48, [ 0x0A4B, 0x0A4D ], 0x0A51, 0x0A70, 0x0A71, 0x0A75, 0x0A81, 0x0A82, 0x0ABC, [ 0x0AC1, 0x0AC5 ], 0x0AC7, 0x0AC8, 0x0ACD, 0x0AE2, 0x0AE3, [ 0x0AFA, 0x0AFF ], 0x0B01, 0x0B3C, 0x0B3E, 0x0B3F, [ 0x0B41, 0x0B44 ], 0x0B4D, [ 0x0B55, 0x0B57 ], 0x0B62, 0x0B63, 0x0B82, 0x0BBE, 0x0BC0, 0x0BCD, 0x0BD7, 0x0C00, 0x0C04, 0x0C3C, [ 0x0C3E, 0x0C40 ], [ 0x0C46, 0x0C48 ], [ 0x0C4A, 0x0C4D ], 0x0C55, 0x0C56, 0x0C62, 0x0C63, 0x0C81, 0x0CBC, 0x0CBF, 0x0CC2, 0x0CC6, 0x0CCC, 0x0CCD, 0x0CD5, 0x0CD6, 0x0CE2, 0x0CE3, 0x0D00, 0x0D01, 0x0D3B, 0x0D3C, 0x0D3E, [ 0x0D41, 0x0D44 ], 0x0D4D, 0x0D57, 0x0D62, 0x0D63, 0x0D81, 0x0DCA, 0x0DCF, [ 0x0DD2, 0x0DD4 ], 0x0DD6, 0x0DDF, 0x0E31, [ 0x0E34, 0x0E3A ], [ 0x0E47, 0x0E4E ], 0x0EB1, [ 0x0EB4, 0x0EBC ], [ 0x0EC8, 0x0ECE ], 0x0F18, 0x0F19, 0x0F35, 0x0F37, 0x0F39, [ 0x0F71, 0x0F7E ], [ 0x0F80, 0x0F84 ], 0x0F86, 0x0F87, [ 0x0F8D, 0x0F97 ], [ 0x0F99, 0x0FBC ], 0x0FC6, [ 0x102D, 0x1030 ], [ 0x1032, 0x1037 ], 0x1039, 0x103A, 0x103D, 0x103E, 0x1058, 0x1059, [ 0x105E, 0x1060 ], [ 0x1071, 0x1074 ], 0x1082, 0x1085, 0x1086, 0x108D, 0x109D, [ 0x135D, 0x135F ], [ 0x1712, 0x1714 ], 0x1732, 0x1733, 0x1752, 0x1753, 0x1772, 0x1773, 0x17B4, 0x17B5, [ 0x17B7, 0x17BD ], 0x17C6, [ 0x17C9, 0x17D3 ], 0x17DD, [ 0x180B, 0x180D ], 0x180F, 0x1885, 0x1886, 0x18A9, [ 0x1920, 0x1922 ], 0x1927, 0x1928, 0x1932, [ 0x1939, 0x193B 
], 0x1A17, 0x1A18, 0x1A1B, 0x1A56, [ 0x1A58, 0x1A5E ], 0x1A60, 0x1A62, [ 0x1A65, 0x1A6C ], [ 0x1A73, 0x1A7C ], 0x1A7F, [ 0x1AB0, 0x1ACE ], [ 0x1B00, 0x1B03 ], [ 0x1B34, 0x1B3A ], 0x1B3C, 0x1B42, [ 0x1B6B, 0x1B73 ], 0x1B80, 0x1B81, [ 0x1BA2, 0x1BA5 ], 0x1BA8, 0x1BA9, [ 0x1BAB, 0x1BAD ], 0x1BE6, 0x1BE8, 0x1BE9, 0x1BED, [ 0x1BEF, 0x1BF1 ], [ 0x1C2C, 0x1C33 ], 0x1C36, 0x1C37, [ 0x1CD0, 0x1CD2 ], [ 0x1CD4, 0x1CE0 ], [ 0x1CE2, 0x1CE8 ], 0x1CED, 0x1CF4, 0x1CF8, 0x1CF9, [ 0x1DC0, 0x1DFF ], 0x200C, [ 0x20D0, 0x20F0 ], [ 0x2CEF, 0x2CF1 ], 0x2D7F, [ 0x2DE0, 0x2DFF ], [ 0x302A, 0x302F ], 0x3099, 0x309A, [ 0xA66F, 0xA672 ], [ 0xA674, 0xA67D ], 0xA69E, 0xA69F, 0xA6F0, 0xA6F1, 0xA802, 0xA806, 0xA80B, 0xA825, 0xA826, 0xA82C, 0xA8C4, 0xA8C5, [ 0xA8E0, 0xA8F1 ], 0xA8FF, [ 0xA926, 0xA92D ], [ 0xA947, 0xA951 ], [ 0xA980, 0xA982 ], 0xA9B3, [ 0xA9B6, 0xA9B9 ], 0xA9BC, 0xA9BD, 0xA9E5, [ 0xAA29, 0xAA2E ], 0xAA31, 0xAA32, 0xAA35, 0xAA36, 0xAA43, 0xAA4C, 0xAA7C, 0xAAB0, [ 0xAAB2, 0xAAB4 ], 0xAAB7, 0xAAB8, 0xAABE, 0xAABF, 0xAAC1, 0xAAEC, 0xAAED, 0xAAF6, 0xABE5, 0xABE8, 0xABED, 0xFB1E, [ 0xFE00, 0xFE0F ], [ 0xFE20, 0xFE2F ], 0xFF9E, 0xFF9F, 0x101FD, 0x102E0, [ 0x10376, 0x1037A ], [ 0x10A01, 0x10A03 ], 0x10A05, 0x10A06, [ 0x10A0C, 0x10A0F ], [ 0x10A38, 0x10A3A ], 0x10A3F, 0x10AE5, 0x10AE6, [ 0x10D24, 0x10D27 ], 0x10EAB, 0x10EAC, [ 0x10EFD, 0x10EFF ], [ 0x10F46, 0x10F50 ], [ 0x10F82, 0x10F85 ], 0x11001, [ 0x11038, 0x11046 ], 0x11070, 0x11073, 0x11074, [ 0x1107F, 0x11081 ], [ 0x110B3, 0x110B6 ], 0x110B9, 0x110BA, 0x110C2, [ 0x11100, 0x11102 ], [ 0x11127, 0x1112B ], [ 0x1112D, 0x11134 ], 0x11173, 0x11180, 0x11181, [ 0x111B6, 0x111BE ], [ 0x111C9, 0x111CC ], 0x111CF, [ 0x1122F, 0x11231 ], 0x11234, 0x11236, 0x11237, 0x1123E, 0x11241, 0x112DF, [ 0x112E3, 0x112EA ], 0x11300, 0x11301, 0x1133B, 0x1133C, 0x1133E, 0x11340, 0x11357, [ 0x11366, 0x1136C ], [ 0x11370, 0x11374 ], [ 0x11438, 0x1143F ], [ 0x11442, 0x11444 ], 0x11446, 0x1145E, 0x114B0, [ 0x114B3, 0x114B8 ], 0x114BA, 0x114BD, 0x114BF, 0x114C0, 
0x114C2, 0x114C3, 0x115AF, [ 0x115B2, 0x115B5 ], 0x115BC, 0x115BD, 0x115BF, 0x115C0, 0x115DC, 0x115DD, [ 0x11633, 0x1163A ], 0x1163D, 0x1163F, 0x11640, 0x116AB, 0x116AD, [ 0x116B0, 0x116B5 ], 0x116B7, [ 0x1171D, 0x1171F ], [ 0x11722, 0x11725 ], [ 0x11727, 0x1172B ], [ 0x1182F, 0x11837 ], 0x11839, 0x1183A, 0x11930, 0x1193B, 0x1193C, 0x1193E, 0x11943, [ 0x119D4, 0x119D7 ], 0x119DA, 0x119DB, 0x119E0, [ 0x11A01, 0x11A0A ], [ 0x11A33, 0x11A38 ], [ 0x11A3B, 0x11A3E ], 0x11A47, [ 0x11A51, 0x11A56 ], [ 0x11A59, 0x11A5B ], [ 0x11A8A, 0x11A96 ], 0x11A98, 0x11A99, [ 0x11C30, 0x11C36 ], [ 0x11C38, 0x11C3D ], 0x11C3F, [ 0x11C92, 0x11CA7 ], [ 0x11CAA, 0x11CB0 ], 0x11CB2, 0x11CB3, 0x11CB5, 0x11CB6, [ 0x11D31, 0x11D36 ], 0x11D3A, 0x11D3C, 0x11D3D, [ 0x11D3F, 0x11D45 ], 0x11D47, 0x11D90, 0x11D91, 0x11D95, 0x11D97, 0x11EF3, 0x11EF4, 0x11F00, 0x11F01, [ 0x11F36, 0x11F3A ], 0x11F40, 0x11F42, 0x13440, [ 0x13447, 0x13455 ], [ 0x16AF0, 0x16AF4 ], [ 0x16B30, 0x16B36 ], 0x16F4F, [ 0x16F8F, 0x16F92 ], 0x16FE4, 0x1BC9D, 0x1BC9E, [ 0x1CF00, 0x1CF2D ], [ 0x1CF30, 0x1CF46 ], 0x1D165, [ 0x1D167, 0x1D169 ], [ 0x1D16E, 0x1D172 ], [ 0x1D17B, 0x1D182 ], [ 0x1D185, 0x1D18B ], [ 0x1D1AA, 0x1D1AD ], [ 0x1D242, 0x1D244 ], [ 0x1DA00, 0x1DA36 ], [ 0x1DA3B, 0x1DA6C ], 0x1DA75, 0x1DA84, [ 0x1DA9B, 0x1DA9F ], [ 0x1DAA1, 0x1DAAF ], [ 0x1E000, 0x1E006 ], [ 0x1E008, 0x1E018 ], [ 0x1E01B, 0x1E021 ], 0x1E023, 0x1E024, [ 0x1E026, 0x1E02A ], 0x1E08F, [ 0x1E130, 0x1E136 ], 0x1E2AE, [ 0x1E2EC, 0x1E2EF ], [ 0x1E4EC, 0x1E4EF ], [ 0x1E8D0, 0x1E8D6 ], [ 0x1E944, 0x1E94A ], [ 0x1F3FB, 0x1F3FF ], [ 0xE0020, 0xE007F ], [ 0xE0100, 0xE01EF ] ], 9 | RegionalIndicator: [ [ 0x1F1E6, 0x1F1FF ] ], 10 | SpacingMark: [ 0x0903, 0x093B, [ 0x093E, 0x0940 ], [ 0x0949, 0x094C ], 0x094E, 0x094F, 0x0982, 0x0983, 0x09BF, 0x09C0, 0x09C7, 0x09C8, 0x09CB, 0x09CC, 0x0A03, [ 0x0A3E, 0x0A40 ], 0x0A83, [ 0x0ABE, 0x0AC0 ], 0x0AC9, 0x0ACB, 0x0ACC, 0x0B02, 0x0B03, 0x0B40, 0x0B47, 0x0B48, 0x0B4B, 0x0B4C, 0x0BBF, 0x0BC1, 0x0BC2, [ 0x0BC6, 0x0BC8 ], [ 
0x0BCA, 0x0BCC ], [ 0x0C01, 0x0C03 ], [ 0x0C41, 0x0C44 ], 0x0C82, 0x0C83, 0x0CBE, 0x0CC0, 0x0CC1, 0x0CC3, 0x0CC4, 0x0CC7, 0x0CC8, 0x0CCA, 0x0CCB, 0x0CF3, 0x0D02, 0x0D03, 0x0D3F, 0x0D40, [ 0x0D46, 0x0D48 ], [ 0x0D4A, 0x0D4C ], 0x0D82, 0x0D83, 0x0DD0, 0x0DD1, [ 0x0DD8, 0x0DDE ], 0x0DF2, 0x0DF3, 0x0E33, 0x0EB3, 0x0F3E, 0x0F3F, 0x0F7F, 0x1031, 0x103B, 0x103C, 0x1056, 0x1057, 0x1084, 0x1715, 0x1734, 0x17B6, [ 0x17BE, 0x17C5 ], 0x17C7, 0x17C8, [ 0x1923, 0x1926 ], [ 0x1929, 0x192B ], 0x1930, 0x1931, [ 0x1933, 0x1938 ], 0x1A19, 0x1A1A, 0x1A55, 0x1A57, [ 0x1A6D, 0x1A72 ], 0x1B04, 0x1B3B, [ 0x1B3D, 0x1B41 ], 0x1B43, 0x1B44, 0x1B82, 0x1BA1, 0x1BA6, 0x1BA7, 0x1BAA, 0x1BE7, [ 0x1BEA, 0x1BEC ], 0x1BEE, 0x1BF2, 0x1BF3, [ 0x1C24, 0x1C2B ], 0x1C34, 0x1C35, 0x1CE1, 0x1CF7, 0xA823, 0xA824, 0xA827, 0xA880, 0xA881, [ 0xA8B4, 0xA8C3 ], 0xA952, 0xA953, 0xA983, 0xA9B4, 0xA9B5, 0xA9BA, 0xA9BB, [ 0xA9BE, 0xA9C0 ], 0xAA2F, 0xAA30, 0xAA33, 0xAA34, 0xAA4D, 0xAAEB, 0xAAEE, 0xAAEF, 0xAAF5, 0xABE3, 0xABE4, 0xABE6, 0xABE7, 0xABE9, 0xABEA, 0xABEC, 0x11000, 0x11002, 0x11082, [ 0x110B0, 0x110B2 ], 0x110B7, 0x110B8, 0x1112C, 0x11145, 0x11146, 0x11182, [ 0x111B3, 0x111B5 ], 0x111BF, 0x111C0, 0x111CE, [ 0x1122C, 0x1122E ], 0x11232, 0x11233, 0x11235, [ 0x112E0, 0x112E2 ], 0x11302, 0x11303, 0x1133F, [ 0x11341, 0x11344 ], 0x11347, 0x11348, [ 0x1134B, 0x1134D ], 0x11362, 0x11363, [ 0x11435, 0x11437 ], 0x11440, 0x11441, 0x11445, 0x114B1, 0x114B2, 0x114B9, 0x114BB, 0x114BC, 0x114BE, 0x114C1, 0x115B0, 0x115B1, [ 0x115B8, 0x115BB ], 0x115BE, [ 0x11630, 0x11632 ], 0x1163B, 0x1163C, 0x1163E, 0x116AC, 0x116AE, 0x116AF, 0x116B6, 0x11726, [ 0x1182C, 0x1182E ], 0x11838, [ 0x11931, 0x11935 ], 0x11937, 0x11938, 0x1193D, 0x11940, 0x11942, [ 0x119D1, 0x119D3 ], [ 0x119DC, 0x119DF ], 0x119E4, 0x11A39, 0x11A57, 0x11A58, 0x11A97, 0x11C2F, 0x11C3E, 0x11CA9, 0x11CB1, 0x11CB4, [ 0x11D8A, 0x11D8E ], 0x11D93, 0x11D94, 0x11D96, 0x11EF5, 0x11EF6, 0x11F03, 0x11F34, 0x11F35, 0x11F3E, 0x11F3F, 0x11F41, [ 0x16F51, 0x16F87 ], 0x16FF0, 
0x16FF1, 0x1D166, 0x1D16D ], 11 | L: [ [ 0x1100, 0x115F ], [ 0xA960, 0xA97C ] ], 12 | V: [ [ 0x1160, 0x11A7 ], [ 0xD7B0, 0xD7C6 ] ], 13 | T: [ [ 0x11A8, 0x11FF ], [ 0xD7CB, 0xD7FB ] ], 14 | LV: [ 0xAC00, 0xAC1C, 0xAC38, 0xAC54, 0xAC70, 0xAC8C, 0xACA8, 0xACC4, 0xACE0, 0xACFC, 0xAD18, 0xAD34, 0xAD50, 0xAD6C, 0xAD88, 0xADA4, 0xADC0, 0xADDC, 0xADF8, 0xAE14, 0xAE30, 0xAE4C, 0xAE68, 0xAE84, 0xAEA0, 0xAEBC, 0xAED8, 0xAEF4, 0xAF10, 0xAF2C, 0xAF48, 0xAF64, 0xAF80, 0xAF9C, 0xAFB8, 0xAFD4, 0xAFF0, 0xB00C, 0xB028, 0xB044, 0xB060, 0xB07C, 0xB098, 0xB0B4, 0xB0D0, 0xB0EC, 0xB108, 0xB124, 0xB140, 0xB15C, 0xB178, 0xB194, 0xB1B0, 0xB1CC, 0xB1E8, 0xB204, 0xB220, 0xB23C, 0xB258, 0xB274, 0xB290, 0xB2AC, 0xB2C8, 0xB2E4, 0xB300, 0xB31C, 0xB338, 0xB354, 0xB370, 0xB38C, 0xB3A8, 0xB3C4, 0xB3E0, 0xB3FC, 0xB418, 0xB434, 0xB450, 0xB46C, 0xB488, 0xB4A4, 0xB4C0, 0xB4DC, 0xB4F8, 0xB514, 0xB530, 0xB54C, 0xB568, 0xB584, 0xB5A0, 0xB5BC, 0xB5D8, 0xB5F4, 0xB610, 0xB62C, 0xB648, 0xB664, 0xB680, 0xB69C, 0xB6B8, 0xB6D4, 0xB6F0, 0xB70C, 0xB728, 0xB744, 0xB760, 0xB77C, 0xB798, 0xB7B4, 0xB7D0, 0xB7EC, 0xB808, 0xB824, 0xB840, 0xB85C, 0xB878, 0xB894, 0xB8B0, 0xB8CC, 0xB8E8, 0xB904, 0xB920, 0xB93C, 0xB958, 0xB974, 0xB990, 0xB9AC, 0xB9C8, 0xB9E4, 0xBA00, 0xBA1C, 0xBA38, 0xBA54, 0xBA70, 0xBA8C, 0xBAA8, 0xBAC4, 0xBAE0, 0xBAFC, 0xBB18, 0xBB34, 0xBB50, 0xBB6C, 0xBB88, 0xBBA4, 0xBBC0, 0xBBDC, 0xBBF8, 0xBC14, 0xBC30, 0xBC4C, 0xBC68, 0xBC84, 0xBCA0, 0xBCBC, 0xBCD8, 0xBCF4, 0xBD10, 0xBD2C, 0xBD48, 0xBD64, 0xBD80, 0xBD9C, 0xBDB8, 0xBDD4, 0xBDF0, 0xBE0C, 0xBE28, 0xBE44, 0xBE60, 0xBE7C, 0xBE98, 0xBEB4, 0xBED0, 0xBEEC, 0xBF08, 0xBF24, 0xBF40, 0xBF5C, 0xBF78, 0xBF94, 0xBFB0, 0xBFCC, 0xBFE8, 0xC004, 0xC020, 0xC03C, 0xC058, 0xC074, 0xC090, 0xC0AC, 0xC0C8, 0xC0E4, 0xC100, 0xC11C, 0xC138, 0xC154, 0xC170, 0xC18C, 0xC1A8, 0xC1C4, 0xC1E0, 0xC1FC, 0xC218, 0xC234, 0xC250, 0xC26C, 0xC288, 0xC2A4, 0xC2C0, 0xC2DC, 0xC2F8, 0xC314, 0xC330, 0xC34C, 0xC368, 0xC384, 0xC3A0, 0xC3BC, 0xC3D8, 0xC3F4, 0xC410, 0xC42C, 0xC448, 0xC464, 0xC480, 
0xC49C, 0xC4B8, 0xC4D4, 0xC4F0, 0xC50C, 0xC528, 0xC544, 0xC560, 0xC57C, 0xC598, 0xC5B4, 0xC5D0, 0xC5EC, 0xC608, 0xC624, 0xC640, 0xC65C, 0xC678, 0xC694, 0xC6B0, 0xC6CC, 0xC6E8, 0xC704, 0xC720, 0xC73C, 0xC758, 0xC774, 0xC790, 0xC7AC, 0xC7C8, 0xC7E4, 0xC800, 0xC81C, 0xC838, 0xC854, 0xC870, 0xC88C, 0xC8A8, 0xC8C4, 0xC8E0, 0xC8FC, 0xC918, 0xC934, 0xC950, 0xC96C, 0xC988, 0xC9A4, 0xC9C0, 0xC9DC, 0xC9F8, 0xCA14, 0xCA30, 0xCA4C, 0xCA68, 0xCA84, 0xCAA0, 0xCABC, 0xCAD8, 0xCAF4, 0xCB10, 0xCB2C, 0xCB48, 0xCB64, 0xCB80, 0xCB9C, 0xCBB8, 0xCBD4, 0xCBF0, 0xCC0C, 0xCC28, 0xCC44, 0xCC60, 0xCC7C, 0xCC98, 0xCCB4, 0xCCD0, 0xCCEC, 0xCD08, 0xCD24, 0xCD40, 0xCD5C, 0xCD78, 0xCD94, 0xCDB0, 0xCDCC, 0xCDE8, 0xCE04, 0xCE20, 0xCE3C, 0xCE58, 0xCE74, 0xCE90, 0xCEAC, 0xCEC8, 0xCEE4, 0xCF00, 0xCF1C, 0xCF38, 0xCF54, 0xCF70, 0xCF8C, 0xCFA8, 0xCFC4, 0xCFE0, 0xCFFC, 0xD018, 0xD034, 0xD050, 0xD06C, 0xD088, 0xD0A4, 0xD0C0, 0xD0DC, 0xD0F8, 0xD114, 0xD130, 0xD14C, 0xD168, 0xD184, 0xD1A0, 0xD1BC, 0xD1D8, 0xD1F4, 0xD210, 0xD22C, 0xD248, 0xD264, 0xD280, 0xD29C, 0xD2B8, 0xD2D4, 0xD2F0, 0xD30C, 0xD328, 0xD344, 0xD360, 0xD37C, 0xD398, 0xD3B4, 0xD3D0, 0xD3EC, 0xD408, 0xD424, 0xD440, 0xD45C, 0xD478, 0xD494, 0xD4B0, 0xD4CC, 0xD4E8, 0xD504, 0xD520, 0xD53C, 0xD558, 0xD574, 0xD590, 0xD5AC, 0xD5C8, 0xD5E4, 0xD600, 0xD61C, 0xD638, 0xD654, 0xD670, 0xD68C, 0xD6A8, 0xD6C4, 0xD6E0, 0xD6FC, 0xD718, 0xD734, 0xD750, 0xD76C, 0xD788 ], 15 | LVT: [ [ 0xAC01, 0xAC1B ], [ 0xAC1D, 0xAC37 ], [ 0xAC39, 0xAC53 ], [ 0xAC55, 0xAC6F ], [ 0xAC71, 0xAC8B ], [ 0xAC8D, 0xACA7 ], [ 0xACA9, 0xACC3 ], [ 0xACC5, 0xACDF ], [ 0xACE1, 0xACFB ], [ 0xACFD, 0xAD17 ], [ 0xAD19, 0xAD33 ], [ 0xAD35, 0xAD4F ], [ 0xAD51, 0xAD6B ], [ 0xAD6D, 0xAD87 ], [ 0xAD89, 0xADA3 ], [ 0xADA5, 0xADBF ], [ 0xADC1, 0xADDB ], [ 0xADDD, 0xADF7 ], [ 0xADF9, 0xAE13 ], [ 0xAE15, 0xAE2F ], [ 0xAE31, 0xAE4B ], [ 0xAE4D, 0xAE67 ], [ 0xAE69, 0xAE83 ], [ 0xAE85, 0xAE9F ], [ 0xAEA1, 0xAEBB ], [ 0xAEBD, 0xAED7 ], [ 0xAED9, 0xAEF3 ], [ 0xAEF5, 0xAF0F ], [ 0xAF11, 0xAF2B ], [ 0xAF2D, 
0xAF47 ], [ 0xAF49, 0xAF63 ], [ 0xAF65, 0xAF7F ], [ 0xAF81, 0xAF9B ], [ 0xAF9D, 0xAFB7 ], [ 0xAFB9, 0xAFD3 ], [ 0xAFD5, 0xAFEF ], [ 0xAFF1, 0xB00B ], [ 0xB00D, 0xB027 ], [ 0xB029, 0xB043 ], [ 0xB045, 0xB05F ], [ 0xB061, 0xB07B ], [ 0xB07D, 0xB097 ], [ 0xB099, 0xB0B3 ], [ 0xB0B5, 0xB0CF ], [ 0xB0D1, 0xB0EB ], [ 0xB0ED, 0xB107 ], [ 0xB109, 0xB123 ], [ 0xB125, 0xB13F ], [ 0xB141, 0xB15B ], [ 0xB15D, 0xB177 ], [ 0xB179, 0xB193 ], [ 0xB195, 0xB1AF ], [ 0xB1B1, 0xB1CB ], [ 0xB1CD, 0xB1E7 ], [ 0xB1E9, 0xB203 ], [ 0xB205, 0xB21F ], [ 0xB221, 0xB23B ], [ 0xB23D, 0xB257 ], [ 0xB259, 0xB273 ], [ 0xB275, 0xB28F ], [ 0xB291, 0xB2AB ], [ 0xB2AD, 0xB2C7 ], [ 0xB2C9, 0xB2E3 ], [ 0xB2E5, 0xB2FF ], [ 0xB301, 0xB31B ], [ 0xB31D, 0xB337 ], [ 0xB339, 0xB353 ], [ 0xB355, 0xB36F ], [ 0xB371, 0xB38B ], [ 0xB38D, 0xB3A7 ], [ 0xB3A9, 0xB3C3 ], [ 0xB3C5, 0xB3DF ], [ 0xB3E1, 0xB3FB ], [ 0xB3FD, 0xB417 ], [ 0xB419, 0xB433 ], [ 0xB435, 0xB44F ], [ 0xB451, 0xB46B ], [ 0xB46D, 0xB487 ], [ 0xB489, 0xB4A3 ], [ 0xB4A5, 0xB4BF ], [ 0xB4C1, 0xB4DB ], [ 0xB4DD, 0xB4F7 ], [ 0xB4F9, 0xB513 ], [ 0xB515, 0xB52F ], [ 0xB531, 0xB54B ], [ 0xB54D, 0xB567 ], [ 0xB569, 0xB583 ], [ 0xB585, 0xB59F ], [ 0xB5A1, 0xB5BB ], [ 0xB5BD, 0xB5D7 ], [ 0xB5D9, 0xB5F3 ], [ 0xB5F5, 0xB60F ], [ 0xB611, 0xB62B ], [ 0xB62D, 0xB647 ], [ 0xB649, 0xB663 ], [ 0xB665, 0xB67F ], [ 0xB681, 0xB69B ], [ 0xB69D, 0xB6B7 ], [ 0xB6B9, 0xB6D3 ], [ 0xB6D5, 0xB6EF ], [ 0xB6F1, 0xB70B ], [ 0xB70D, 0xB727 ], [ 0xB729, 0xB743 ], [ 0xB745, 0xB75F ], [ 0xB761, 0xB77B ], [ 0xB77D, 0xB797 ], [ 0xB799, 0xB7B3 ], [ 0xB7B5, 0xB7CF ], [ 0xB7D1, 0xB7EB ], [ 0xB7ED, 0xB807 ], [ 0xB809, 0xB823 ], [ 0xB825, 0xB83F ], [ 0xB841, 0xB85B ], [ 0xB85D, 0xB877 ], [ 0xB879, 0xB893 ], [ 0xB895, 0xB8AF ], [ 0xB8B1, 0xB8CB ], [ 0xB8CD, 0xB8E7 ], [ 0xB8E9, 0xB903 ], [ 0xB905, 0xB91F ], [ 0xB921, 0xB93B ], [ 0xB93D, 0xB957 ], [ 0xB959, 0xB973 ], [ 0xB975, 0xB98F ], [ 0xB991, 0xB9AB ], [ 0xB9AD, 0xB9C7 ], [ 0xB9C9, 0xB9E3 ], [ 0xB9E5, 0xB9FF ], [ 0xBA01, 0xBA1B ], [ 0xBA1D, 
0xBA37 ], [ 0xBA39, 0xBA53 ], [ 0xBA55, 0xBA6F ], [ 0xBA71, 0xBA8B ], [ 0xBA8D, 0xBAA7 ], [ 0xBAA9, 0xBAC3 ], [ 0xBAC5, 0xBADF ], [ 0xBAE1, 0xBAFB ], [ 0xBAFD, 0xBB17 ], [ 0xBB19, 0xBB33 ], [ 0xBB35, 0xBB4F ], [ 0xBB51, 0xBB6B ], [ 0xBB6D, 0xBB87 ], [ 0xBB89, 0xBBA3 ], [ 0xBBA5, 0xBBBF ], [ 0xBBC1, 0xBBDB ], [ 0xBBDD, 0xBBF7 ], [ 0xBBF9, 0xBC13 ], [ 0xBC15, 0xBC2F ], [ 0xBC31, 0xBC4B ], [ 0xBC4D, 0xBC67 ], [ 0xBC69, 0xBC83 ], [ 0xBC85, 0xBC9F ], [ 0xBCA1, 0xBCBB ], [ 0xBCBD, 0xBCD7 ], [ 0xBCD9, 0xBCF3 ], [ 0xBCF5, 0xBD0F ], [ 0xBD11, 0xBD2B ], [ 0xBD2D, 0xBD47 ], [ 0xBD49, 0xBD63 ], [ 0xBD65, 0xBD7F ], [ 0xBD81, 0xBD9B ], [ 0xBD9D, 0xBDB7 ], [ 0xBDB9, 0xBDD3 ], [ 0xBDD5, 0xBDEF ], [ 0xBDF1, 0xBE0B ], [ 0xBE0D, 0xBE27 ], [ 0xBE29, 0xBE43 ], [ 0xBE45, 0xBE5F ], [ 0xBE61, 0xBE7B ], [ 0xBE7D, 0xBE97 ], [ 0xBE99, 0xBEB3 ], [ 0xBEB5, 0xBECF ], [ 0xBED1, 0xBEEB ], [ 0xBEED, 0xBF07 ], [ 0xBF09, 0xBF23 ], [ 0xBF25, 0xBF3F ], [ 0xBF41, 0xBF5B ], [ 0xBF5D, 0xBF77 ], [ 0xBF79, 0xBF93 ], [ 0xBF95, 0xBFAF ], [ 0xBFB1, 0xBFCB ], [ 0xBFCD, 0xBFE7 ], [ 0xBFE9, 0xC003 ], [ 0xC005, 0xC01F ], [ 0xC021, 0xC03B ], [ 0xC03D, 0xC057 ], [ 0xC059, 0xC073 ], [ 0xC075, 0xC08F ], [ 0xC091, 0xC0AB ], [ 0xC0AD, 0xC0C7 ], [ 0xC0C9, 0xC0E3 ], [ 0xC0E5, 0xC0FF ], [ 0xC101, 0xC11B ], [ 0xC11D, 0xC137 ], [ 0xC139, 0xC153 ], [ 0xC155, 0xC16F ], [ 0xC171, 0xC18B ], [ 0xC18D, 0xC1A7 ], [ 0xC1A9, 0xC1C3 ], [ 0xC1C5, 0xC1DF ], [ 0xC1E1, 0xC1FB ], [ 0xC1FD, 0xC217 ], [ 0xC219, 0xC233 ], [ 0xC235, 0xC24F ], [ 0xC251, 0xC26B ], [ 0xC26D, 0xC287 ], [ 0xC289, 0xC2A3 ], [ 0xC2A5, 0xC2BF ], [ 0xC2C1, 0xC2DB ], [ 0xC2DD, 0xC2F7 ], [ 0xC2F9, 0xC313 ], [ 0xC315, 0xC32F ], [ 0xC331, 0xC34B ], [ 0xC34D, 0xC367 ], [ 0xC369, 0xC383 ], [ 0xC385, 0xC39F ], [ 0xC3A1, 0xC3BB ], [ 0xC3BD, 0xC3D7 ], [ 0xC3D9, 0xC3F3 ], [ 0xC3F5, 0xC40F ], [ 0xC411, 0xC42B ], [ 0xC42D, 0xC447 ], [ 0xC449, 0xC463 ], [ 0xC465, 0xC47F ], [ 0xC481, 0xC49B ], [ 0xC49D, 0xC4B7 ], [ 0xC4B9, 0xC4D3 ], [ 0xC4D5, 0xC4EF ], [ 0xC4F1, 0xC50B ], [ 0xC50D, 
0xC527 ], [ 0xC529, 0xC543 ], [ 0xC545, 0xC55F ], [ 0xC561, 0xC57B ], [ 0xC57D, 0xC597 ], [ 0xC599, 0xC5B3 ], [ 0xC5B5, 0xC5CF ], [ 0xC5D1, 0xC5EB ], [ 0xC5ED, 0xC607 ], [ 0xC609, 0xC623 ], [ 0xC625, 0xC63F ], [ 0xC641, 0xC65B ], [ 0xC65D, 0xC677 ], [ 0xC679, 0xC693 ], [ 0xC695, 0xC6AF ], [ 0xC6B1, 0xC6CB ], [ 0xC6CD, 0xC6E7 ], [ 0xC6E9, 0xC703 ], [ 0xC705, 0xC71F ], [ 0xC721, 0xC73B ], [ 0xC73D, 0xC757 ], [ 0xC759, 0xC773 ], [ 0xC775, 0xC78F ], [ 0xC791, 0xC7AB ], [ 0xC7AD, 0xC7C7 ], [ 0xC7C9, 0xC7E3 ], [ 0xC7E5, 0xC7FF ], [ 0xC801, 0xC81B ], [ 0xC81D, 0xC837 ], [ 0xC839, 0xC853 ], [ 0xC855, 0xC86F ], [ 0xC871, 0xC88B ], [ 0xC88D, 0xC8A7 ], [ 0xC8A9, 0xC8C3 ], [ 0xC8C5, 0xC8DF ], [ 0xC8E1, 0xC8FB ], [ 0xC8FD, 0xC917 ], [ 0xC919, 0xC933 ], [ 0xC935, 0xC94F ], [ 0xC951, 0xC96B ], [ 0xC96D, 0xC987 ], [ 0xC989, 0xC9A3 ], [ 0xC9A5, 0xC9BF ], [ 0xC9C1, 0xC9DB ], [ 0xC9DD, 0xC9F7 ], [ 0xC9F9, 0xCA13 ], [ 0xCA15, 0xCA2F ], [ 0xCA31, 0xCA4B ], [ 0xCA4D, 0xCA67 ], [ 0xCA69, 0xCA83 ], [ 0xCA85, 0xCA9F ], [ 0xCAA1, 0xCABB ], [ 0xCABD, 0xCAD7 ], [ 0xCAD9, 0xCAF3 ], [ 0xCAF5, 0xCB0F ], [ 0xCB11, 0xCB2B ], [ 0xCB2D, 0xCB47 ], [ 0xCB49, 0xCB63 ], [ 0xCB65, 0xCB7F ], [ 0xCB81, 0xCB9B ], [ 0xCB9D, 0xCBB7 ], [ 0xCBB9, 0xCBD3 ], [ 0xCBD5, 0xCBEF ], [ 0xCBF1, 0xCC0B ], [ 0xCC0D, 0xCC27 ], [ 0xCC29, 0xCC43 ], [ 0xCC45, 0xCC5F ], [ 0xCC61, 0xCC7B ], [ 0xCC7D, 0xCC97 ], [ 0xCC99, 0xCCB3 ], [ 0xCCB5, 0xCCCF ], [ 0xCCD1, 0xCCEB ], [ 0xCCED, 0xCD07 ], [ 0xCD09, 0xCD23 ], [ 0xCD25, 0xCD3F ], [ 0xCD41, 0xCD5B ], [ 0xCD5D, 0xCD77 ], [ 0xCD79, 0xCD93 ], [ 0xCD95, 0xCDAF ], [ 0xCDB1, 0xCDCB ], [ 0xCDCD, 0xCDE7 ], [ 0xCDE9, 0xCE03 ], [ 0xCE05, 0xCE1F ], [ 0xCE21, 0xCE3B ], [ 0xCE3D, 0xCE57 ], [ 0xCE59, 0xCE73 ], [ 0xCE75, 0xCE8F ], [ 0xCE91, 0xCEAB ], [ 0xCEAD, 0xCEC7 ], [ 0xCEC9, 0xCEE3 ], [ 0xCEE5, 0xCEFF ], [ 0xCF01, 0xCF1B ], [ 0xCF1D, 0xCF37 ], [ 0xCF39, 0xCF53 ], [ 0xCF55, 0xCF6F ], [ 0xCF71, 0xCF8B ], [ 0xCF8D, 0xCFA7 ], [ 0xCFA9, 0xCFC3 ], [ 0xCFC5, 0xCFDF ], [ 0xCFE1, 0xCFFB ], [ 0xCFFD, 
0xD017 ], [ 0xD019, 0xD033 ], [ 0xD035, 0xD04F ], [ 0xD051, 0xD06B ], [ 0xD06D, 0xD087 ], [ 0xD089, 0xD0A3 ], [ 0xD0A5, 0xD0BF ], [ 0xD0C1, 0xD0DB ], [ 0xD0DD, 0xD0F7 ], [ 0xD0F9, 0xD113 ], [ 0xD115, 0xD12F ], [ 0xD131, 0xD14B ], [ 0xD14D, 0xD167 ], [ 0xD169, 0xD183 ], [ 0xD185, 0xD19F ], [ 0xD1A1, 0xD1BB ], [ 0xD1BD, 0xD1D7 ], [ 0xD1D9, 0xD1F3 ], [ 0xD1F5, 0xD20F ], [ 0xD211, 0xD22B ], [ 0xD22D, 0xD247 ], [ 0xD249, 0xD263 ], [ 0xD265, 0xD27F ], [ 0xD281, 0xD29B ], [ 0xD29D, 0xD2B7 ], [ 0xD2B9, 0xD2D3 ], [ 0xD2D5, 0xD2EF ], [ 0xD2F1, 0xD30B ], [ 0xD30D, 0xD327 ], [ 0xD329, 0xD343 ], [ 0xD345, 0xD35F ], [ 0xD361, 0xD37B ], [ 0xD37D, 0xD397 ], [ 0xD399, 0xD3B3 ], [ 0xD3B5, 0xD3CF ], [ 0xD3D1, 0xD3EB ], [ 0xD3ED, 0xD407 ], [ 0xD409, 0xD423 ], [ 0xD425, 0xD43F ], [ 0xD441, 0xD45B ], [ 0xD45D, 0xD477 ], [ 0xD479, 0xD493 ], [ 0xD495, 0xD4AF ], [ 0xD4B1, 0xD4CB ], [ 0xD4CD, 0xD4E7 ], [ 0xD4E9, 0xD503 ], [ 0xD505, 0xD51F ], [ 0xD521, 0xD53B ], [ 0xD53D, 0xD557 ], [ 0xD559, 0xD573 ], [ 0xD575, 0xD58F ], [ 0xD591, 0xD5AB ], [ 0xD5AD, 0xD5C7 ], [ 0xD5C9, 0xD5E3 ], [ 0xD5E5, 0xD5FF ], [ 0xD601, 0xD61B ], [ 0xD61D, 0xD637 ], [ 0xD639, 0xD653 ], [ 0xD655, 0xD66F ], [ 0xD671, 0xD68B ], [ 0xD68D, 0xD6A7 ], [ 0xD6A9, 0xD6C3 ], [ 0xD6C5, 0xD6DF ], [ 0xD6E1, 0xD6FB ], [ 0xD6FD, 0xD717 ], [ 0xD719, 0xD733 ], [ 0xD735, 0xD74F ], [ 0xD751, 0xD76B ], [ 0xD76D, 0xD787 ], [ 0xD789, 0xD7A3 ] ], 16 | ZWJ: [ 0x200D ] 17 | }; 18 | -------------------------------------------------------------------------------- /src/generated/unicodejs.wordbreakproperties.js: -------------------------------------------------------------------------------- 1 | // This file is GENERATED by tools/unicodejs-properties.js 2 | // DO NOT EDIT 3 | unicodeJS.wordbreakproperties = { 4 | DoubleQuote: [ 0x0022 ], 5 | SingleQuote: [ 0x0027 ], 6 | HebrewLetter: [ [ 0x05D0, 0x05EA ], [ 0x05EF, 0x05F2 ], 0xFB1D, [ 0xFB1F, 0xFB28 ], [ 0xFB2A, 0xFB36 ], [ 0xFB38, 0xFB3C ], 0xFB3E, 0xFB40, 0xFB41, 0xFB43, 0xFB44, [ 0xFB46, 0xFB4F 
] ], 7 | CR: [ 0x000D ], 8 | LF: [ 0x000A ], 9 | Newline: [ 0x000B, 0x000C, 0x0085, 0x2028, 0x2029 ], 10 | Extend: [ [ 0x0300, 0x036F ], [ 0x0483, 0x0489 ], [ 0x0591, 0x05BD ], 0x05BF, 0x05C1, 0x05C2, 0x05C4, 0x05C5, 0x05C7, [ 0x0610, 0x061A ], [ 0x064B, 0x065F ], 0x0670, [ 0x06D6, 0x06DC ], [ 0x06DF, 0x06E4 ], 0x06E7, 0x06E8, [ 0x06EA, 0x06ED ], 0x0711, [ 0x0730, 0x074A ], [ 0x07A6, 0x07B0 ], [ 0x07EB, 0x07F3 ], 0x07FD, [ 0x0816, 0x0819 ], [ 0x081B, 0x0823 ], [ 0x0825, 0x0827 ], [ 0x0829, 0x082D ], [ 0x0859, 0x085B ], [ 0x0898, 0x089F ], [ 0x08CA, 0x08E1 ], [ 0x08E3, 0x0903 ], [ 0x093A, 0x093C ], [ 0x093E, 0x094F ], [ 0x0951, 0x0957 ], 0x0962, 0x0963, [ 0x0981, 0x0983 ], 0x09BC, [ 0x09BE, 0x09C4 ], 0x09C7, 0x09C8, [ 0x09CB, 0x09CD ], 0x09D7, 0x09E2, 0x09E3, 0x09FE, [ 0x0A01, 0x0A03 ], 0x0A3C, [ 0x0A3E, 0x0A42 ], 0x0A47, 0x0A48, [ 0x0A4B, 0x0A4D ], 0x0A51, 0x0A70, 0x0A71, 0x0A75, [ 0x0A81, 0x0A83 ], 0x0ABC, [ 0x0ABE, 0x0AC5 ], [ 0x0AC7, 0x0AC9 ], [ 0x0ACB, 0x0ACD ], 0x0AE2, 0x0AE3, [ 0x0AFA, 0x0AFF ], [ 0x0B01, 0x0B03 ], 0x0B3C, [ 0x0B3E, 0x0B44 ], 0x0B47, 0x0B48, [ 0x0B4B, 0x0B4D ], [ 0x0B55, 0x0B57 ], 0x0B62, 0x0B63, 0x0B82, [ 0x0BBE, 0x0BC2 ], [ 0x0BC6, 0x0BC8 ], [ 0x0BCA, 0x0BCD ], 0x0BD7, [ 0x0C00, 0x0C04 ], 0x0C3C, [ 0x0C3E, 0x0C44 ], [ 0x0C46, 0x0C48 ], [ 0x0C4A, 0x0C4D ], 0x0C55, 0x0C56, 0x0C62, 0x0C63, [ 0x0C81, 0x0C83 ], 0x0CBC, [ 0x0CBE, 0x0CC4 ], [ 0x0CC6, 0x0CC8 ], [ 0x0CCA, 0x0CCD ], 0x0CD5, 0x0CD6, 0x0CE2, 0x0CE3, 0x0CF3, [ 0x0D00, 0x0D03 ], 0x0D3B, 0x0D3C, [ 0x0D3E, 0x0D44 ], [ 0x0D46, 0x0D48 ], [ 0x0D4A, 0x0D4D ], 0x0D57, 0x0D62, 0x0D63, [ 0x0D81, 0x0D83 ], 0x0DCA, [ 0x0DCF, 0x0DD4 ], 0x0DD6, [ 0x0DD8, 0x0DDF ], 0x0DF2, 0x0DF3, 0x0E31, [ 0x0E34, 0x0E3A ], [ 0x0E47, 0x0E4E ], 0x0EB1, [ 0x0EB4, 0x0EBC ], [ 0x0EC8, 0x0ECE ], 0x0F18, 0x0F19, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F, [ 0x0F71, 0x0F84 ], 0x0F86, 0x0F87, [ 0x0F8D, 0x0F97 ], [ 0x0F99, 0x0FBC ], 0x0FC6, [ 0x102B, 0x103E ], [ 0x1056, 0x1059 ], [ 0x105E, 0x1060 ], [ 0x1062, 0x1064 ], [ 0x1067, 
0x106D ], [ 0x1071, 0x1074 ], [ 0x1082, 0x108D ], 0x108F, [ 0x109A, 0x109D ], [ 0x135D, 0x135F ], [ 0x1712, 0x1715 ], [ 0x1732, 0x1734 ], 0x1752, 0x1753, 0x1772, 0x1773, [ 0x17B4, 0x17D3 ], 0x17DD, [ 0x180B, 0x180D ], 0x180F, 0x1885, 0x1886, 0x18A9, [ 0x1920, 0x192B ], [ 0x1930, 0x193B ], [ 0x1A17, 0x1A1B ], [ 0x1A55, 0x1A5E ], [ 0x1A60, 0x1A7C ], 0x1A7F, [ 0x1AB0, 0x1ACE ], [ 0x1B00, 0x1B04 ], [ 0x1B34, 0x1B44 ], [ 0x1B6B, 0x1B73 ], [ 0x1B80, 0x1B82 ], [ 0x1BA1, 0x1BAD ], [ 0x1BE6, 0x1BF3 ], [ 0x1C24, 0x1C37 ], [ 0x1CD0, 0x1CD2 ], [ 0x1CD4, 0x1CE8 ], 0x1CED, 0x1CF4, [ 0x1CF7, 0x1CF9 ], [ 0x1DC0, 0x1DFF ], 0x200C, [ 0x20D0, 0x20F0 ], [ 0x2CEF, 0x2CF1 ], 0x2D7F, [ 0x2DE0, 0x2DFF ], [ 0x302A, 0x302F ], 0x3099, 0x309A, [ 0xA66F, 0xA672 ], [ 0xA674, 0xA67D ], 0xA69E, 0xA69F, 0xA6F0, 0xA6F1, 0xA802, 0xA806, 0xA80B, [ 0xA823, 0xA827 ], 0xA82C, 0xA880, 0xA881, [ 0xA8B4, 0xA8C5 ], [ 0xA8E0, 0xA8F1 ], 0xA8FF, [ 0xA926, 0xA92D ], [ 0xA947, 0xA953 ], [ 0xA980, 0xA983 ], [ 0xA9B3, 0xA9C0 ], 0xA9E5, [ 0xAA29, 0xAA36 ], 0xAA43, 0xAA4C, 0xAA4D, [ 0xAA7B, 0xAA7D ], 0xAAB0, [ 0xAAB2, 0xAAB4 ], 0xAAB7, 0xAAB8, 0xAABE, 0xAABF, 0xAAC1, [ 0xAAEB, 0xAAEF ], 0xAAF5, 0xAAF6, [ 0xABE3, 0xABEA ], 0xABEC, 0xABED, 0xFB1E, [ 0xFE00, 0xFE0F ], [ 0xFE20, 0xFE2F ], 0xFF9E, 0xFF9F, 0x101FD, 0x102E0, [ 0x10376, 0x1037A ], [ 0x10A01, 0x10A03 ], 0x10A05, 0x10A06, [ 0x10A0C, 0x10A0F ], [ 0x10A38, 0x10A3A ], 0x10A3F, 0x10AE5, 0x10AE6, [ 0x10D24, 0x10D27 ], 0x10EAB, 0x10EAC, [ 0x10EFD, 0x10EFF ], [ 0x10F46, 0x10F50 ], [ 0x10F82, 0x10F85 ], [ 0x11000, 0x11002 ], [ 0x11038, 0x11046 ], 0x11070, 0x11073, 0x11074, [ 0x1107F, 0x11082 ], [ 0x110B0, 0x110BA ], 0x110C2, [ 0x11100, 0x11102 ], [ 0x11127, 0x11134 ], 0x11145, 0x11146, 0x11173, [ 0x11180, 0x11182 ], [ 0x111B3, 0x111C0 ], [ 0x111C9, 0x111CC ], 0x111CE, 0x111CF, [ 0x1122C, 0x11237 ], 0x1123E, 0x11241, [ 0x112DF, 0x112EA ], [ 0x11300, 0x11303 ], 0x1133B, 0x1133C, [ 0x1133E, 0x11344 ], 0x11347, 0x11348, [ 0x1134B, 0x1134D ], 0x11357, 0x11362, 0x11363, [ 
0x11366, 0x1136C ], [ 0x11370, 0x11374 ], [ 0x11435, 0x11446 ], 0x1145E, [ 0x114B0, 0x114C3 ], [ 0x115AF, 0x115B5 ], [ 0x115B8, 0x115C0 ], 0x115DC, 0x115DD, [ 0x11630, 0x11640 ], [ 0x116AB, 0x116B7 ], [ 0x1171D, 0x1172B ], [ 0x1182C, 0x1183A ], [ 0x11930, 0x11935 ], 0x11937, 0x11938, [ 0x1193B, 0x1193E ], 0x11940, 0x11942, 0x11943, [ 0x119D1, 0x119D7 ], [ 0x119DA, 0x119E0 ], 0x119E4, [ 0x11A01, 0x11A0A ], [ 0x11A33, 0x11A39 ], [ 0x11A3B, 0x11A3E ], 0x11A47, [ 0x11A51, 0x11A5B ], [ 0x11A8A, 0x11A99 ], [ 0x11C2F, 0x11C36 ], [ 0x11C38, 0x11C3F ], [ 0x11C92, 0x11CA7 ], [ 0x11CA9, 0x11CB6 ], [ 0x11D31, 0x11D36 ], 0x11D3A, 0x11D3C, 0x11D3D, [ 0x11D3F, 0x11D45 ], 0x11D47, [ 0x11D8A, 0x11D8E ], 0x11D90, 0x11D91, [ 0x11D93, 0x11D97 ], [ 0x11EF3, 0x11EF6 ], 0x11F00, 0x11F01, 0x11F03, [ 0x11F34, 0x11F3A ], [ 0x11F3E, 0x11F42 ], 0x13440, [ 0x13447, 0x13455 ], [ 0x16AF0, 0x16AF4 ], [ 0x16B30, 0x16B36 ], 0x16F4F, [ 0x16F51, 0x16F87 ], [ 0x16F8F, 0x16F92 ], 0x16FE4, 0x16FF0, 0x16FF1, 0x1BC9D, 0x1BC9E, [ 0x1CF00, 0x1CF2D ], [ 0x1CF30, 0x1CF46 ], [ 0x1D165, 0x1D169 ], [ 0x1D16D, 0x1D172 ], [ 0x1D17B, 0x1D182 ], [ 0x1D185, 0x1D18B ], [ 0x1D1AA, 0x1D1AD ], [ 0x1D242, 0x1D244 ], [ 0x1DA00, 0x1DA36 ], [ 0x1DA3B, 0x1DA6C ], 0x1DA75, 0x1DA84, [ 0x1DA9B, 0x1DA9F ], [ 0x1DAA1, 0x1DAAF ], [ 0x1E000, 0x1E006 ], [ 0x1E008, 0x1E018 ], [ 0x1E01B, 0x1E021 ], 0x1E023, 0x1E024, [ 0x1E026, 0x1E02A ], 0x1E08F, [ 0x1E130, 0x1E136 ], 0x1E2AE, [ 0x1E2EC, 0x1E2EF ], [ 0x1E4EC, 0x1E4EF ], [ 0x1E8D0, 0x1E8D6 ], [ 0x1E944, 0x1E94A ], [ 0x1F3FB, 0x1F3FF ], [ 0xE0020, 0xE007F ], [ 0xE0100, 0xE01EF ] ], 11 | RegionalIndicator: [ [ 0x1F1E6, 0x1F1FF ] ], 12 | Format: [ 0x00AD, [ 0x0600, 0x0605 ], 0x061C, 0x06DD, 0x070F, 0x0890, 0x0891, 0x08E2, 0x180E, 0x200E, 0x200F, [ 0x202A, 0x202E ], [ 0x2060, 0x2064 ], [ 0x2066, 0x206F ], 0xFEFF, [ 0xFFF9, 0xFFFB ], 0x110BD, 0x110CD, [ 0x13430, 0x1343F ], [ 0x1BCA0, 0x1BCA3 ], [ 0x1D173, 0x1D17A ], 0xE0001 ], 13 | Katakana: [ [ 0x3031, 0x3035 ], 0x309B, 0x309C, [ 0x30A0, 
0x30FA ], [ 0x30FC, 0x30FF ], [ 0x31F0, 0x31FF ], [ 0x32D0, 0x32FE ], [ 0x3300, 0x3357 ], [ 0xFF66, 0xFF9D ], [ 0x1AFF0, 0x1AFF3 ], [ 0x1AFF5, 0x1AFFB ], 0x1AFFD, 0x1AFFE, 0x1B000, [ 0x1B120, 0x1B122 ], 0x1B155, [ 0x1B164, 0x1B167 ] ], 14 | ALetter: [ [ 0x0041, 0x005A ], [ 0x0061, 0x007A ], 0x00AA, 0x00B5, 0x00BA, [ 0x00C0, 0x00D6 ], [ 0x00D8, 0x00F6 ], [ 0x00F8, 0x02D7 ], [ 0x02DE, 0x02FF ], [ 0x0370, 0x0374 ], 0x0376, 0x0377, [ 0x037A, 0x037D ], 0x037F, 0x0386, [ 0x0388, 0x038A ], 0x038C, [ 0x038E, 0x03A1 ], [ 0x03A3, 0x03F5 ], [ 0x03F7, 0x0481 ], [ 0x048A, 0x052F ], [ 0x0531, 0x0556 ], [ 0x0559, 0x055C ], 0x055E, [ 0x0560, 0x0588 ], 0x058A, 0x05F3, [ 0x0620, 0x064A ], 0x066E, 0x066F, [ 0x0671, 0x06D3 ], 0x06D5, 0x06E5, 0x06E6, 0x06EE, 0x06EF, [ 0x06FA, 0x06FC ], 0x06FF, 0x0710, [ 0x0712, 0x072F ], [ 0x074D, 0x07A5 ], 0x07B1, [ 0x07CA, 0x07EA ], 0x07F4, 0x07F5, 0x07FA, [ 0x0800, 0x0815 ], 0x081A, 0x0824, 0x0828, [ 0x0840, 0x0858 ], [ 0x0860, 0x086A ], [ 0x0870, 0x0887 ], [ 0x0889, 0x088E ], [ 0x08A0, 0x08C9 ], [ 0x0904, 0x0939 ], 0x093D, 0x0950, [ 0x0958, 0x0961 ], [ 0x0971, 0x0980 ], [ 0x0985, 0x098C ], 0x098F, 0x0990, [ 0x0993, 0x09A8 ], [ 0x09AA, 0x09B0 ], 0x09B2, [ 0x09B6, 0x09B9 ], 0x09BD, 0x09CE, 0x09DC, 0x09DD, [ 0x09DF, 0x09E1 ], 0x09F0, 0x09F1, 0x09FC, [ 0x0A05, 0x0A0A ], 0x0A0F, 0x0A10, [ 0x0A13, 0x0A28 ], [ 0x0A2A, 0x0A30 ], 0x0A32, 0x0A33, 0x0A35, 0x0A36, 0x0A38, 0x0A39, [ 0x0A59, 0x0A5C ], 0x0A5E, [ 0x0A72, 0x0A74 ], [ 0x0A85, 0x0A8D ], [ 0x0A8F, 0x0A91 ], [ 0x0A93, 0x0AA8 ], [ 0x0AAA, 0x0AB0 ], 0x0AB2, 0x0AB3, [ 0x0AB5, 0x0AB9 ], 0x0ABD, 0x0AD0, 0x0AE0, 0x0AE1, 0x0AF9, [ 0x0B05, 0x0B0C ], 0x0B0F, 0x0B10, [ 0x0B13, 0x0B28 ], [ 0x0B2A, 0x0B30 ], 0x0B32, 0x0B33, [ 0x0B35, 0x0B39 ], 0x0B3D, 0x0B5C, 0x0B5D, [ 0x0B5F, 0x0B61 ], 0x0B71, 0x0B83, [ 0x0B85, 0x0B8A ], [ 0x0B8E, 0x0B90 ], [ 0x0B92, 0x0B95 ], 0x0B99, 0x0B9A, 0x0B9C, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, [ 0x0BA8, 0x0BAA ], [ 0x0BAE, 0x0BB9 ], 0x0BD0, [ 0x0C05, 0x0C0C ], [ 0x0C0E, 0x0C10 ], [ 0x0C12, 
0x0C28 ], [ 0x0C2A, 0x0C39 ], 0x0C3D, [ 0x0C58, 0x0C5A ], 0x0C5D, 0x0C60, 0x0C61, 0x0C80, [ 0x0C85, 0x0C8C ], [ 0x0C8E, 0x0C90 ], [ 0x0C92, 0x0CA8 ], [ 0x0CAA, 0x0CB3 ], [ 0x0CB5, 0x0CB9 ], 0x0CBD, 0x0CDD, 0x0CDE, 0x0CE0, 0x0CE1, 0x0CF1, 0x0CF2, [ 0x0D04, 0x0D0C ], [ 0x0D0E, 0x0D10 ], [ 0x0D12, 0x0D3A ], 0x0D3D, 0x0D4E, [ 0x0D54, 0x0D56 ], [ 0x0D5F, 0x0D61 ], [ 0x0D7A, 0x0D7F ], [ 0x0D85, 0x0D96 ], [ 0x0D9A, 0x0DB1 ], [ 0x0DB3, 0x0DBB ], 0x0DBD, [ 0x0DC0, 0x0DC6 ], 0x0F00, [ 0x0F40, 0x0F47 ], [ 0x0F49, 0x0F6C ], [ 0x0F88, 0x0F8C ], [ 0x10A0, 0x10C5 ], 0x10C7, 0x10CD, [ 0x10D0, 0x10FA ], [ 0x10FC, 0x1248 ], [ 0x124A, 0x124D ], [ 0x1250, 0x1256 ], 0x1258, [ 0x125A, 0x125D ], [ 0x1260, 0x1288 ], [ 0x128A, 0x128D ], [ 0x1290, 0x12B0 ], [ 0x12B2, 0x12B5 ], [ 0x12B8, 0x12BE ], 0x12C0, [ 0x12C2, 0x12C5 ], [ 0x12C8, 0x12D6 ], [ 0x12D8, 0x1310 ], [ 0x1312, 0x1315 ], [ 0x1318, 0x135A ], [ 0x1380, 0x138F ], [ 0x13A0, 0x13F5 ], [ 0x13F8, 0x13FD ], [ 0x1401, 0x166C ], [ 0x166F, 0x167F ], [ 0x1681, 0x169A ], [ 0x16A0, 0x16EA ], [ 0x16EE, 0x16F8 ], [ 0x1700, 0x1711 ], [ 0x171F, 0x1731 ], [ 0x1740, 0x1751 ], [ 0x1760, 0x176C ], [ 0x176E, 0x1770 ], [ 0x1820, 0x1878 ], [ 0x1880, 0x1884 ], [ 0x1887, 0x18A8 ], 0x18AA, [ 0x18B0, 0x18F5 ], [ 0x1900, 0x191E ], [ 0x1A00, 0x1A16 ], [ 0x1B05, 0x1B33 ], [ 0x1B45, 0x1B4C ], [ 0x1B83, 0x1BA0 ], 0x1BAE, 0x1BAF, [ 0x1BBA, 0x1BE5 ], [ 0x1C00, 0x1C23 ], [ 0x1C4D, 0x1C4F ], [ 0x1C5A, 0x1C7D ], [ 0x1C80, 0x1C88 ], [ 0x1C90, 0x1CBA ], [ 0x1CBD, 0x1CBF ], [ 0x1CE9, 0x1CEC ], [ 0x1CEE, 0x1CF3 ], 0x1CF5, 0x1CF6, 0x1CFA, [ 0x1D00, 0x1DBF ], [ 0x1E00, 0x1F15 ], [ 0x1F18, 0x1F1D ], [ 0x1F20, 0x1F45 ], [ 0x1F48, 0x1F4D ], [ 0x1F50, 0x1F57 ], 0x1F59, 0x1F5B, 0x1F5D, [ 0x1F5F, 0x1F7D ], [ 0x1F80, 0x1FB4 ], [ 0x1FB6, 0x1FBC ], 0x1FBE, [ 0x1FC2, 0x1FC4 ], [ 0x1FC6, 0x1FCC ], [ 0x1FD0, 0x1FD3 ], [ 0x1FD6, 0x1FDB ], [ 0x1FE0, 0x1FEC ], [ 0x1FF2, 0x1FF4 ], [ 0x1FF6, 0x1FFC ], 0x2071, 0x207F, [ 0x2090, 0x209C ], 0x2102, 0x2107, [ 0x210A, 0x2113 ], 0x2115, [ 0x2119, 
0x211D ], 0x2124, 0x2126, 0x2128, [ 0x212A, 0x212D ], [ 0x212F, 0x2139 ], [ 0x213C, 0x213F ], [ 0x2145, 0x2149 ], 0x214E, [ 0x2160, 0x2188 ], [ 0x24B6, 0x24E9 ], [ 0x2C00, 0x2CE4 ], [ 0x2CEB, 0x2CEE ], 0x2CF2, 0x2CF3, [ 0x2D00, 0x2D25 ], 0x2D27, 0x2D2D, [ 0x2D30, 0x2D67 ], 0x2D6F, [ 0x2D80, 0x2D96 ], [ 0x2DA0, 0x2DA6 ], [ 0x2DA8, 0x2DAE ], [ 0x2DB0, 0x2DB6 ], [ 0x2DB8, 0x2DBE ], [ 0x2DC0, 0x2DC6 ], [ 0x2DC8, 0x2DCE ], [ 0x2DD0, 0x2DD6 ], [ 0x2DD8, 0x2DDE ], 0x2E2F, 0x3005, 0x303B, 0x303C, [ 0x3105, 0x312F ], [ 0x3131, 0x318E ], [ 0x31A0, 0x31BF ], [ 0xA000, 0xA48C ], [ 0xA4D0, 0xA4FD ], [ 0xA500, 0xA60C ], [ 0xA610, 0xA61F ], 0xA62A, 0xA62B, [ 0xA640, 0xA66E ], [ 0xA67F, 0xA69D ], [ 0xA6A0, 0xA6EF ], [ 0xA708, 0xA7CA ], 0xA7D0, 0xA7D1, 0xA7D3, [ 0xA7D5, 0xA7D9 ], [ 0xA7F2, 0xA801 ], [ 0xA803, 0xA805 ], [ 0xA807, 0xA80A ], [ 0xA80C, 0xA822 ], [ 0xA840, 0xA873 ], [ 0xA882, 0xA8B3 ], [ 0xA8F2, 0xA8F7 ], 0xA8FB, 0xA8FD, 0xA8FE, [ 0xA90A, 0xA925 ], [ 0xA930, 0xA946 ], [ 0xA960, 0xA97C ], [ 0xA984, 0xA9B2 ], 0xA9CF, [ 0xAA00, 0xAA28 ], [ 0xAA40, 0xAA42 ], [ 0xAA44, 0xAA4B ], [ 0xAAE0, 0xAAEA ], [ 0xAAF2, 0xAAF4 ], [ 0xAB01, 0xAB06 ], [ 0xAB09, 0xAB0E ], [ 0xAB11, 0xAB16 ], [ 0xAB20, 0xAB26 ], [ 0xAB28, 0xAB2E ], [ 0xAB30, 0xAB69 ], [ 0xAB70, 0xABE2 ], [ 0xAC00, 0xD7A3 ], [ 0xD7B0, 0xD7C6 ], [ 0xD7CB, 0xD7FB ], [ 0xFB00, 0xFB06 ], [ 0xFB13, 0xFB17 ], [ 0xFB50, 0xFBB1 ], [ 0xFBD3, 0xFD3D ], [ 0xFD50, 0xFD8F ], [ 0xFD92, 0xFDC7 ], [ 0xFDF0, 0xFDFB ], [ 0xFE70, 0xFE74 ], [ 0xFE76, 0xFEFC ], [ 0xFF21, 0xFF3A ], [ 0xFF41, 0xFF5A ], [ 0xFFA0, 0xFFBE ], [ 0xFFC2, 0xFFC7 ], [ 0xFFCA, 0xFFCF ], [ 0xFFD2, 0xFFD7 ], [ 0xFFDA, 0xFFDC ], [ 0x10000, 0x1000B ], [ 0x1000D, 0x10026 ], [ 0x10028, 0x1003A ], 0x1003C, 0x1003D, [ 0x1003F, 0x1004D ], [ 0x10050, 0x1005D ], [ 0x10080, 0x100FA ], [ 0x10140, 0x10174 ], [ 0x10280, 0x1029C ], [ 0x102A0, 0x102D0 ], [ 0x10300, 0x1031F ], [ 0x1032D, 0x1034A ], [ 0x10350, 0x10375 ], [ 0x10380, 0x1039D ], [ 0x103A0, 0x103C3 ], [ 0x103C8, 0x103CF ], [ 
0x103D1, 0x103D5 ], [ 0x10400, 0x1049D ], [ 0x104B0, 0x104D3 ], [ 0x104D8, 0x104FB ], [ 0x10500, 0x10527 ], [ 0x10530, 0x10563 ], [ 0x10570, 0x1057A ], [ 0x1057C, 0x1058A ], [ 0x1058C, 0x10592 ], 0x10594, 0x10595, [ 0x10597, 0x105A1 ], [ 0x105A3, 0x105B1 ], [ 0x105B3, 0x105B9 ], 0x105BB, 0x105BC, [ 0x10600, 0x10736 ], [ 0x10740, 0x10755 ], [ 0x10760, 0x10767 ], [ 0x10780, 0x10785 ], [ 0x10787, 0x107B0 ], [ 0x107B2, 0x107BA ], [ 0x10800, 0x10805 ], 0x10808, [ 0x1080A, 0x10835 ], 0x10837, 0x10838, 0x1083C, [ 0x1083F, 0x10855 ], [ 0x10860, 0x10876 ], [ 0x10880, 0x1089E ], [ 0x108E0, 0x108F2 ], 0x108F4, 0x108F5, [ 0x10900, 0x10915 ], [ 0x10920, 0x10939 ], [ 0x10980, 0x109B7 ], 0x109BE, 0x109BF, 0x10A00, [ 0x10A10, 0x10A13 ], [ 0x10A15, 0x10A17 ], [ 0x10A19, 0x10A35 ], [ 0x10A60, 0x10A7C ], [ 0x10A80, 0x10A9C ], [ 0x10AC0, 0x10AC7 ], [ 0x10AC9, 0x10AE4 ], [ 0x10B00, 0x10B35 ], [ 0x10B40, 0x10B55 ], [ 0x10B60, 0x10B72 ], [ 0x10B80, 0x10B91 ], [ 0x10C00, 0x10C48 ], [ 0x10C80, 0x10CB2 ], [ 0x10CC0, 0x10CF2 ], [ 0x10D00, 0x10D23 ], [ 0x10E80, 0x10EA9 ], 0x10EB0, 0x10EB1, [ 0x10F00, 0x10F1C ], 0x10F27, [ 0x10F30, 0x10F45 ], [ 0x10F70, 0x10F81 ], [ 0x10FB0, 0x10FC4 ], [ 0x10FE0, 0x10FF6 ], [ 0x11003, 0x11037 ], 0x11071, 0x11072, 0x11075, [ 0x11083, 0x110AF ], [ 0x110D0, 0x110E8 ], [ 0x11103, 0x11126 ], 0x11144, 0x11147, [ 0x11150, 0x11172 ], 0x11176, [ 0x11183, 0x111B2 ], [ 0x111C1, 0x111C4 ], 0x111DA, 0x111DC, [ 0x11200, 0x11211 ], [ 0x11213, 0x1122B ], 0x1123F, 0x11240, [ 0x11280, 0x11286 ], 0x11288, [ 0x1128A, 0x1128D ], [ 0x1128F, 0x1129D ], [ 0x1129F, 0x112A8 ], [ 0x112B0, 0x112DE ], [ 0x11305, 0x1130C ], 0x1130F, 0x11310, [ 0x11313, 0x11328 ], [ 0x1132A, 0x11330 ], 0x11332, 0x11333, [ 0x11335, 0x11339 ], 0x1133D, 0x11350, [ 0x1135D, 0x11361 ], [ 0x11400, 0x11434 ], [ 0x11447, 0x1144A ], [ 0x1145F, 0x11461 ], [ 0x11480, 0x114AF ], 0x114C4, 0x114C5, 0x114C7, [ 0x11580, 0x115AE ], [ 0x115D8, 0x115DB ], [ 0x11600, 0x1162F ], 0x11644, [ 0x11680, 0x116AA ], 0x116B8, [ 
0x11800, 0x1182B ], [ 0x118A0, 0x118DF ], [ 0x118FF, 0x11906 ], 0x11909, [ 0x1190C, 0x11913 ], 0x11915, 0x11916, [ 0x11918, 0x1192F ], 0x1193F, 0x11941, [ 0x119A0, 0x119A7 ], [ 0x119AA, 0x119D0 ], 0x119E1, 0x119E3, 0x11A00, [ 0x11A0B, 0x11A32 ], 0x11A3A, 0x11A50, [ 0x11A5C, 0x11A89 ], 0x11A9D, [ 0x11AB0, 0x11AF8 ], [ 0x11C00, 0x11C08 ], [ 0x11C0A, 0x11C2E ], 0x11C40, [ 0x11C72, 0x11C8F ], [ 0x11D00, 0x11D06 ], 0x11D08, 0x11D09, [ 0x11D0B, 0x11D30 ], 0x11D46, [ 0x11D60, 0x11D65 ], 0x11D67, 0x11D68, [ 0x11D6A, 0x11D89 ], 0x11D98, [ 0x11EE0, 0x11EF2 ], 0x11F02, [ 0x11F04, 0x11F10 ], [ 0x11F12, 0x11F33 ], 0x11FB0, [ 0x12000, 0x12399 ], [ 0x12400, 0x1246E ], [ 0x12480, 0x12543 ], [ 0x12F90, 0x12FF0 ], [ 0x13000, 0x1342F ], [ 0x13441, 0x13446 ], [ 0x14400, 0x14646 ], [ 0x16800, 0x16A38 ], [ 0x16A40, 0x16A5E ], [ 0x16A70, 0x16ABE ], [ 0x16AD0, 0x16AED ], [ 0x16B00, 0x16B2F ], [ 0x16B40, 0x16B43 ], [ 0x16B63, 0x16B77 ], [ 0x16B7D, 0x16B8F ], [ 0x16E40, 0x16E7F ], [ 0x16F00, 0x16F4A ], 0x16F50, [ 0x16F93, 0x16F9F ], 0x16FE0, 0x16FE1, 0x16FE3, [ 0x1BC00, 0x1BC6A ], [ 0x1BC70, 0x1BC7C ], [ 0x1BC80, 0x1BC88 ], [ 0x1BC90, 0x1BC99 ], [ 0x1D400, 0x1D454 ], [ 0x1D456, 0x1D49C ], 0x1D49E, 0x1D49F, 0x1D4A2, 0x1D4A5, 0x1D4A6, [ 0x1D4A9, 0x1D4AC ], [ 0x1D4AE, 0x1D4B9 ], 0x1D4BB, [ 0x1D4BD, 0x1D4C3 ], [ 0x1D4C5, 0x1D505 ], [ 0x1D507, 0x1D50A ], [ 0x1D50D, 0x1D514 ], [ 0x1D516, 0x1D51C ], [ 0x1D51E, 0x1D539 ], [ 0x1D53B, 0x1D53E ], [ 0x1D540, 0x1D544 ], 0x1D546, [ 0x1D54A, 0x1D550 ], [ 0x1D552, 0x1D6A5 ], [ 0x1D6A8, 0x1D6C0 ], [ 0x1D6C2, 0x1D6DA ], [ 0x1D6DC, 0x1D6FA ], [ 0x1D6FC, 0x1D714 ], [ 0x1D716, 0x1D734 ], [ 0x1D736, 0x1D74E ], [ 0x1D750, 0x1D76E ], [ 0x1D770, 0x1D788 ], [ 0x1D78A, 0x1D7A8 ], [ 0x1D7AA, 0x1D7C2 ], [ 0x1D7C4, 0x1D7CB ], [ 0x1DF00, 0x1DF1E ], [ 0x1DF25, 0x1DF2A ], [ 0x1E030, 0x1E06D ], [ 0x1E100, 0x1E12C ], [ 0x1E137, 0x1E13D ], 0x1E14E, [ 0x1E290, 0x1E2AD ], [ 0x1E2C0, 0x1E2EB ], [ 0x1E4D0, 0x1E4EB ], [ 0x1E7E0, 0x1E7E6 ], [ 0x1E7E8, 0x1E7EB ], 0x1E7ED, 0x1E7EE, [ 
0x1E7F0, 0x1E7FE ], [ 0x1E800, 0x1E8C4 ], [ 0x1E900, 0x1E943 ], 0x1E94B, [ 0x1EE00, 0x1EE03 ], [ 0x1EE05, 0x1EE1F ], 0x1EE21, 0x1EE22, 0x1EE24, 0x1EE27, [ 0x1EE29, 0x1EE32 ], [ 0x1EE34, 0x1EE37 ], 0x1EE39, 0x1EE3B, 0x1EE42, 0x1EE47, 0x1EE49, 0x1EE4B, [ 0x1EE4D, 0x1EE4F ], 0x1EE51, 0x1EE52, 0x1EE54, 0x1EE57, 0x1EE59, 0x1EE5B, 0x1EE5D, 0x1EE5F, 0x1EE61, 0x1EE62, 0x1EE64, [ 0x1EE67, 0x1EE6A ], [ 0x1EE6C, 0x1EE72 ], [ 0x1EE74, 0x1EE77 ], [ 0x1EE79, 0x1EE7C ], 0x1EE7E, [ 0x1EE80, 0x1EE89 ], [ 0x1EE8B, 0x1EE9B ], [ 0x1EEA1, 0x1EEA3 ], [ 0x1EEA5, 0x1EEA9 ], [ 0x1EEAB, 0x1EEBB ], [ 0x1F130, 0x1F149 ], [ 0x1F150, 0x1F169 ], [ 0x1F170, 0x1F189 ] ], 15 | MidLetter: [ 0x003A, 0x00B7, 0x0387, 0x055F, 0x05F4, 0x2027, 0xFE13, 0xFE55, 0xFF1A ], 16 | MidNum: [ 0x002C, 0x003B, 0x037E, 0x0589, 0x060C, 0x060D, 0x066C, 0x07F8, 0x2044, 0xFE10, 0xFE14, 0xFE50, 0xFE54, 0xFF0C, 0xFF1B ], 17 | MidNumLet: [ 0x002E, 0x2018, 0x2019, 0x2024, 0xFE52, 0xFF07, 0xFF0E ], 18 | Numeric: [ [ 0x0030, 0x0039 ], [ 0x0660, 0x0669 ], 0x066B, [ 0x06F0, 0x06F9 ], [ 0x07C0, 0x07C9 ], [ 0x0966, 0x096F ], [ 0x09E6, 0x09EF ], [ 0x0A66, 0x0A6F ], [ 0x0AE6, 0x0AEF ], [ 0x0B66, 0x0B6F ], [ 0x0BE6, 0x0BEF ], [ 0x0C66, 0x0C6F ], [ 0x0CE6, 0x0CEF ], [ 0x0D66, 0x0D6F ], [ 0x0DE6, 0x0DEF ], [ 0x0E50, 0x0E59 ], [ 0x0ED0, 0x0ED9 ], [ 0x0F20, 0x0F29 ], [ 0x1040, 0x1049 ], [ 0x1090, 0x1099 ], [ 0x17E0, 0x17E9 ], [ 0x1810, 0x1819 ], [ 0x1946, 0x194F ], [ 0x19D0, 0x19D9 ], [ 0x1A80, 0x1A89 ], [ 0x1A90, 0x1A99 ], [ 0x1B50, 0x1B59 ], [ 0x1BB0, 0x1BB9 ], [ 0x1C40, 0x1C49 ], [ 0x1C50, 0x1C59 ], [ 0xA620, 0xA629 ], [ 0xA8D0, 0xA8D9 ], [ 0xA900, 0xA909 ], [ 0xA9D0, 0xA9D9 ], [ 0xA9F0, 0xA9F9 ], [ 0xAA50, 0xAA59 ], [ 0xABF0, 0xABF9 ], [ 0xFF10, 0xFF19 ], [ 0x104A0, 0x104A9 ], [ 0x10D30, 0x10D39 ], [ 0x11066, 0x1106F ], [ 0x110F0, 0x110F9 ], [ 0x11136, 0x1113F ], [ 0x111D0, 0x111D9 ], [ 0x112F0, 0x112F9 ], [ 0x11450, 0x11459 ], [ 0x114D0, 0x114D9 ], [ 0x11650, 0x11659 ], [ 0x116C0, 0x116C9 ], [ 0x11730, 0x11739 ], [ 0x118E0, 0x118E9 
], [ 0x11950, 0x11959 ], [ 0x11C50, 0x11C59 ], [ 0x11D50, 0x11D59 ], [ 0x11DA0, 0x11DA9 ], [ 0x11F50, 0x11F59 ], [ 0x16A60, 0x16A69 ], [ 0x16AC0, 0x16AC9 ], [ 0x16B50, 0x16B59 ], [ 0x1D7CE, 0x1D7FF ], [ 0x1E140, 0x1E149 ], [ 0x1E2F0, 0x1E2F9 ], [ 0x1E4F0, 0x1E4F9 ], [ 0x1E950, 0x1E959 ], [ 0x1FBF0, 0x1FBF9 ] ], 19 | ExtendNumLet: [ 0x005F, 0x202F, 0x203F, 0x2040, 0x2054, 0xFE33, 0xFE34, [ 0xFE4D, 0xFE4F ], 0xFF3F ], 20 | ZWJ: [ 0x200D ], 21 | WSegSpace: [ 0x0020, 0x1680, [ 0x2000, 0x2006 ], [ 0x2008, 0x200A ], 0x205F, 0x3000 ] 22 | }; 23 | -------------------------------------------------------------------------------- /src/unicodejs.characterclass.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * UnicodeJS character classes 3 | * 4 | * Support for unicode equivalents of JS regex character classes 5 | * 6 | * @copyright 2013– UnicodeJS team and others; see AUTHORS.txt 7 | * @license The MIT License (MIT); see LICENSE.txt 8 | */ 9 | ( function () { 10 | /** 11 | * @namespace unicodeJS.characterclass 12 | */ 13 | const basicLatinDigitRange = [ 0x30, 0x39 ], 14 | joinControlRange = [ 0x200C, 0x200D ], 15 | characterclass = unicodeJS.characterclass = {}; 16 | 17 | /** 18 | * @memberof unicodeJS.characterclass 19 | * @property {Object} 20 | */ 21 | characterclass.patterns = { 22 | // \w is defined in http://unicode.org/reports/tr18/ 23 | word: unicodeJS.charRangeArrayRegexp( [].concat( 24 | unicodeJS.derivedcoreproperties.Alphabetic, 25 | unicodeJS.derivedgeneralcategories.M, 26 | [ basicLatinDigitRange ], 27 | unicodeJS.derivedgeneralcategories.Pc, 28 | [ joinControlRange ] 29 | ) ) 30 | }; 31 | }() ); 32 | -------------------------------------------------------------------------------- /src/unicodejs.graphemebreak.js: -------------------------------------------------------------------------------- 1 | /*! 
2 | * UnicodeJS Grapheme Break module 3 | * 4 | * Implementation of Unicode 15.0.0 Default Grapheme Cluster Boundary Specification 5 | * http://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table 6 | * 7 | * @copyright 2013– UnicodeJS team and others; see AUTHORS.txt 8 | * @license The MIT License (MIT); see LICENSE.txt 9 | */ 10 | ( function () { 11 | const properties = unicodeJS.graphemebreakproperties, 12 | emojiProperties = unicodeJS.emojiproperties, 13 | /** 14 | * @namespace unicodeJS.graphemebreak 15 | */ 16 | graphemebreak = unicodeJS.graphemebreak = {}, 17 | patterns = {}; 18 | 19 | // build regexes 20 | for ( const property in properties ) { 21 | // eslint-disable-next-line security/detect-non-literal-regexp 22 | patterns[ property ] = new RegExp( 23 | unicodeJS.charRangeArrayRegexp( properties[ property ] ) 24 | ); 25 | } 26 | for ( const property in emojiProperties ) { 27 | // eslint-disable-next-line security/detect-non-literal-regexp 28 | patterns[ property ] = new RegExp( 29 | unicodeJS.charRangeArrayRegexp( emojiProperties[ property ] ) 30 | ); 31 | } 32 | // Return the name of the first entry in patterns whose regex matches codepoint, or null if none match 33 | function getProperty( codepoint ) { 34 | for ( const property in patterns ) { 35 | if ( patterns[ property ].test( codepoint ) ) { 36 | return property; 37 | } 38 | } 39 | return null; 40 | } 41 | 42 | /** 43 | * Split text into grapheme clusters 44 | * 45 | * @memberof unicodeJS.graphemebreak 46 | * @param {string} text Text to split 47 | * @return {string[]} Split text 48 | */ 49 | graphemebreak.splitClusters = function ( text ) { 50 | return text.split( /(?![\uDC00-\uDFFF])/g ).reduce( ( clusters, codepoint, i, codepoints ) => { 51 | function isBreak() { 52 | const lft = []; 53 | 54 | // Break at the start and end of text, unless the text is empty. 55 | // GB1: sot ÷ Any 56 | // GB2: Any ÷ eot (inside reduce() i is always less than codepoints.length, so only the i === 0 test can succeed here) 57 | if ( i === 0 || i === codepoints.length ) { 58 | return true; 59 | } 60 | 61 | lft.push( getProperty( codepoints[ i - 1 ] ) ); 62 | // No rules currently require us to look ahead.
63 | const rgt = getProperty( codepoint ); 64 | 65 | // Do not break between a CR and LF. Otherwise, break before and after controls. 66 | // GB3: CR × LF 67 | if ( lft[ 0 ] === 'CR' && rgt === 'LF' ) { 68 | return false; 69 | } 70 | 71 | // GB4: ( Control | CR | LF ) ÷ 72 | // GB5: ÷ ( Control | CR | LF ) 73 | if ( 74 | [ 'Control', 'CR', 'LF' ].includes( lft[ 0 ] ) || 75 | [ 'Control', 'CR', 'LF' ].includes( rgt ) 76 | ) { 77 | return true; 78 | } 79 | 80 | // Do not break Hangul syllable sequences. 81 | // GB6: L × ( L | V | LV | LVT ) 82 | if ( 83 | lft[ 0 ] === 'L' && 84 | [ 'L', 'V', 'LV', 'LVT' ].includes( rgt ) 85 | ) { 86 | return false; 87 | } 88 | // GB7: ( LV | V ) × ( V | T ) 89 | if ( 90 | [ 'LV', 'V' ].includes( lft[ 0 ] ) && 91 | [ 'V', 'T' ].includes( rgt ) 92 | ) { 93 | return false; 94 | } 95 | // GB8: ( LVT | T ) × T 96 | if ( 97 | [ 'LVT', 'T' ].includes( lft[ 0 ] ) && 98 | rgt === 'T' 99 | ) { 100 | return false; 101 | } 102 | 103 | // Do not break before extending characters or ZWJ. 104 | // GB9 × ( Extend | ZWJ ) 105 | // The GB9a and GB9b rules only apply to extended grapheme clusters: 106 | // Do not break before SpacingMarks, or after Prepend characters. 107 | // GB9a: × SpacingMark 108 | if ( [ 'Extend', 'ZWJ', 'SpacingMark' ].includes( rgt ) ) { 109 | return false; 110 | } 111 | // GB9b: Prepend × 112 | if ( lft[ 0 ] === 'Prepend' ) { 113 | return false; 114 | } 115 | 116 | // Do not break within emoji modifier sequences or emoji zwj sequences. 
117 | // GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic} 118 | let l = 0; 119 | if ( rgt === 'ExtendedPictographic' ) { 120 | if ( lft[ l ] === 'ZWJ' ) { 121 | l++; 122 | lft[ l ] = getProperty( codepoints[ i - 1 - l ] ); 123 | while ( lft[ l ] === 'Extend' ) { 124 | l++; 125 | lft[ l ] = getProperty( codepoints[ i - 1 - l ] ); 126 | } 127 | if ( lft[ l ] === 'ExtendedPictographic' ) { 128 | return false; 129 | } 130 | } 131 | } 132 | 133 | // Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point. 134 | // GB12: sot (RI RI)* RI × RI 135 | // GB13: [^RI] (RI RI)* RI × RI 136 | l = 0; 137 | while ( lft[ l ] === 'RegionalIndicator' ) { 138 | l++; 139 | lft[ l ] = getProperty( codepoints[ i - 1 - l ] ); 140 | } 141 | if ( rgt === 'RegionalIndicator' && l % 2 === 1 ) { 142 | return false; 143 | } 144 | // Otherwise, break everywhere. 145 | // GB999: Any ÷ Any 146 | return true; 147 | } 148 | 149 | if ( isBreak() ) { 150 | clusters.push( codepoint ); 151 | } else { 152 | // TODO: This is not covered by tests, is it needed? NOTE(review): isBreak() returns true whenever i === 0, so the first codepoint is always pushed before this branch can run; the guard below looks unreachable. 153 | // istanbul ignore next 154 | if ( !clusters.length ) { 155 | clusters.push( '' ); 156 | } 157 | clusters[ clusters.length - 1 ] += codepoint; 158 | } 159 | 160 | return clusters; 161 | }, [] ); 162 | }; 163 | }() ); 164 | -------------------------------------------------------------------------------- /src/unicodejs.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * UnicodeJS namespace 3 | * 4 | * @copyright 2013– UnicodeJS team and others; see AUTHORS.txt 5 | * @license The MIT License (MIT); see LICENSE.txt 6 | */ 7 | 8 | ( function () { 9 | 10 | /** 11 | * Namespace for all UnicodeJS classes, static methods and static properties.
12 | * 13 | * @namespace unicodeJS 14 | */ 15 | 16 | /** 17 | * Check if a code unit is the leading half of a surrogate pair 18 | * 19 | * @param {string} unit Code unit 20 | * @return {boolean} Truthy if unit is a single code unit in U+D800-U+DBFF. NOTE: the value is the raw `unit && unit.match( ... )` result (match array, null, or the falsy unit itself), not a strict boolean 21 | */ 22 | unicodeJS.isLeadingSurrogate = function ( unit ) { 23 | return unit && unit.match( /^[\uD800-\uDBFF]$/ ); 24 | }; 25 | 26 | /** 27 | * Check if a code unit is the trailing half of a surrogate pair 28 | * 29 | * @param {string} unit Code unit 30 | * @return {boolean} Truthy if unit is a single code unit in U+DC00-U+DFFF. NOTE: the value is the raw `unit && unit.match( ... )` result (match array, null, or the falsy unit itself), not a strict boolean 31 | */ 32 | unicodeJS.isTrailingSurrogate = function ( unit ) { 33 | return unit && unit.match( /^[\uDC00-\uDFFF]$/ ); 34 | }; 35 | 36 | /** 37 | * Write a UTF-16 code unit as a JavaScript string literal escape. 38 | * 39 | * @memberof unicodeJS 40 | * @private 41 | * @param {number} codeUnit integer between 0x0000 and 0xFFFF 42 | * @return {string} String literal ('\u' followed by 4 hex digits) 43 | */ 44 | function uEsc( codeUnit ) { 45 | return '\\u' + ( codeUnit + 0x10000 ).toString( 16 ).slice( -4 ); 46 | } 47 | 48 | /** 49 | * Return a regexp string for the code unit range min-max 50 | * 51 | * @memberof unicodeJS 52 | * @private 53 | * @param {number} min the minimum code unit in the range. 54 | * @param {number} max the maximum code unit in the range. 55 | * @param {boolean} [bracket] If true, then wrap range in [ ... ] (ignored when min equals max) 56 | * @return {string} Regexp string which matches the range 57 | */ 58 | function codeUnitRange( min, max, bracket ) { 59 | if ( min === max ) { // single code unit: never bracket 60 | return uEsc( min ); 61 | } 62 | const value = uEsc( min ) + '-' + uEsc( max ); 63 | if ( bracket ) { 64 | return '[' + value + ']'; 65 | } else { 66 | return value; 67 | } 68 | } 69 | 70 | /** 71 | * Get a list of boxes in hi-lo surrogate space, corresponding to the given character range 72 | * 73 | * A box {hi: [x, y], lo: [z, w]} represents a regex [x-y][z-w] to match a surrogate pair 74 | * 75 | * Suppose ch1 and ch2 have surrogate pairs (hi1, lo1) and (hi2, lo2).
76 | * Then the range of chars from ch1 to ch2 can be represented as the 77 | * disjunction of three code unit ranges: 78 | * 79 | * [hi1 - hi1][lo1 - 0xDFFF] 80 | * | 81 | * [hi1+1 - hi2-1][0xDC00 - 0xDFFF] 82 | * | 83 | * [hi2 - hi2][0xDC00 - lo2] 84 | * 85 | * Often the notation can be optimised (e.g. when hi1 == hi2). 86 | * 87 | * @memberof unicodeJS 88 | * @private 89 | * @param {number} ch1 The min character of the range; must be over 0xFFFF 90 | * @param {number} ch2 The max character of the range; must be at least ch1 91 | * @return {Object[]} A list of boxes where each box is an object with two properties: 'hi' and 'lo'. 92 | * 'hi' is an array of two numbers representing the range of the high surrogate. 93 | * 'lo' is an array of two numbers representing the range of the low surrogate. 94 | */ 95 | function getCodeUnitBoxes( ch1, ch2 ) { 96 | 97 | const loMin = 0xDC00; 98 | const loMax = 0xDFFF; 99 | 100 | // hi and lo surrogates for ch1 101 | /* eslint-disable no-bitwise */ 102 | const hi1 = 0xD800 + ( ( ch1 - 0x10000 ) >> 10 ); 103 | const lo1 = 0xDC00 + ( ( ch1 - 0x10000 ) & 0x3FF ); 104 | 105 | // hi and lo surrogates for ch2 106 | const hi2 = 0xD800 + ( ( ch2 - 0x10000 ) >> 10 ); 107 | const lo2 = 0xDC00 + ( ( ch2 - 0x10000 ) & 0x3FF ); 108 | /* eslint-enable no-bitwise */ 109 | 110 | if ( hi1 === hi2 ) { 111 | return [ { hi: [ hi1, hi2 ], lo: [ lo1, lo2 ] } ]; 112 | } 113 | 114 | const boxes = []; 115 | 116 | /* eslint-disable no-bitwise */ 117 | 118 | // minimum hi surrogate which only represents characters >= ch1 119 | const hiMinAbove = 0xD800 + ( ( ch1 - 0x10000 + 0x3FF ) >> 10 ); 120 | // maximum hi surrogate which only represents characters <= ch2 121 | const hiMaxBelow = 0xD800 + ( ( ch2 - 0x10000 - 0x3FF ) >> 10 ); 122 | /* eslint-enable no-bitwise */ 123 | 124 | if ( hi1 < hiMinAbove ) { 125 | boxes.push( { hi: [ hi1, hi1 ], lo: [ lo1, loMax ] } ); 126 | } 127 | if ( hiMinAbove <= hiMaxBelow ) { 128 | boxes.push( { hi: [ hiMinAbove,
hiMaxBelow ], lo: [ loMin, loMax ] } ); 129 | } 130 | if ( hiMaxBelow < hi2 ) { 131 | boxes.push( { hi: [ hi2, hi2 ], lo: [ loMin, lo2 ] } ); 132 | } 133 | return boxes; 134 | } 135 | 136 | /** 137 | * Make a regexp string for an array of Unicode character ranges. 138 | * 139 | * If either character in a range is above 0xFFFF, then the range will 140 | * be encoded as multiple surrogate pair ranges. It is an error for a 141 | * range to overlap with the surrogate range 0xD800-0xDFFF (as this would 142 | * only match ill-formed strings). 143 | * 144 | * @param {Array} ranges Array of ranges, each of which is a character or an interval 145 | * @return {string} Regexp string for the disjunction of the ranges. 146 | */ 147 | unicodeJS.charRangeArrayRegexp = function ( ranges ) { 148 | let boxes = []; 149 | const characterClass = [], // list of (\uXXXX code unit or interval), for BMP 150 | disjunction = []; // list of regex strings, to be joined with '|' 151 | 152 | ranges.forEach( ( range ) => { 153 | // Handle single code unit 154 | if ( typeof range === 'number' ) { 155 | if ( range <= 0xFFFF ) { 156 | if ( range >= 0xD800 && range <= 0xDFFF ) { 157 | throw new Error( 'Surrogate: ' + range.toString( 16 ) ); 158 | } 159 | characterClass.push( uEsc( range ) ); 160 | return; 161 | } else { 162 | // Handle single surrogate pair 163 | if ( range > 0x10FFFF ) { 164 | throw new Error( 'Character code too high: ' + range.toString( 16 ) ); 165 | } 166 | /* eslint-disable no-bitwise */ 167 | const hi = 0xD800 + ( ( range - 0x10000 ) >> 10 ); 168 | const lo = 0xDC00 + ( ( range - 0x10000 ) & 0x3FF ); 169 | /* eslint-enable no-bitwise */ 170 | 171 | disjunction.push( uEsc( hi ) + uEsc( lo ) ); 172 | return; 173 | } 174 | } 175 | 176 | // Handle interval 177 | const min = range[ 0 ]; 178 | const max = range[ 1 ]; 179 | if ( min > max ) { 180 | throw new Error( min.toString( 16 ) + ' > ' + max.toString( 16 ) ); 181 | } 182 | if ( max > 0x10FFFF ) { 183 | throw new Error( 
'Character code too high: ' + 184 | max.toString( 16 ) ); 185 | } 186 | if ( max >= 0xD800 && min <= 0xDFFF ) { 187 | throw new Error( 'range includes surrogates: ' + 188 | min.toString( 16 ) + '-' + max.toString( 16 ) ); 189 | } 190 | if ( max <= 0xFFFF ) { 191 | // interval is entirely BMP 192 | characterClass.push( codeUnitRange( min, max ) ); 193 | } else if ( min <= 0xFFFF ) { 194 | // interval is BMP and non-BMP 195 | characterClass.push( codeUnitRange( min, 0xFFFF ) ); 196 | boxes = getCodeUnitBoxes( 0x10000, max ); 197 | } else { 198 | // interval is entirely non-BMP 199 | boxes = getCodeUnitBoxes( min, max ); 200 | } 201 | 202 | // append hi-lo surrogate space boxes as code unit range pairs 203 | boxes.forEach( ( box ) => { 204 | const hi2 = codeUnitRange( box.hi[ 0 ], box.hi[ 1 ], true ); 205 | const lo2 = codeUnitRange( box.lo[ 0 ], box.lo[ 1 ], true ); 206 | disjunction.push( hi2 + lo2 ); 207 | } ); 208 | } ); 209 | 210 | // prepend BMP character class to the disjunction 211 | if ( characterClass.length === 1 && !characterClass[ 0 ].match( /-/ ) ) { 212 | disjunction.unshift( characterClass[ 0 ] ); // single character 213 | } else if ( characterClass.length > 0 ) { 214 | disjunction.unshift( '[' + characterClass.join( '' ) + ']' ); 215 | } 216 | return disjunction.join( '|' ); 217 | }; 218 | }() ); 219 | -------------------------------------------------------------------------------- /src/unicodejs.textstring.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * UnicodeJS TextString class. 3 | * 4 | * @copyright 2013– UnicodeJS team and others; see AUTHORS.txt 5 | * @license The MIT License (MIT); see LICENSE.txt 6 | */ 7 | 8 | /** 9 | * This class provides a simple interface to fetching plain text 10 | * from a data source. The base class reads data from a string, but 11 | * an extended class could provide access to a more complex structure, 12 | * e.g. an array or an HTML document tree. 
13 | * 14 | * @class unicodeJS.TextString 15 | * @constructor 16 | * @param {string} text Text 17 | */ 18 | unicodeJS.TextString = function UnicodeJSTextString( text ) { 19 | this.text = text; 20 | }; 21 | 22 | /* Methods */ 23 | 24 | /** 25 | * Read code unit at specified position 26 | * 27 | * @method 28 | * @param {number} position Position to read from 29 | * @return {string|null} Code unit, or null if out of bounds 30 | */ 31 | unicodeJS.TextString.prototype.read = function ( position ) { 32 | const dataAt = this.text[ position ]; 33 | return dataAt !== undefined ? dataAt : null; 34 | }; 35 | 36 | /** 37 | * Read unicode codepoint after the specified offset 38 | * 39 | * This is the same as the code unit (=JavaScript character) at that offset, 40 | * unless a valid surrogate pair starts at that code unit, in which case both halves are returned. (This is consistent 41 | * with the behaviour of String.prototype.codePointAt) 42 | * 43 | * @param {number} position Position 44 | * @return {string|null} Unicode codepoint, or null if out of bounds 45 | */ 46 | unicodeJS.TextString.prototype.nextCodepoint = function ( position ) { 47 | const codeUnit = this.read( position ); 48 | 49 | if ( unicodeJS.isLeadingSurrogate( codeUnit ) ) { 50 | const nextCodeUnit = this.read( position + 1 ); 51 | if ( unicodeJS.isTrailingSurrogate( nextCodeUnit ) ) { 52 | return codeUnit + nextCodeUnit; 53 | } 54 | } 55 | return codeUnit; 56 | }; 57 | 58 | /** 59 | * Read unicode codepoint before the specified offset 60 | * 61 | * This is the same as the code unit (=JavaScript character) at the previous 62 | * offset, unless a valid surrogate pair ends at that offset.
63 | * 64 | * @param {number} position Position 65 | * @return {string|null} Unicode codepoint, or null if out of bounds 66 | */ 67 | unicodeJS.TextString.prototype.prevCodepoint = function ( position ) { 68 | const codeUnit = this.read( position - 1 ); 69 | 70 | if ( unicodeJS.isTrailingSurrogate( codeUnit ) ) { 71 | const prevCodeUnit = this.read( position - 2 ); 72 | if ( unicodeJS.isLeadingSurrogate( prevCodeUnit ) ) { 73 | return prevCodeUnit + codeUnit; 74 | } 75 | } 76 | return codeUnit; 77 | }; 78 | 79 | /** 80 | * Check if the current offset is in the middle of a surrogate pair 81 | * 82 | * @param {number} position Position 83 | * @return {boolean} 84 | */ 85 | unicodeJS.TextString.prototype.isMidSurrogate = function ( position ) { 86 | return unicodeJS.isLeadingSurrogate( this.read( position - 1 ) ) && 87 | unicodeJS.isTrailingSurrogate( this.read( position ) ); 88 | }; 89 | 90 | /** 91 | * Get as a plain string 92 | * 93 | * @return {string} Plain javascript string 94 | */ 95 | unicodeJS.TextString.prototype.toString = function () { 96 | return this.text; 97 | }; 98 | -------------------------------------------------------------------------------- /src/unicodejs.wordbreak.js: -------------------------------------------------------------------------------- 1 | /*! 
2 | * UnicodeJS Word Break module 3 | * 4 | * Implementation of Unicode 15.0.0 Default Word Boundary Specification 5 | * http://www.unicode.org/reports/tr29/#Default_Word_Boundaries 6 | * 7 | * @copyright 2013– UnicodeJS team and others; see AUTHORS.txt 8 | * @license The MIT License (MIT); see LICENSE.txt 9 | */ 10 | 11 | /* eslint-disable no-fallthrough */ 12 | 13 | ( function () { 14 | const properties = unicodeJS.wordbreakproperties, 15 | emojiProperties = unicodeJS.emojiproperties, 16 | /** 17 | * @namespace unicodeJS.wordbreak 18 | */ 19 | wordbreak = unicodeJS.wordbreak = {}, 20 | patterns = {}, 21 | ZWJ_FE = /^(Format|Extend|ZWJ)$/; 22 | 23 | // build regexes 24 | for ( const property in properties ) { 25 | // eslint-disable-next-line security/detect-non-literal-regexp 26 | patterns[ property ] = new RegExp( 27 | unicodeJS.charRangeArrayRegexp( properties[ property ] ) 28 | ); 29 | } 30 | for ( const property in emojiProperties ) { 31 | // eslint-disable-next-line security/detect-non-literal-regexp 32 | patterns[ property ] = new RegExp( 33 | unicodeJS.charRangeArrayRegexp( emojiProperties[ property ] ) 34 | ); 35 | } 36 | 37 | /** 38 | * Return the wordbreak property value for the codepoint 39 | * 40 | * See http://www.unicode.org/reports/tr29/#Word_Boundaries 41 | * 42 | * @memberof unicodeJS.wordbreak 43 | * @private 44 | * @param {string} codepoint The codepoint 45 | * @return {string|null} The unicode wordbreak property value (key of unicodeJS.wordbreakproperties) 46 | */ 47 | function getProperty( codepoint ) { 48 | for ( const property in patterns ) { 49 | if ( patterns[ property ].test( codepoint ) ) { 50 | return property; 51 | } 52 | } 53 | return null; 54 | } 55 | 56 | /** 57 | * Find the next word break offset. 
58 | * 59 | * @memberof unicodeJS.wordbreak 60 | * @param {unicodeJS.TextString} string TextString 61 | * @param {number} pos Character position 62 | * @param {boolean} [onlyAlphaNumeric=false] When set, ignores a break if the previous character is not alphaNumeric 63 | * @return {number} Returns the next offset which is a word break 64 | */ 65 | wordbreak.nextBreakOffset = function ( string, pos, onlyAlphaNumeric ) { 66 | return this.moveBreakOffset( 1, string, pos, onlyAlphaNumeric ); 67 | }; 68 | 69 | /** 70 | * Find the previous word break offset. 71 | * 72 | * @memberof unicodeJS.wordbreak 73 | * @param {unicodeJS.TextString} string TextString 74 | * @param {number} pos Character position 75 | * @param {boolean} [onlyAlphaNumeric=false] When set, ignores a break if the previous character is not alphaNumeric 76 | * @return {number} Returns the previous offset which is a word break 77 | */ 78 | wordbreak.prevBreakOffset = function ( string, pos, onlyAlphaNumeric ) { 79 | return this.moveBreakOffset( -1, string, pos, onlyAlphaNumeric ); 80 | }; 81 | 82 | /** 83 | * Find the next word break offset in a specified direction. 84 | * 85 | * @memberof unicodeJS.wordbreak 86 | * @param {number} direction Direction to search in, should be plus or minus one 87 | * @param {unicodeJS.TextString} string TextString 88 | * @param {number} pos Character position 89 | * @param {boolean} [onlyAlphaNumeric=false] When set, skips a break unless the character just passed over (in the direction of travel) has the property ALetter, Numeric, Katakana or HebrewLetter 90 | * @return {number} Returns the next offset in the given direction which is a word break 91 | */ 92 | wordbreak.moveBreakOffset = function ( direction, string, pos, onlyAlphaNumeric ) { 93 | // when moving backwards, use the character to the left of the cursor 94 | const nextCodepoint = direction > 0 ? string.nextCodepoint.bind( string ) : string.prevCodepoint.bind( string ), 95 | prevCodepoint = direction > 0 ?
string.prevCodepoint.bind( string ) : string.nextCodepoint.bind( string ); 96 | 97 | let codepoint; 98 | // Search for the next break point 99 | while ( ( codepoint = nextCodepoint( pos ) ) !== null ) { 100 | pos += codepoint.length * direction; 101 | if ( this.isBreak( string, pos ) ) { 102 | // Check previous character was alpha-numeric if required 103 | if ( onlyAlphaNumeric ) { 104 | const lastProperty = getProperty( prevCodepoint( pos ) ); 105 | if ( lastProperty !== 'ALetter' && 106 | lastProperty !== 'Numeric' && 107 | lastProperty !== 'Katakana' && 108 | lastProperty !== 'HebrewLetter' ) { 109 | continue; 110 | } 111 | } 112 | break; 113 | } 114 | } 115 | return pos; 116 | }; 117 | 118 | /** 119 | * Evaluates whether a position within some text is a word boundary. 120 | * 121 | * The text object elements may be codepoints or code units 122 | * 123 | * @memberof unicodeJS.wordbreak 124 | * @param {unicodeJS.TextString} string TextString 125 | * @param {number} pos Character position 126 | * @return {boolean} Is the position a word boundary 127 | */ 128 | wordbreak.isBreak = function ( string, pos ) { 129 | const lft = [], rgt = []; 130 | let l = 0, r = 0; 131 | 132 | // Table 3a. Word_Break Rule Macros 133 | // Macro Represents 134 | // AHLetter (ALetter | Hebrew_Letter) 135 | // MidNumLetQ (MidNumLet | Single_Quote) 136 | 137 | // Get some context 138 | let nextCodepoint = string.nextCodepoint( pos + r ); 139 | let prevCodepoint = string.prevCodepoint( pos - l ); 140 | 141 | // Break at the start and end of text, unless the text is empty. 
142 | // WB1: sot ÷ Any 143 | // WB2: Any ÷ eot 144 | if ( nextCodepoint === null || prevCodepoint === null ) { 145 | return true; 146 | } 147 | 148 | // Do not break inside surrogate pair 149 | if ( string.isMidSurrogate( pos ) ) { 150 | return false; 151 | } 152 | 153 | // Store context 154 | rgt.push( getProperty( nextCodepoint ) ); 155 | lft.push( getProperty( prevCodepoint ) ); 156 | r += nextCodepoint.length; 157 | l += prevCodepoint.length; 158 | 159 | switch ( true ) { 160 | // Do not break within CRLF. 161 | // WB3: CR × LF 162 | case lft[ 0 ] === 'CR' && rgt[ 0 ] === 'LF': 163 | return false; 164 | 165 | // Otherwise break before and after Newlines (including CR and LF) 166 | // WB3a: (Newline | CR | LF) ÷ 167 | case lft[ 0 ] === 'Newline' || lft[ 0 ] === 'CR' || lft[ 0 ] === 'LF': 168 | // WB3b: ÷ (Newline | CR | LF) 169 | case rgt[ 0 ] === 'Newline' || rgt[ 0 ] === 'CR' || rgt[ 0 ] === 'LF': 170 | return true; 171 | // Do not break within emoji zwj sequences. 172 | // WB3c: ZWJ × \p{Extended_Pictographic} 173 | case lft[ 0 ] === 'ZWJ' && rgt[ 0 ] === 'ExtendedPictographic': 174 | return false; 175 | // Keep horizontal whitespace together. 176 | // WB3d: WSegSpace × WSegSpace 177 | case lft[ 0 ] === 'WSegSpace' && rgt[ 0 ] === 'WSegSpace': 178 | return false; 179 | } 180 | 181 | // Ignore Format and Extend characters, except after sot, CR, LF, and Newline. 182 | // (See Section 6.2, Replacing Ignore Rules.)
This also has the effect of: Any × (Format | Extend | ZWJ) 183 | // WB4: X (Extend | Format | ZWJ)* → X 184 | if ( rgt[ 0 ] && rgt[ 0 ].match( ZWJ_FE ) ) { 185 | // The Extend|Format character is to the right, so it is attached 186 | // to a character to the left, don't split here 187 | return false; 188 | } 189 | // We've reached the end of an ZWJ_FE sequence, collapse it 190 | while ( lft[ 0 ] && lft[ 0 ].match( ZWJ_FE ) ) { 191 | if ( pos - l <= 0 ) { 192 | // start of document 193 | return true; 194 | } 195 | prevCodepoint = string.prevCodepoint( pos - l ); 196 | // TODO: This is not covered by tests, see T264904 197 | // istanbul ignore next 198 | if ( prevCodepoint === null ) { 199 | // start of document? 200 | return true; 201 | } 202 | lft[ 0 ] = getProperty( prevCodepoint ); 203 | l += prevCodepoint.length; 204 | } 205 | 206 | // Do not break between most letters. 207 | // WB5: AHLetter × AHLetter 208 | if ( 209 | ( lft[ 0 ] === 'ALetter' || lft[ 0 ] === 'HebrewLetter' ) && 210 | ( rgt[ 0 ] === 'ALetter' || rgt[ 0 ] === 'HebrewLetter' ) 211 | ) { 212 | return false; 213 | } 214 | 215 | let nextProperty; 216 | // Some tests beyond this point require more context, as per WB4 ignore ZWJ_FE. 217 | do { 218 | nextCodepoint = string.nextCodepoint( pos + r ); 219 | if ( nextCodepoint === null ) { 220 | nextProperty = null; 221 | break; 222 | } 223 | r += nextCodepoint.length; 224 | nextProperty = getProperty( nextCodepoint ); 225 | } while ( nextProperty && nextProperty.match( ZWJ_FE ) ); 226 | rgt.push( nextProperty ); 227 | 228 | let prevProperty; 229 | do { 230 | prevCodepoint = string.prevCodepoint( pos - l ); 231 | if ( prevCodepoint === null ) { 232 | prevProperty = null; 233 | break; 234 | } 235 | l += prevCodepoint.length; 236 | prevProperty = getProperty( prevCodepoint ); 237 | } while ( prevProperty && prevProperty.match( ZWJ_FE ) ); 238 | lft.push( prevProperty ); 239 | 240 | switch ( true ) { 241 | // Do not break letters across certain punctuation. 
242 | // WB6: AHLetter × (MidLetter | MidNumLetQ) AHLetter 243 | case ( lft[ 0 ] === 'ALetter' || lft[ 0 ] === 'HebrewLetter' ) && 244 | ( rgt[ 1 ] === 'ALetter' || rgt[ 1 ] === 'HebrewLetter' ) && 245 | ( rgt[ 0 ] === 'MidLetter' || rgt[ 0 ] === 'MidNumLet' || rgt[ 0 ] === 'SingleQuote' ): 246 | // WB7: AHLetter (MidLetter | MidNumLetQ) × AHLetter 247 | case ( rgt[ 0 ] === 'ALetter' || rgt[ 0 ] === 'HebrewLetter' ) && 248 | ( lft[ 1 ] === 'ALetter' || lft[ 1 ] === 'HebrewLetter' ) && 249 | ( lft[ 0 ] === 'MidLetter' || lft[ 0 ] === 'MidNumLet' || lft[ 0 ] === 'SingleQuote' ): 250 | // WB7a: Hebrew_Letter × Single_Quote 251 | case lft[ 0 ] === 'HebrewLetter' && rgt[ 0 ] === 'SingleQuote': 252 | // WB7b: Hebrew_Letter × Double_Quote Hebrew_Letter 253 | case lft[ 0 ] === 'HebrewLetter' && rgt[ 0 ] === 'DoubleQuote' && rgt[ 1 ] === 'HebrewLetter': 254 | // WB7c: Hebrew_Letter Double_Quote × Hebrew_Letter 255 | case lft[ 1 ] === 'HebrewLetter' && lft[ 0 ] === 'DoubleQuote' && rgt[ 0 ] === 'HebrewLetter': 256 | 257 | // Do not break within sequences of digits, or digits adjacent to letters (“3a”, or “A3”). 258 | // WB8: Numeric × Numeric 259 | case lft[ 0 ] === 'Numeric' && rgt[ 0 ] === 'Numeric': 260 | // WB9: AHLetter × Numeric 261 | case ( lft[ 0 ] === 'ALetter' || lft[ 0 ] === 'HebrewLetter' ) && rgt[ 0 ] === 'Numeric': 262 | // WB10: Numeric × AHLetter 263 | case lft[ 0 ] === 'Numeric' && ( rgt[ 0 ] === 'ALetter' || rgt[ 0 ] === 'HebrewLetter' ): 264 | return false; 265 | 266 | // Do not break within sequences, such as “3.2” or “3,456.789”. 
267 | // WB11: Numeric (MidNum | MidNumLetQ) × Numeric 268 | case rgt[ 0 ] === 'Numeric' && lft[ 1 ] === 'Numeric' && 269 | ( lft[ 0 ] === 'MidNum' || lft[ 0 ] === 'MidNumLet' || lft[ 0 ] === 'SingleQuote' ): 270 | // WB12: Numeric × (MidNum | MidNumLetQ) Numeric 271 | case lft[ 0 ] === 'Numeric' && rgt[ 1 ] === 'Numeric' && 272 | ( rgt[ 0 ] === 'MidNum' || rgt[ 0 ] === 'MidNumLet' || rgt[ 0 ] === 'SingleQuote' ): 273 | return false; 274 | 275 | // Do not break between Katakana. 276 | // WB13: Katakana × Katakana 277 | case lft[ 0 ] === 'Katakana' && rgt[ 0 ] === 'Katakana': 278 | return false; 279 | 280 | // Do not break from extenders. 281 | // WB13a: (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet 282 | case rgt[ 0 ] === 'ExtendNumLet' && 283 | ( lft[ 0 ] === 'ALetter' || lft[ 0 ] === 'HebrewLetter' || lft[ 0 ] === 'Numeric' || lft[ 0 ] === 'Katakana' || lft[ 0 ] === 'ExtendNumLet' ): 284 | // WB13b: ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 285 | case lft[ 0 ] === 'ExtendNumLet' && 286 | ( rgt[ 0 ] === 'ALetter' || rgt[ 0 ] === 'HebrewLetter' || rgt[ 0 ] === 'Numeric' || rgt[ 0 ] === 'Katakana' ): 287 | return false; 288 | } 289 | 290 | // Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point. 
291 | // WB15: ^ (RI RI)* RI × RI 292 | // WB16: [^RI] (RI RI)* RI × RI 293 | if ( lft[ 0 ] === 'RegionalIndicator' && rgt[ 0 ] === 'RegionalIndicator' ) { 294 | // Count RIs on the left 295 | let regional = 0; 296 | let n = 0; 297 | 298 | do { 299 | prevCodepoint = string.prevCodepoint( pos - n ); 300 | if ( prevCodepoint === null ) { 301 | break; 302 | } 303 | n += prevCodepoint.length; 304 | prevProperty = getProperty( prevCodepoint ); 305 | if ( prevProperty === 'RegionalIndicator' ) { 306 | regional++; 307 | } 308 | } while ( prevProperty === 'RegionalIndicator' || ( prevProperty && prevProperty.match( ZWJ_FE ) ) ); 309 | if ( regional % 2 === 1 ) { 310 | return false; 311 | } 312 | 313 | } 314 | // Otherwise, break everywhere (including around ideographs). 315 | // WB999: Any ÷ Any 316 | return true; 317 | }; 318 | }() ); 319 | -------------------------------------------------------------------------------- /tests/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "extends": [ 4 | "wikimedia/common", 5 | "wikimedia/language/es2019", 6 | "wikimedia/qunit" 7 | ], 8 | "globals": { 9 | "unicodeJS": "readonly" 10 | }, 11 | "plugins": [ "html" ], 12 | "settings": { 13 | "html/indent": "+tab", 14 | "html/report-bad-indent": "error" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /tests/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | UnicodeJS Tests 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 |
Loading QUnit. Ensure you have run npm install.
/*!
 * UnicodeJS character class module tests
 *
 * @copyright 2015– UnicodeJS team and others; see AUTHORS.txt
 * @license The MIT License (MIT); see LICENSE.txt
 */

QUnit.module( 'unicodeJS.characterclass' );

QUnit.test( 'patterns', ( assert ) => {
	// Every entry here must come back as its own match of the word pattern
	const wordSamples = [
		// Basic Latin letter
		'a',
		// Basic Latin number
		'1',
		// Underscore (Punctuation, connector)
		'_',
		// Latin-1 Supplement letter
		'é',
		// Latin Extended-A letter
		'ŵ',
		// Mark (combining accent)
		'x', '\u0301',
		// Han character
		'中',
		// Full-width punctuation connector
		'︴',
		// SMP Han character U+282E2
		'𨋢',
		// Combining mark and ZWNJ
		'क', '\u094d', '\u200C', 'ष',
		// SMP math letter U+1D538
		'𝔸'
	];

	// Nothing in this list may be matched by the word pattern
	const nonWordSamples = [
		// Basic Latin punctuation
		'$', '-', '.', '!', ' ', '"', '\'', '(', ')', '[', ']', '{', '}',
		// Other punctuation
		'“', '”', '‘', '’', '「', '」',
		// Space
		' ', '\r', '\n', '\t', '\u3000',
		// Non-Basic-Latin digit U+0668
		'٨',
		// SMP emoticon U+1F607 and digit U+1D7E0
		'😇', '𝟠'
	];

	// eslint-disable-next-line security/detect-non-literal-regexp
	const wordPattern = new RegExp( unicodeJS.characterclass.patterns.word, 'g' );
	const wordText = wordSamples.join( '' );
	const nonWordText = nonWordSamples.join( '' );

	assert.deepEqual( wordText.match( wordPattern ), wordSamples );
	assert.strictEqual( nonWordText.match( wordPattern ), null );
} );
/*!
 * UnicodeJS Grapheme Break module tests
 *
 * @copyright 2013– UnicodeJS team and others; see AUTHORS.txt
 * @license The MIT License (MIT); see LICENSE.txt
 */

QUnit.module( 'unicodeJS.graphemebreak' );

QUnit.test( 'Unicode test suite', ( assert ) => {
	const cases = unicodeJS.testdata.graphemebreak.reduce( unicodeJS.test.parseTestReduce, [] );

	for ( const testCase of cases ) {
		// Leading "true" is the break at the start of the text
		const breaks = [ true ];

		for ( const cluster of unicodeJS.graphemebreak.splitClusters( testCase.string ) ) {
			breaks.push(
				// No break between the code units inside a cluster...
				...new Array( cluster.length - 1 ).fill( false ),
				// ...and a break once the cluster ends
				true
			);
		}

		assert.deepEqual( breaks, testCase.expected, testCase.msg );
	}
} );

QUnit.test( 'splitClusters', ( assert ) => {
	// Joining these clusters and splitting the result again must round-trip
	const clusters = [
		'a',
		' ',
		' ',
		'b',
		'カ',
		'タ',
		'カ',
		'ナ',
		'c\u0300\u0327', // c with two combining chars
		'\ud800\udf08', // U+10308 OLD ITALIC LETTER THE
		'\ud800\udf08\u0302', // U+10308 + combining circumflex
		'\r\n',
		'\n',
		'\u1104\u1173', // jamo L+V
		'\u1105\u1161\u11a8', // jamo L+V+T
		'\ud83c\udded\ud83c\uddf0' // 2*regional indicator characters
	];
	const text = clusters.join( '' );

	assert.deepEqual(
		unicodeJS.graphemebreak.splitClusters( text ),
		clusters,
		'Split clusters'
	);
} );

// tests/unicodejs.namespace.js
/* eslint-env browser */
window.unicodeJS = {};
/*!
 * UnicodeJS Base module tests
 *
 * @copyright 2013– UnicodeJS team and others; see AUTHORS.txt
 * @license The MIT License (MIT); see LICENSE.txt
 */

unicodeJS.testdata = {};
unicodeJS.test = {
	/**
	 * Reducer callback: parse one line of Unicode break test data and append
	 * the resulting test case to the accumulator.
	 *
	 * The part of the line before the first '#' is split on single spaces.
	 * Items at even offsets are break markers ('÷' = break, '×' = no break);
	 * items at odd offsets are hexadecimal code points. `expected` receives
	 * one flag per marker, plus an extra `false` for every code point above
	 * U+FFFF (the boundary inside its surrogate pair).
	 *
	 * @param {Object[]} arr Accumulator of parsed test cases
	 * @param {string} line Raw test line, including any '#' comment
	 * @return {Object[]} `arr`, with `{ msg, string, expected }` appended
	 */
	parseTestReduce: function ( arr, line ) {
		const breakMap = {
				'÷': true,
				'×': false
			},
			expected = [],
			data = line.split( '#' )[ 0 ].trim().split( ' ' );

		let chars = '';
		data.forEach( ( str, i ) => {
			if ( i % 2 === 0 ) {
				// Tests at even offsets
				expected.push( breakMap[ str ] );
			} else {
				// Chars at odd offsets
				const codepoint = Number( '0x' + str );
				chars += String.fromCodePoint( codepoint );
				// For surrogate pairs, add an expected no-break between them
				if ( codepoint > 0xFFFF ) {
					expected.push( false );
				}
			}
		} );

		arr.push( {
			msg: line,
			string: chars,
			expected: expected
		} );
		return arr;
	}
};

QUnit.module( 'unicodeJS' );

QUnit.test( 'prevNextCodepoint', ( assert ) => {
	const tests = [
		// string, nextValues, prevValues, message
		[
			'XYZ',
			[ 'X', 'Y', 'Z', null ],
			[ null, 'X', 'Y', 'Z' ],
			'no surrogate'
		],
		[
			'X\ud800\udc00YZ',
			[ 'X', '\ud800\udc00', '\udc00', 'Y', 'Z', null ],
			[ null, 'X', '\ud800', '\ud800\udc00', 'Y', 'Z' ],
			'pair'
		],
		[
			'\ud800WX\ud800YZ\ud800',
			[ '\ud800', 'W', 'X', '\ud800', 'Y', 'Z', '\ud800', null ],
			[ null, '\ud800', 'W', 'X', '\ud800', 'Y', 'Z', '\ud800' ],
			'unpaired leading'
		],
		[
			'\udc00WX\udc00YZ\udc00',
			[ '\udc00', 'W', 'X', '\udc00', 'Y', 'Z', '\udc00', null ],
			[ null, '\udc00', 'W', 'X', '\udc00', 'Y', 'Z', '\udc00' ],
			'unpaired trailing'
		]
	];
	tests.forEach( ( [ text, nextValues, prevValues, message ] ) => {
		const s = new unicodeJS.TextString( text );
		// The array index doubles as the code unit offset being queried
		nextValues.forEach( ( value, i ) => {
			assert.strictEqual(
				s.nextCodepoint( i ),
				value,
				message + ': nextCodepoint(' + i + ')'
			);
		} );
		prevValues.forEach( ( value, i ) => {
			assert.strictEqual(
				s.prevCodepoint( i ),
				value,
				message + ': prevCodepoint(' + i + ')'
			);
		} );
	} );
} );

QUnit.test( 'charRangeArrayRegexp', ( assert ) => {
	// ranges, expected regexp source, message
	const equalityTests = [
		[ [ 0x0040 ], '\\u0040', 'single BMP character' ],
		[ [ 0xFFFF ], '\\uffff', 'highest BMP character' ],
		[
			[
				0x005F,
				[ 0x203F, 0x2040 ],
				0x2054,
				[ 0xFE33, 0xFE34 ],
				[ 0xFE4D, 0xFE4F ],
				0xFF3F
			],
			'[\\u005f\\u203f-\\u2040\\u2054\\ufe33-\\ufe34\\ufe4d-\\ufe4f\\uff3f]',
			'multiple BMP ranges (= ExtendNumLet from wordbreak rules)'
		],
		[ [ 0xD7FF ], '\\ud7ff', 'just below surrogate range' ],
		[ [ 0xE000 ], '\\ue000', 'just above surrogate range' ],
		[ [ 0x10000 ], '\\ud800\\udc00', 'lowest non-BMP character' ],
		[ [ 0x10001 ], '\\ud800\\udc01', 'second-lowest non-BMP character' ],
		[ [ 0x103FF ], '\\ud800\\udfff', 'highest character with D800 leading surrogate' ],
		[ [ 0x10400 ], '\\ud801\\udc00', 'lowest character with D801 leading surrogate' ],
		[
			[ [ 0xFF00, 0xFFFF ] ],
			'[\\uff00-\\uffff]',
			'single range at top of BMP'
		],
		[
			[ [ 0xFF00, 0x10000 ] ],
			'[\\uff00-\\uffff]|\\ud800\\udc00',
			'single range spanning BMP and non-BMP'
		],
		[
			[ 0xFFFF, 0x10000, 0x10002 ],
			'\\uffff|\\ud800\\udc00|\\ud800\\udc02', // TODO: could compact
			'single characters, both BMP and non-BMP'
		],
		[
			[ [ 0x0300, 0x0400 ], 0x10FFFF ],
			'[\\u0300-\\u0400]|\\udbff\\udfff',
			'BMP range and non-BMP character'
		],
		[
			[ [ 0xFF00, 0x103FF ] ],
			'[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]',
			'range to top of D800 leading surrogate range'
		],
		[
			[ [ 0xFF00, 0x10400 ] ],
			'[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801\\udc00',
			'range to start of D801 leading surrogate range'
		],
		[
			[ [ 0xFF00, 0x10401 ] ],
			'[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801[\\udc00-\\udc01]',
			'range past start of D801 leading surrogate range'
		],
		[
			[ [ 0xFF00, 0x15555 ] ],
			'[\\uff00-\\uffff]|[\\ud800-\\ud814][\\udc00-\\udfff]|\\ud815[\\udc00-\\udd55]',
			'range spanning multiple leading surrogate ranges'
		],
		[
			[ [ 0x10454, 0x10997 ] ],
			'\\ud801[\\udc54-\\udfff]|\\ud802[\\udc00-\\udd97]',
			'range starting within one leading surrogate range, and ending in the next'
		],
		[
			[ [ 0x20222, 0x29999 ] ],
			'\\ud840[\\ude22-\\udfff]|[\\ud841-\\ud865][\\udc00-\\udfff]|\\ud866[\\udc00-\\udd99]',
			'range starting within one leading surrogate range, and ending in a distant one'
		],
		[
			[ 0x00AD, [ 0x0600, 0x0604 ], 0x06DD, 0x070F,
				[ 0x200E, 0x200F ], [ 0x202A, 0x202E ], [ 0x2060, 0x2064 ],
				[ 0x206A, 0x206F ], 0xFEFF, [ 0xFFF9, 0xFFFB ],
				0x110BD, [ 0x1D173, 0x1D17A ],
				0xE0001, [ 0xE0020, 0xE007F ]
			],
			// TODO: could compact
			'[\\u00ad\\u0600-\\u0604\\u06dd\\u070f' +
			'\\u200e-\\u200f\\u202a-\\u202e\\u2060-\\u2064' +
			'\\u206a-\\u206f\\ufeff\\ufff9-\\ufffb]' +
			'|\\ud804\\udcbd|\\ud834[\\udd73-\\udd7a]|\\udb40\\udc01' +
			'|\\udb40[\\udc20-\\udc7f]',
			'multiple BMP and non-BMP ranges (= Format from wordbreak rules)'
		],
		[
			[ [ 0x0, 0xD7FF ], [ 0xE000, 0xFFFF ], [ 0x10000, 0x10FFFF ] ],
			'[\\u0000-\\ud7ff\\ue000-\\uffff]|[\\ud800-\\udbff][\\udc00-\\udfff]',
			'largest possible range'
		]
	];
	// ranges that must be rejected, message
	const throwTests = [
		[ [ 0xD800 ], 'surrogate character U+D800' ],
		[ [ 0xDFFF ], 'surrogate character U+DFFF' ],
		[ [ 0x110000 ], 'character too high' ],
		[ [ [ 0xCCCC, 0xDDDD ] ], 'surrogate overlap 1' ],
		[ [ [ 0xDDDD, 0xEEEE ] ], 'surrogate overlap 2' ],
		[ [ [ 0xDDDD, 0xEEEEE ] ], 'surrogate overlap 3' ],
		[ [ [ 0xCCCC, 0xEEEE ] ], 'surrogate overlap 4' ],
		[ [ [ 0x2, 0x1 ] ], 'min > max' ],
		[ [ [ 0x10FFFF, 0x110000 ] ], 'range too high' ]
	];

	equalityTests.forEach( ( test ) => {
		assert.strictEqual(
			unicodeJS.charRangeArrayRegexp( test[ 0 ] ),
			test[ 1 ],
			test[ 2 ]
		);
	} );
	throwTests.forEach( ( test ) => {
		const doTestFunc = function () {
			unicodeJS.charRangeArrayRegexp( test[ 0 ] );
		};

		assert.throws(
			doTestFunc,
			Error,
			'throw: ' + test[ 1 ]
		);
	} );
} );
/*!
 * UnicodeJS Word Break module tests
 *
 * @copyright 2013– UnicodeJS team and others; see AUTHORS.txt
 * @license The MIT License (MIT); see LICENSE.txt
 */

QUnit.module( 'unicodeJS.wordbreak' );

QUnit.test( 'Unicode test suite', ( assert ) => {
	unicodeJS.testdata.wordbreak.reduce( unicodeJS.test.parseTestReduce, [] )
		.forEach( ( test ) => {
			const textString = new unicodeJS.TextString( test.string );
			// One isBreak() result per code unit offset, from 0 up to and
			// including the end of the string
			const result = Array.from(
				{ length: test.string.length + 1 },
				( _, i ) => unicodeJS.wordbreak.isBreak( textString, i )
			);
			assert.deepEqual( result, test.expected, test.msg );
		} );
} );

QUnit.test( 'nextBreakOffset/prevBreakOffset', ( assert ) => {
	const text = 'The quick brown fox',
		textString = new unicodeJS.TextString( text ),
		// Padded with a duplicate at each end: the forward walk checks
		// breaks[ 2 ] onwards, the backward walk checks breaks[ length - 3 ]
		// down to breaks[ 0 ]
		breaks = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ];

	let offset = 0;
	breaks.slice( 2 ).forEach( ( expectedOffset ) => {
		offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset );
		assert.strictEqual( offset, expectedOffset, 'Next break is at position ' + expectedOffset );
	} );
	breaks.slice( 0, -2 ).reverse().forEach( ( expectedOffset ) => {
		offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset );
		assert.strictEqual( offset, expectedOffset, 'Previous break is at position ' + expectedOffset );
	} );
} );

QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', ( assert ) => {
	// Each segment is exactly 10 code units long (the last one is 7), so the
	// comment above a segment is the offset of its first code unit and the
	// whole text is 57 code units, matching the final expected offset.
	// NOTE(review): the runs of spaces were reconstructed from the expected
	// offsets below; confirm that plain U+0020 spaces are intended.
	const text =
			// 0
			'   The qui' +
			// 10
			'ck  brown ' +
			// 20
			'..fox jump' +
			// 30
			's... 3.141' +
			// 40
			'59 すどくスドク ' +
			// 50
			'עברית  ',
		textString = new unicodeJS.TextString( text ),
		nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 55, 57 ],
		prevBreaks = [ 50, 46, 35, 26, 22, 14, 7, 3, 0 ];

	// Walk forwards to the end of the text, then backwards from there; the
	// third argument is the whitespace-ignoring flag this test is named after
	let offset = 0;
	nextBreaks.forEach( ( expectedOffset ) => {
		offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true );
		assert.strictEqual( offset, expectedOffset, 'Next break is at position ' + expectedOffset );
	} );
	prevBreaks.forEach( ( expectedOffset ) => {
		offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true );
		assert.strictEqual( offset, expectedOffset, 'Previous break is at position ' + expectedOffset );
	} );
} );

QUnit.test( 'TextString', ( assert ) => {
	const plainString = 'abc𨋢def',
		textString = new unicodeJS.TextString( plainString );

	assert.strictEqual( textString.toString(), plainString, 'toString' );
} );

/*
 * Non-JavaScript content that follows this file in the listing, kept verbatim:
 *
 * /tools/.eslintrc.json:
 * {
 * 	"root": true,
 * 	"extends": [
 * 		"wikimedia/server"
 * 	],
 * 	"plugins": [ "html" ],
 * 	"settings": {
 * 		"html/indent": "+tab",
 * 		"html/report-bad-indent": "error"
 * 	}
 * }
 *
 * /tools/strongDir.js.html: (continues below)
 */
 5 | /**
 6 |  * Matches the first strong directionality codepoint:
 7 |  * - in group 1 if it is LTR
 8 |  * - in group 2 if it is RTL
 9 |  * Does not match if there is no strong directionality codepoint.
10 |  *
11 |  * Generated by UnicodeJS (see tools/strongDir) from the UCD; see
12 |  * https://gerrit.wikimedia.org/r/unicodejs .
13 |  */
14 | strongDirRegExp = new RegExp(
15 | 	'(?:' +
16 | 		'(' +
17 | 			'' +
18 | 		')|(' +
19 | 			'' +
20 | 		')' +
21 | 	')'
22 | );
23 | 
24 | 34 | -------------------------------------------------------------------------------- /tools/strongDir.php.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
 5 | /**
 6 |  * Matches the first strong directionality codepoint:
 7 |  * - in group 1 if it is LTR
 8 |  * - in group 2 if it is RTL
 9 |  * Does not match if there is no strong directionality codepoint.
10 |  *
11 |  * The form is (?:([strong LTR codepoint])|([strong RTL codepoint])) .
12 |  *
13 |  * Generated by UnicodeJS (see tools/strongDir) from the UCD; see
14 |  * https://gerrit.wikimedia.org/r/unicodejs .
15 |  */
16 | static private $strongDirRegex = '/(?:()|())/u';
17 | 
'use strict';

// Generates unicodejs.*(properties|categories).js from Unicode data

const VERSION = '15.0.0',
	hasOwn = Object.prototype.hasOwnProperty,
	http = require( 'http' ),
	fs = require( 'fs-extra' ),
	dir = __dirname + '/../src/generated',
	// One entry per generated file:
	// - url: UCD source file; '%V' is replaced by VERSION
	// - jsname: suffix of the generated unicodeJS.* object and file name
	// - full: whether every property of the source file is extracted
	// - propPatterns: property names to keep; capture group 1 is the output key
	// - excludeSurrogates: drop a range that is exactly U+D800..U+DFFF
	data = [
		{
			url: 'http://unicode.org/Public/%V/ucd/DerivedCoreProperties.txt',
			jsname: 'derivedcoreproperties',
			propPatterns: [ /^(Alphabetic)$/ ]
		},
		{
			url: 'http://www.unicode.org/Public/%V/ucd/extracted/DerivedGeneralCategory.txt',
			jsname: 'derivedgeneralcategories',
			propPatterns: [ /^(Pc)$/, /^(M).*$/ ]
		},
		{
			url: 'http://www.unicode.org/Public/%V/ucd/auxiliary/GraphemeBreakProperty.txt',
			jsname: 'graphemebreakproperties',
			full: true,
			propPatterns: [ /^(.*)$/ ],
			excludeSurrogates: true
		},
		{
			url: 'http://www.unicode.org/Public/%V/ucd/auxiliary/WordBreakProperty.txt',
			jsname: 'wordbreakproperties',
			full: true,
			propPatterns: [ /^(.*)$/ ]
		},
		{
			url: 'http://www.unicode.org/Public/%V/ucd/extracted/DerivedBidiClass.txt',
			jsname: 'derivedbidiclasses',
			propPatterns: [ /^(L|R|AL)$/ ]
		},
		{
			url: 'http://www.unicode.org/Public/%V/ucd/emoji/emoji-data.txt',
			jsname: 'emojiproperties',
			propPatterns: [ /^(Extended_Pictographic)$/ ]
		}
	];

/**
 * Format a code point as an upper-case hex literal, zero-padded to at least
 * four digits (e.g. 0x0040, 0x1F607).
 *
 * @param {number} num Code point
 * @return {string} Hex literal
 */
function toHex( num ) {
	return '0x' + num.toString( 16 ).toUpperCase().padStart( 4, '0' );
}

/**
 * Collect the code point ranges of every wanted property in a UCD data file.
 *
 * @param {string} body Contents of the data file
 * @param {RegExp[]} propPatterns Patterns tested against each property name;
 *  capture group 1 of a matching pattern is the key the range is stored under
 * @return {Object} `properties`: keys in order of first appearance;
 *  `ranges`: key -> list of [ start, end ] pairs in file order
 * @throws {Error} If a line is neither blank, a comment, nor a definition
 */
function parseRanges( body, propPatterns ) {
	const ranges = {},
		properties = [],
		blankTest = /^\s*(#|$)/,
		// eslint-disable-next-line security/detect-unsafe-regex
		definitionTest = /^([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))?\s*;\s*(\w+)\s*#/;

	body.split( /\n/ ).forEach( ( rawLine ) => {
		const line = rawLine.trim();
		// Ignore comment or blank lines
		if ( line.match( blankTest ) ) {
			return;
		}
		// Find things like one of the following:
		// XXXX ; propertyname
		// XXXX..YYYY ; propertyname
		const lineMatches = line.match( definitionTest );
		if ( !lineMatches ) {
			throw new Error( 'Bad line: ' + line );
		}

		const start = parseInt( lineMatches[ 1 ], 16 );
		// A single code point is stored as a range of length one
		const end = parseInt( lineMatches[ 2 ] || lineMatches[ 1 ], 16 );
		const propText = lineMatches[ 3 ];

		propPatterns.forEach( ( propPattern ) => {
			const propMatches = propText.match( propPattern );
			if ( !propMatches ) {
				return;
			}
			const propName = propMatches[ 1 ];
			if ( !hasOwn.call( ranges, propName ) ) {
				properties.push( propName );
				ranges[ propName ] = [];
			}
			ranges[ propName ].push( [ start, end ] );
		} );
	} );

	return { properties: properties, ranges: ranges };
}

/**
 * Sort ranges by start and merge runs in which one range ends exactly one
 * code point before the next begins. Overlapping ranges are left alone.
 *
 * @param {number[][]} propRanges List of [ start, end ] pairs; not modified
 * @return {number[][]} New sorted list of merged [ start, end ] pairs
 */
function mergeConsecutiveRanges( propRanges ) {
	const merged = [];

	propRanges
		.slice()
		.sort( ( a, b ) => a[ 0 ] - b[ 0 ] )
		.forEach( ( range ) => {
			const last = merged[ merged.length - 1 ];
			if ( last && last[ 1 ] + 1 === range[ 0 ] ) {
				last[ 1 ] = range[ 1 ];
			} else {
				merged.push( [ range[ 0 ], range[ 1 ] ] );
			}
		} );

	return merged;
}

/**
 * Turn merged ranges into the source fragments used in the generated file:
 * a bare literal for one code point, two bare literals for a range of two,
 * and `[ start, end ]` for anything longer.
 *
 * @param {number[][]} propRanges Merged [ start, end ] pairs
 * @param {boolean} excludeSurrogates Omit a range that is exactly U+D800..U+DFFF
 * @return {string[]} Source fragments
 */
function formatRanges( propRanges, excludeSurrogates ) {
	const rangeStrings = [];

	propRanges.forEach( ( propRange ) => {
		const start = propRange[ 0 ],
			end = propRange[ 1 ];

		if ( excludeSurrogates && start === 0xD800 && end === 0xDFFF ) {
			return;
		} else if ( end === start ) {
			rangeStrings.push( toHex( start ) );
		} else if ( end === start + 1 ) {
			rangeStrings.push( toHex( start ), toHex( end ) );
		} else {
			rangeStrings.push( '[ ' + toHex( start ) + ', ' + toHex( end ) + ' ]' );
		}
	} );

	return rangeStrings;
}

/**
 * Generate src/generated/unicodejs.<jsname>.js from the text of a UCD data file.
 *
 * @param {string} body Contents of the UCD data file
 * @param {string} jsname Suffix of the generated object and file name
 * @param {boolean} full Whether the whole file is extracted; when false a
 *  "partial extraction only" note is written into the output
 * @param {RegExp[]} propPatterns Properties to extract (see parseRanges)
 * @param {boolean} excludeSurrogates Omit a range that is exactly U+D800..U+DFFF
 * @throws {Error} If the data file contains an unparsable line
 */
function extractProperties( body, jsname, full, propPatterns, excludeSurrogates ) {
	const parsed = parseRanges( body, propPatterns );

	// Translate ranges into js fragments; underscores are dropped from keys
	const fragments = parsed.properties.map( ( prop ) => {
		const rangeStrings = formatRanges(
			mergeConsecutiveRanges( parsed.ranges[ prop ] ),
			excludeSurrogates
		);
		return prop.replace( /_/g, '' ) + ': [ ' + rangeStrings.join( ', ' ) + ' ]';
	} );

	// Write js file
	let js = '// This file is GENERATED by tools/unicodejs-properties.js\n' +
		'// DO NOT EDIT\n' +
		'unicodeJS.' + jsname + ' = {\n\t';
	if ( !full ) {
		js += '// partial extraction only\n\t';
	}
	js += fragments.join( ',\n\t' ) +
		'\n};\n';

	const filename = dir + '/unicodejs.' + jsname + '.js';
	// eslint-disable-next-line security/detect-non-literal-fs-filename
	fs.writeFile( filename, js, ( err ) => {
		if ( err ) {
			throw err;
		}
		console.log( 'wrote ' + filename );
	} );
}

fs.emptyDir( dir, ( err ) => {
	if ( err ) {
		throw err;
	}
	console.log( 'deleted old files' );
	data.forEach( ( options ) => {
		const url = options.url.replace( '%V', VERSION );
		console.log( 'fetching ' + url );
		// http.get() ends the request itself, so no explicit request.end()
		http.get( url, ( res ) => {
			if ( res.statusCode !== 200 ) {
				// Drain the response, then fail loudly rather than parsing
				// an error page or redirect body as Unicode data
				res.resume();
				throw new Error( 'Failed to fetch ' + url + ': HTTP ' + res.statusCode );
			}

			let body = '';

			res.setEncoding( 'utf8' );

			res.on( 'data', ( chunk ) => {
				body += chunk;
			} );

			res.on( 'end', () => {
				extractProperties(
					body,
					options.jsname,
					!!options.full,
					options.propPatterns,
					!!options.excludeSurrogates
				);
			} );
		} );
	} );
} );
'use strict';

// Generates unicodejs.*.testdata.js from Unicode test data

const VERSION = '15.0.0',
	http = require( 'http' ),
	fs = require( 'fs-extra' ),
	dir = __dirname + '/../tests/generated',
	// url: UCD test file ('%V' is replaced by VERSION)
	// jsname: key under unicodeJS.testdata and part of the output file name
	data = [
		{
			url: 'http://www.unicode.org/Public/%V/ucd/auxiliary/WordBreakTest.txt',
			jsname: 'wordbreak'
		},
		{
			url: 'http://www.unicode.org/Public/%V/ucd/auxiliary/GraphemeBreakTest.txt',
			jsname: 'graphemebreak'
		}
	];

/**
 * Serialize a string as a single-quoted JavaScript string literal.
 *
 * JSON.stringify() supplies the escaping of control characters and
 * backslashes; only the quote handling is then switched from double to
 * single quotes, so a quote of either kind inside the text stays valid.
 *
 * @param {string} str Text to serialize
 * @return {string} Single-quoted string literal
 */
function toSingleQuotedLiteral( str ) {
	const inner = JSON.stringify( str ).slice( 1, -1 )
		// Every '"' in JSON output is preceded by its escaping backslash
		.replace( /\\"/g, '"' )
		.replace( /'/g, '\\\'' );
	return '\'' + inner + '\'';
}

/**
 * Generate tests/generated/unicodejs.<jsname>.testdata.js from the text of a
 * Unicode break test file. Every line that has data before its first '#' is
 * stored verbatim (comment included) as one array entry.
 *
 * @param {string} body Contents of the test file
 * @param {string} jsname Key under unicodeJS.testdata and part of the file name
 */
function buildTests( body, jsname ) {
	// Accept both LF and CRLF, and skip lines whose data part is blank
	const output = body.split( /\r?\n/ )
		.filter( ( line ) => line.split( '#' )[ 0 ].trim() !== '' );

	// Same layout as JSON.stringify( output, null, '\t' )
	const literal = output.length ?
		'[\n\t' + output.map( toSingleQuotedLiteral ).join( ',\n\t' ) + '\n]' :
		'[]';

	// Write js file
	const js = '// This file is GENERATED by tools/unicodejs-tests.js\n' +
		'// DO NOT EDIT\n' +
		'unicodeJS.testdata.' + jsname + ' = ' + literal + ';\n';

	const filename = dir + '/unicodejs.' + jsname + '.testdata.js';
	// eslint-disable-next-line security/detect-non-literal-fs-filename
	fs.writeFile( filename, js, ( err ) => {
		if ( err ) {
			throw err;
		}
		console.log( 'wrote ' + filename );
	} );
}

fs.emptyDir( dir, ( err ) => {
	if ( err ) {
		throw err;
	}
	console.log( 'deleted old files' );
	data.forEach( ( options ) => {
		const url = options.url.replace( '%V', VERSION );
		console.log( 'fetching ' + url );
		// http.get() ends the request itself, so no explicit request.end()
		http.get( url, ( res ) => {
			if ( res.statusCode !== 200 ) {
				// Drain the response, then fail loudly rather than writing
				// test data built from an error page or redirect body
				res.resume();
				throw new Error( 'Failed to fetch ' + url + ': HTTP ' + res.statusCode );
			}

			let body = '';

			res.setEncoding( 'utf8' );

			res.on( 'data', ( chunk ) => {
				body += chunk;
			} );

			res.on( 'end', () => {
				buildTests( body, options.jsname );
			} );
		} );
	} );
} );