├── .babelrc ├── .eslintrc.js ├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── README.md ├── bin ├── release.sh └── server.js ├── package.json ├── src ├── __tests__ │ └── index.js └── index.js └── yarn.lock /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": ["@babel/preset-env"], 3 | "plugins": [ 4 | "add-module-exports" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | extends: 'algolia', 3 | "env": { 4 | "jest": true 5 | } 6 | }; 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | npm-debug.log 3 | dist/ 4 | coverage/ 5 | package-lock.json 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - node 4 | notifications: 5 | email: false 6 | cache: 7 | yarn: true 8 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | ## [2.0.1](https://github.com/algolia/chunk-text/compare/v1.0.5...v2.0.1) (2020-09-14) 3 | 4 | ### Features 5 | 6 | * enhanced multi-byte character support using `TextEncoder` (#8) ([dab7672](https://github.com/algolia/chunk-text/commit/dab7672)) 7 | 8 | 9 | ## [1.0.5](https://github.com/algolia/chunk-text/compare/v1.0.4...v1.0.5) (2017-07-19) 10 | 11 | Fixed the build. 
12 | 13 | 14 | ## [1.0.4](https://github.com/algolia/chunk-text/compare/v1.0.3...v1.0.4) (2017-06-27) 15 | 16 | 17 | ### Bug Fixes 18 | 19 | * add support for multibyte characters (#2) ([1398956](https://github.com/algolia/chunk-text/commit/1398956)) 20 | 21 | 22 | ### Features 23 | 24 | * **test:** add travis (#4) ([c88466b](https://github.com/algolia/chunk-text/commit/c88466b)) 25 | 26 | 27 | 28 | 29 | ## [1.0.3](https://github.com/algolia/chunk-text/compare/v1.0.2...v1.0.3) (2017-06-19) 30 | 31 | 32 | 33 | 34 | ## [1.0.2](https://github.com/algolia/chunk-text/compare/v1.0.1...v1.0.2) (2017-06-18) 35 | 36 | 37 | ### Bug Fixes 38 | 39 | * remove default key when requiring in nodejs ([0414419](https://github.com/algolia/chunk-text/commit/0414419)) 40 | 41 | 42 | 43 | 44 | ## [1.0.1](https://github.com/algolia/chunk-text/compare/v1.0.0...v1.0.1) (2017-06-18) 45 | 46 | 47 | 48 | 49 | # 1.0.0 (2017-06-18) 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Chunk Text 2 | === 3 | 4 | > chunk/split a string by length without cutting/truncating words. 5 | 6 | 7 | ``` javascript 8 | const out = chunk('hello world how are you?', 7); 9 | /* ['hello', 'world', 'how are', 'you?'] */ 10 | ``` 11 | 12 | 13 | ## Installation 14 | 15 | ``` bash 16 | $ npm install chunk-text 17 | # yarn add chunk-text 18 | ``` 19 | 20 | 21 | ## Usage 22 | 23 | All number values are parsed according to `Number.parseInt`. 24 | 25 | ``` javascript 26 | const chunk = require('chunk-text'); 27 | ``` 28 | 29 | #### chunk(text, chunkSize); 30 | 31 | Chunks the `text` string into an array of strings that each have a maximum length of `chunkSize`. 
32 | 33 | ``` javascript 34 | const out = chunk('hello world how are you?', 7); 35 | /* ['hello', 'world', 'how are', 'you?'] */ 36 | ``` 37 | 38 | If no space is found before `chunkSize` is reached, the word is split so that 39 | every resulting text chunk has a maximum length of `chunkSize`. 40 | 41 | ``` javascript 42 | const out = chunk('hello world', 4); 43 | /* ['hell', 'o', 'worl', 'd'] */ 44 | ``` 45 | 46 | #### chunk(text, chunkSize, chunkOptions); 47 | 48 | Chunks the `text` string into an array of strings that each have a maximum length of `chunkSize`, as determined by `chunkOptions.charLengthMask`. 49 | 50 | If `chunkOptions.charLengthMask` is omitted, the default behavior is the same as `chunkOptions.charLengthMask=-1`. 51 | 52 | For single-byte characters, `chunkOptions.charLengthMask` never changes the results. 53 | 54 | For multi-byte characters, `chunkOptions.charLengthMask` allows awareness of multi-byte glyphs according to the following table: 55 | 56 | | `chunkOptions.charLengthMask` | result | 57 | |-------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 58 | | -1 | - same as default, same as `chunkOptions.charLengthMask=1`
- each character counts as 1 towards length | 59 | | 0 | - each character counts as the number of bytes it contains | 60 | | >0 | - each character counts as the number of bytes it contains, up to a limit of `chunkOptions.charLengthMask=N`
- a 7-byte ZWJ emoji such as runningPerson+ZWJ+femaleSymbol (🏃🏽‍♀️) counts as 2, when `chunkOptions.charLengthMask=2` | 61 | 62 | You can also switch the `chunkOptions.charLengthType` property from its default of `length` to `TextEncoder`. 63 | 64 | This enables you to pass any object as `chunkOptions.textEncoder`, as long as it supports the call `chunkOptions.textEncoder.encode(text).length`. 65 | 66 | If your environment natively contains the `TextEncoder` prototype and `chunkOptions.textEncoder` isn't provided, 67 | 68 | the module attempts `new TextEncoder()` in order to use this `chunkOptions.charLengthType`. 69 | 70 | If 71 | 72 | - `chunkOptions.charLengthType` is set to `TextEncoder`. 73 | - `chunkOptions.textEncoder` isn't provided. 74 | - `TextEncoder` prototype isn't provided by the environment. 75 | 76 | Then 77 | 78 | - `ReferenceError` will occur. 79 | 80 | End If 81 | 82 | ``` javascript 83 | // one woman runner emoji with a colour is seven bytes, or five characters 84 | // RUNNER(2) + COLOUR(2) + ZWJ + GENDER + VS16 85 | // (actually encodes to 17) 86 | const runner = '🏃🏽‍♀️'; 87 | 88 | const outDefault = chunk(runner+runner+runner, 4); 89 | /* [ '🏃🏽‍♀️🏃🏽‍♀️🏃🏽‍♀️' ] */ 90 | 91 | const outZero = chunk(runner+runner+runner, 4, { charLengthMask: 0 }); 92 | /* [ '🏃🏽‍♀️', '🏃🏽‍♀️', '🏃🏽‍♀️' ] */ 93 | 94 | const outTwo = chunk(runner+runner+runner, 4, { charLengthMask: 2 }); 95 | /* [ '🏃🏽‍♀️🏃🏽‍♀️', '🏃🏽‍♀️' ] */ 96 | 97 | // FLAG + RAINBOW 98 | // 2 each as length, 4 each as TextEncoder 99 | // 4 as length, 8 as TextEncoder 100 | // Node v14.5.0 does not provide TextEncoder natively.
101 | const flags = '🏳️‍🌈🏳️‍🌈'; 102 | 103 | // \/ will fail if your environment doesn't already have TextEncoder prototype \/ 104 | chunk(flags, 8, { charLengthMask: 0, charLengthType: 'TextEncoder' }); 105 | // [ '🏳️‍🌈', '🏳️‍🌈' ] 106 | // /\ will fail if your environment doesn't already have TextEncoder prototype /\ 107 | 108 | chunk(flags, 4, { 109 | charLengthMask: 0, 110 | charLengthType: 'TextEncoder', 111 | textEncoder: new TextEncoder(), 112 | }) 113 | // [ '🏳️‍🌈', '🏳️‍🌈' ] 114 | 115 | chunk(flags, 999, { 116 | charLengthMask: 0, 117 | charLengthType: 'TextEncoder', 118 | textEncoder: { 119 | encode: () => ({ length: 999 }), 120 | }, 121 | }) 122 | // [ '🏳️‍🌈', '🏳️‍🌈' ] 123 | ``` 124 | 125 | ## Usage in Algolia context 126 | 127 | This library was created by [Algolia](https://www.algolia.com/) to make it easier to 128 | optimize record payload sizes, resulting in faster search responses from the API. 129 | 130 | In general, each record has a single large "content attribute", 131 | and this package lets you split that content into small chunks of text. 132 | 133 | The text chunks can then be [distributed over multiple records](https://www.algolia.com/doc/faq/basics/how-do-i-reduce-the-size-of-my-records/#faq-section). 134 | 135 | Here is an example of how to split an existing record into several records: 136 | 137 | ``` javascript 138 | var chunk = require('chunk-text'); 139 | var record = { 140 | post_id: 100, 141 | content: 'A large chunk of text here' 142 | }; 143 | 144 | var chunks = chunk(record.content, 600); // Limit the chunk size to a length of 600.
145 | var records = []; 146 | chunks.forEach(function(content) { 147 | records.push(Object.assign({}, record, {content: content})); 148 | }); 149 | ``` 150 | -------------------------------------------------------------------------------- /bin/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eu 3 | 4 | readonly CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD) 5 | if [ "$CURRENT_BRANCH" != master ]; then 6 | echo "You must be on 'master' branch to publish a release, aborting..." 7 | exit 1 8 | fi 9 | 10 | if ! git diff-index --quiet HEAD --; then 11 | echo "Working tree is not clean, aborting..." 12 | exit 1 13 | fi 14 | 15 | if ! yarn run build; then 16 | echo "Failed to build dist files, aborting..." 17 | exit 1 18 | fi 19 | 20 | if ! yarn test; then 21 | echo "Tests failed, aborting..." 22 | exit 1 23 | fi 24 | 25 | yarn run changelog:unreleased 26 | 27 | # Only update the package.json version 28 | # We need to update changelog before tagging 29 | # And publishing. 30 | yarn version --no-git-tag-version 31 | 32 | if ! yarn run changelog; then 33 | echo "Failed to update changelog, aborting..." 34 | exit 1 35 | fi 36 | 37 | yarn 38 | yarn build 39 | 40 | readonly PACKAGE_VERSION=$(< package.json grep version \ 41 | | head -1 \ 42 | | awk -F: '{ print $2 }' \ 43 | | sed 's/[",]//g' \ 44 | | tr -d '[:space:]') 45 | 46 | # Gives user a chance to review and eventually abort. 47 | git add --patch 48 | 49 | git commit --message="chore(release): v${PACKAGE_VERSION}" 50 | 51 | git push origin HEAD 52 | 53 | npm publish 54 | 55 | git tag "v$PACKAGE_VERSION" 56 | git push --tags 57 | 58 | echo "Pushed package to npm, and also pushed 'v$PACKAGE_VERSION' tag to git repository." 
59 | -------------------------------------------------------------------------------- /bin/server.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | const chunk = require('../dist/index.js'); 3 | console.log(chunk(process.argv[2], Number.parseInt(process.argv[3], 10), typeof process.argv[4] !== 'undefined' && process.argv[4] !== null && process.argv[4] !== '' ? JSON.parse(process.argv[4]) : '')); 4 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "chunk-text", 3 | "version": "2.0.1", 4 | "description": "🔪 chunk/split a string by length without cutting/truncating words.", 5 | "type": "commonJs", 6 | "module": "./dist/index.js", 7 | "main": "./dist/index.js", 8 | "exports": { 9 | "import": "./dist/index.js", 10 | "default": "./dist/index.js", 11 | "chunk": "./dist/server.js", 12 | "chunk-text": "./dist/server.js" 13 | }, 14 | "repository": "https://github.com/algolia/chunk-text", 15 | "author": "Raymond RUTJES ", 16 | "license": "MIT", 17 | "files": [ 18 | "dist" 19 | ], 20 | "bin": { 21 | "chunk": "./bin/server.js", 22 | "chunk-text": "./bin/server.js" 23 | }, 24 | "scripts": { 25 | "test": "npm run-script lint && jest", 26 | "build": "babel src --out-dir dist --delete-dir-on-start --ignore \"**/__tests__/*\"", 27 | "lint": "eslint src", 28 | "lint:fix": "npm run-script lint --fix", 29 | "changelog": "conventional-changelog --preset angular --infile CHANGELOG.md --same-file", 30 | "changelog:unreleased": "conventional-changelog --preset angular --output-unreleased", 31 | "start": "node --unhandled-rejections=strict --trace-warnings ./bin/server.js", 32 | "chunk": "npm run-script start", 33 | "chunk-text": "npm run-script start", 34 | "prepublishOnly": "npm run-script build" 35 | }, 36 | "devDependencies": { 37 | "@babel/cli": "^7.10.5", 38 | "@babel/core": 
"^7.10.5", 39 | "@babel/preset-env": "^7.10.4", 40 | "@babel/runtime": "^7.10.5", 41 | "babel-core": "6.26.3", 42 | "babel-eslint": "10.1.0", 43 | "babel-jest": "^26.1.0", 44 | "babel-loader": "^8.1.0", 45 | "babel-plugin-add-module-exports": "^1.0.2", 46 | "conventional-changelog-cli": "^2.0.34", 47 | "eslint": "7.5.0", 48 | "eslint-config-algolia": "^16.0.0", 49 | "eslint-config-prettier": "^6.11.0", 50 | "eslint-plugin-eslint-comments": "^3.2.0", 51 | "eslint-plugin-import": "^2.3.0", 52 | "eslint-plugin-jest": "^23.18.0", 53 | "eslint-plugin-prettier": "^3.1.4", 54 | "fastestsmallesttextencoderdecoder-encodeinto": "^1.0.22", 55 | "jest": "^26.1.0", 56 | "prettier": "^2.0.5" 57 | }, 58 | "dependencies": { 59 | "runes": "^0.4.3" 60 | }, 61 | "keywords": [ 62 | "chunk-text", 63 | "split", 64 | "chunk", 65 | "algolia", 66 | "text", 67 | "string", 68 | "array", 69 | "length", 70 | "index", 71 | "size", 72 | "splice", 73 | "slice", 74 | "text-processing", 75 | "text processing", 76 | "multi-byte", 77 | "multibyte", 78 | "multi", 79 | "byte", 80 | "runes", 81 | "rune", 82 | "glyphs", 83 | "glyph", 84 | "encoding", 85 | "emoji", 86 | "MIT" 87 | ] 88 | } 89 | -------------------------------------------------------------------------------- /src/__tests__/index.js: -------------------------------------------------------------------------------- 1 | import chunk from '../index'; 2 | import { TextEncoder } from 'fastestsmallesttextencoderdecoder-encodeinto'; 3 | it("should throw if 'text' is missing or its type or value are invalid.", () => { 4 | expect(() => { 5 | chunk(); 6 | }).toThrow( 7 | new TypeError('Text should be provided as first argument and be a string.') 8 | ); 9 | }); 10 | 11 | it("should throw if 'size' is missing or its type or value are invalid.", () => { 12 | expect(() => { 13 | chunk('hello world'); 14 | }).toThrow( 15 | new TypeError( 16 | 'Size should be provided as 2nd argument and parseInt to a value greater than zero.' 
17 | ) 18 | ); 19 | expect(() => { 20 | chunk('hello world', 0); 21 | }).toThrow( 22 | new TypeError( 23 | 'Size should be provided as 2nd argument and parseInt to a value greater than zero.' 24 | ) 25 | ); 26 | }); 27 | 28 | it("should throw if 'type' argument's type or value is invalid.", () => { 29 | expect(() => { 30 | chunk('hello world', 1, { charLengthMask: 'one' }); 31 | }).toThrow( 32 | new TypeError( 33 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.' 34 | ) 35 | ); 36 | expect(() => { 37 | chunk('hello world', 1, { charLengthMask: -2.001 }); 38 | }).toThrow( 39 | new TypeError( 40 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.' 41 | ) 42 | ); 43 | expect(() => { 44 | chunk('hello world', 1, { charLengthMask: -2 }); 45 | }).toThrow( 46 | new TypeError( 47 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.' 48 | ) 49 | ); 50 | expect(() => { 51 | chunk('hello world', 1, { charLengthMask: 3 }); 52 | }).not.toThrow( 53 | new TypeError( 54 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.' 55 | ) 56 | ); 57 | expect(() => { 58 | chunk('hello world', 1, { charLengthMask: '3' }); 59 | }).not.toThrow( 60 | new TypeError( 61 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.' 62 | ) 63 | ); 64 | }); 65 | 66 | it("should not throw if 'type' type and value are missing or valid.", () => { 67 | expect(() => { 68 | chunk('hello world', 1, { charLengthMask: '' }); 69 | }).toThrow( 70 | new TypeError( 71 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.' 72 | ) 73 | ); 74 | expect(() => { 75 | chunk('hello world', 1, { charLengthMask: null }); 76 | }).toThrow( 77 | new TypeError( 78 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.' 
79 | ) 80 | ); 81 | expect(() => { 82 | chunk('hello world', 1, { charLengthMask: undefined }); 83 | }).toThrow( 84 | new TypeError( 85 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.' 86 | ) 87 | ); 88 | expect(() => { 89 | chunk('hello world', 1, {}); 90 | }).not.toThrow( 91 | new TypeError( 92 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.' 93 | ) 94 | ); 95 | expect(() => { 96 | chunk('hello world', 1, { charLengthType: 'length' }); 97 | }).not.toThrow( 98 | new TypeError( 99 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.' 100 | ) 101 | ); 102 | expect(() => { 103 | chunk('hello world', 1); 104 | }).not.toThrow( 105 | new TypeError( 106 | 'Options should be provided as 3rd (optional) argument and be an object.\n' + 107 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']" 108 | ) 109 | ); 110 | expect(() => { 111 | chunk('hello world', 1, { charLengthMask: -1.999 }); 112 | }).not.toThrow( 113 | new TypeError( 114 | 'Options should be provided as 3rd (optional) argument and be an object.\n' + 115 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']" 116 | ) 117 | ); 118 | expect(() => { 119 | chunk('hello world', 1, { charLengthMask: -0.001 }); 120 | }).not.toThrow( 121 | new TypeError( 122 | 'Options should be provided as 3rd (optional) argument and be an object.\n' + 123 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']" 124 | ) 125 | ); 126 | expect(() => { 127 | chunk('hello world', 1, { charLengthMask: 0.0 }); 128 | }).not.toThrow( 129 | new TypeError( 130 | 'Options should be provided as 3rd (optional) argument and be an object.\n' + 131 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']" 132 | ) 133 | ); 
134 | expect(() => { 135 | chunk('hello world', 1, { charLengthMask: 1.0 }); 136 | }).not.toThrow( 137 | new TypeError( 138 | 'Options should be provided as 3rd (optional) argument and be an object.\n' + 139 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']" 140 | ) 141 | ); 142 | expect(() => { 143 | chunk('hello world', 1, { charLengthMask: new Number.BigInt(2.0) }); 144 | }).not.toThrow( 145 | new TypeError( 146 | 'Options should be provided as 3rd (optional) argument and be an object.\n' + 147 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']" 148 | ) 149 | ); 150 | expect(() => { 151 | chunk('hello world', 1, { charLengthMask: 2.999 }); 152 | }).not.toThrow( 153 | new TypeError( 154 | 'Options should be provided as 3rd (optional) argument and be an object.\n' + 155 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']" 156 | ) 157 | ); 158 | expect(() => { 159 | chunk('hello world', 1, { charLengthMask: '2.99999 years' }); 160 | }).not.toThrow( 161 | new TypeError( 162 | 'Options should be provided as 3rd (optional) argument and be an object.\n' + 163 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']" 164 | ) 165 | ); 166 | expect(() => { 167 | chunk('hello world', 1, { charLengthMask: '2' }); 168 | }).not.toThrow( 169 | new TypeError( 170 | 'Options should be provided as 3rd (optional) argument and be an object.\n' + 171 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']" 172 | ) 173 | ); 174 | }); 175 | 176 | it('should return an array of strings.', () => { 177 | const pieces = chunk('hello world', 5); 178 | expect(pieces).toEqual(['hello', 'world']); 179 | }); 180 | 181 | it('should not cut in the middle of words', () => { 182 | const pieces = chunk('hello world how are you?', 7); 183 | 
expect(pieces).toEqual(['hello', 'world', 'how are', 'you?']); 184 | }); 185 | 186 | it('should truncate a word if longer than size', () => { 187 | const pieces = chunk('hello you', 4); 188 | expect(pieces).toEqual(['hell', 'o', 'you']); 189 | }); 190 | 191 | it('should count multi-byte characters as single characters by default', () => { 192 | // each of these characters is two bytes 193 | const chineseTextA = '𤻪'; 194 | const chineseTextB = '𬜬'; 195 | const chineseTextC = '𬜯'; 196 | const chineseText = chineseTextA + chineseTextB + chineseTextC; 197 | expect(chunk(chineseText, 2)).toEqual([ 198 | chineseTextA + chineseTextB, 199 | chineseTextC, 200 | ]); 201 | expect(chunk(chineseText, 1)).toEqual([ 202 | chineseTextA, 203 | chineseTextB, 204 | chineseTextC, 205 | ]); 206 | 207 | // each of these characters is two bytes 208 | const fourCheese = '🧀🧀🧀🧀'; 209 | const camembert = `${fourCheese} ${fourCheese}`; 210 | expect(chunk(camembert, 4)).toEqual([fourCheese, fourCheese]); 211 | 212 | // one woman runner emoji with a colour is seven bytes, or five characters 213 | // RUNNER(2) + COLOUR(2) + ZJW + GENDER + VS15 214 | const runner = '🏃🏽‍♀️'; 215 | expect( 216 | chunk(runner + runner + runner + runner + runner + runner + runner, 3) 217 | ).toEqual([runner + runner + runner, runner + runner + runner, runner]); 218 | }); 219 | 220 | it('should count all characters as single characters using charLengthMask -1 or 1 values', () => { 221 | // each of these characters is two bytes 222 | const chineseTextA = '𤻪'; 223 | const chineseTextB = '𬜬'; 224 | const chineseTextC = '𬜯'; 225 | const chineseText = chineseTextA + chineseTextB + chineseTextC; 226 | expect(chunk(chineseText, 2, { charLengthMask: -1 })).toEqual([ 227 | chineseTextA + chineseTextB, 228 | chineseTextC, 229 | ]); 230 | expect(chunk(chineseText, 1, { charLengthMask: -1 })).toEqual([ 231 | chineseTextA, 232 | chineseTextB, 233 | chineseTextC, 234 | ]); 235 | expect(chunk(chineseText, 2, { charLengthMask: 1 
})).toEqual([ 236 | chineseTextA + chineseTextB, 237 | chineseTextC, 238 | ]); 239 | expect(chunk(chineseText, 1, { charLengthMask: 1 })).toEqual([ 240 | chineseTextA, 241 | chineseTextB, 242 | chineseTextC, 243 | ]); 244 | 245 | // each of these characters is two bytes 246 | const fourCheese = '🧀🧀🧀🧀'; 247 | const camembert = `${fourCheese} ${fourCheese}`; 248 | expect(chunk(camembert, 4, { charLengthMask: -1 })).toEqual([ 249 | fourCheese, 250 | fourCheese, 251 | ]); 252 | expect(chunk(camembert, 4, { charLengthMask: 1 })).toEqual([ 253 | fourCheese, 254 | fourCheese, 255 | ]); 256 | 257 | // The Woman Running emoji is a ZWJ sequence combining 🏃 Person Running, ‍ Zero Width Joiner and ♀ Female Sign. 258 | // each of these characters is five bytes 259 | const womanRunningZWJ = '🏃‍♀️'; 260 | const womenRunningZWJ = `${ 261 | womanRunningZWJ + womanRunningZWJ + womanRunningZWJ + womanRunningZWJ 262 | } ${womanRunningZWJ + womanRunningZWJ}`; 263 | expect(chunk(womenRunningZWJ, 2, { charLengthMask: -1 })).toEqual([ 264 | womanRunningZWJ + womanRunningZWJ, 265 | womanRunningZWJ + womanRunningZWJ, 266 | womanRunningZWJ + womanRunningZWJ, 267 | ]); 268 | expect(chunk(womenRunningZWJ, 2, { charLengthMask: 1 })).toEqual([ 269 | womanRunningZWJ + womanRunningZWJ, 270 | womanRunningZWJ + womanRunningZWJ, 271 | womanRunningZWJ + womanRunningZWJ, 272 | ]); 273 | }); 274 | 275 | it('should count characters as bytes using charLengthMask value 0', () => { 276 | // each of these characters is two bytes 277 | const chineseTextA = '𤻪'; 278 | const chineseTextB = '𬜬'; 279 | const chineseTextC = '𬜯'; 280 | const chineseText = chineseTextA + chineseTextB + chineseTextC; 281 | expect(chunk(chineseText, 2, { charLengthMask: 0 })).toEqual([ 282 | chineseTextA, 283 | chineseTextB, 284 | chineseTextC, 285 | ]); 286 | expect(chunk(chineseText, 1, { charLengthMask: 0 })).toEqual([ 287 | chineseTextA, 288 | chineseTextB, 289 | chineseTextC, 290 | ]); 291 | expect(chunk(chineseText, 4, { 
charLengthMask: 0 })).toEqual([ 292 | chineseTextA + chineseTextB, 293 | chineseTextC, 294 | ]); 295 | expect(chunk(chineseText, 6, { charLengthMask: 0 })).toEqual([ 296 | chineseTextA + chineseTextB + chineseTextC, 297 | ]); 298 | 299 | // each of these characters is two bytes 300 | const twoCheese = '🧀🧀'; 301 | const camembert = `${twoCheese + twoCheese} ${twoCheese + twoCheese}`; 302 | expect(chunk(camembert, 4, { charLengthMask: 0 })).toEqual([ 303 | twoCheese, 304 | twoCheese, 305 | twoCheese, 306 | twoCheese, 307 | ]); 308 | 309 | // The Woman Running emoji is a ZWJ sequence combining 🏃 Person Running, ‍ Zero Width Joiner and ♀ Female Sign. 310 | // each of these characters is five bytes 311 | const womanRunningZWJ = '🏃‍♀️'; 312 | const womenRunningZWJ = `${ 313 | womanRunningZWJ + womanRunningZWJ + womanRunningZWJ + womanRunningZWJ 314 | } ${womanRunningZWJ + womanRunningZWJ}`; 315 | expect(chunk(womenRunningZWJ, 10, { charLengthMask: 0 })).toEqual([ 316 | womanRunningZWJ + womanRunningZWJ, 317 | womanRunningZWJ + womanRunningZWJ, 318 | womanRunningZWJ + womanRunningZWJ, 319 | ]); 320 | expect( 321 | chunk( 322 | `12123123 1231231 312312312 123 12 ${womanRunningZWJ} ${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ} ${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}`, 323 | 44, 324 | { charLengthMask: 0 } 325 | ) 326 | ).toEqual([ 327 | `12123123 1231231 312312312 123 12 ${womanRunningZWJ}`, 328 | `${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}`, 329 | `${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ} ${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}`, 330 | ]); 331 | 332 | // one woman runner emoji with a colour is seven bytes, or five characters 333 | // RUNNER(2) + COLOUR(2) + ZJW + GENDER + VS15 334 | const runner = '🏃🏽‍♀️'; 335 
| expect(chunk(runner + runner + runner, 17, { charLengthMask: 0 })).toEqual([ 336 | runner + runner, 337 | runner, 338 | ]); 339 | expect( 340 | chunk( 341 | `12123123 1231231 312312312 123 12 ${runner}${runner}${runner} ${runner}${runner}${runner} ${runner}${runner}${runner}${runner} ${runner} ${runner}${runner} ${runner}`, 342 | 28, 343 | { charLengthMask: 0 } 344 | ) 345 | ).toEqual([ 346 | `12123123 1231231 312312312`, 347 | `123 12 ${runner}${runner}${runner}`, 348 | `${runner}${runner}${runner}`, 349 | `${runner}${runner}${runner}${runner}`, 350 | `${runner} ${runner}${runner}`, 351 | `${runner}`, 352 | ]); 353 | }); 354 | 355 | it('should count single width characters the same with all charLengthMask values', () => { 356 | for (let i = 0; i < 100; i++) { 357 | expect(chunk('hello you', 4, { charLengthMask: i })).toEqual([ 358 | 'hell', 359 | 'o', 360 | 'you', 361 | ]); 362 | } 363 | }); 364 | 365 | it('should count characters as bytes up to maximum N charLengthMask value > 0', () => { 366 | // each of these characters is two bytes 367 | const chineseTextA = '𤻪'; 368 | const chineseTextB = '𬜬'; 369 | const chineseTextC = '𬜯'; 370 | const chineseText = chineseTextA + chineseTextB + chineseTextC; 371 | expect(chunk(chineseText, 2, { charLengthMask: 2 })).toEqual([ 372 | chineseTextA, 373 | chineseTextB, 374 | chineseTextC, 375 | ]); 376 | expect(chunk(chineseText, 4, { charLengthMask: 2 })).toEqual([ 377 | chineseTextA + chineseTextB, 378 | chineseTextC, 379 | ]); 380 | expect(chunk(chineseText, 2, { charLengthMask: 1 })).toEqual([ 381 | chineseTextA + chineseTextB, 382 | chineseTextC, 383 | ]); 384 | 385 | // each of these characters is two bytes 386 | const cheese = '🧀'; 387 | const twoCheese = cheese + cheese; 388 | const camembert = `${twoCheese + twoCheese} ${twoCheese + twoCheese}`; 389 | expect(chunk(camembert, 4, { charLengthMask: 2 })).toEqual([ 390 | twoCheese, 391 | twoCheese, 392 | twoCheese, 393 | twoCheese, 394 | ]); 395 | expect(chunk(camembert, 
2, { charLengthMask: 4 })).toEqual([ 396 | cheese, 397 | cheese, 398 | cheese, 399 | cheese, 400 | cheese, 401 | cheese, 402 | cheese, 403 | cheese, 404 | ]); 405 | 406 | // The Woman Running emoji is a ZWJ sequence combining 🏃 Person Running, ‍ Zero Width Joiner and ♀ Female Sign. 407 | // each of these characters is five bytes 408 | const womanRunningZWJ = '🏃‍♀️'; 409 | const womenRunningZWJ = `${ 410 | womanRunningZWJ + womanRunningZWJ + womanRunningZWJ + womanRunningZWJ 411 | } ${womanRunningZWJ + womanRunningZWJ}`; 412 | expect(chunk(womenRunningZWJ, 2, { charLengthMask: 0 })).toEqual([ 413 | womanRunningZWJ, 414 | womanRunningZWJ, 415 | womanRunningZWJ, 416 | womanRunningZWJ, 417 | womanRunningZWJ, 418 | womanRunningZWJ, 419 | ]); 420 | for (let i = 2; i < 100; i++) { 421 | expect(chunk(womenRunningZWJ, 2, { charLengthMask: i })).toEqual([ 422 | womanRunningZWJ, 423 | womanRunningZWJ, 424 | womanRunningZWJ, 425 | womanRunningZWJ, 426 | womanRunningZWJ, 427 | womanRunningZWJ, 428 | ]); 429 | } 430 | expect(chunk(womenRunningZWJ, 4, { charLengthMask: 1 })).toEqual([ 431 | womanRunningZWJ + womanRunningZWJ + womanRunningZWJ + womanRunningZWJ, 432 | womanRunningZWJ + womanRunningZWJ, 433 | ]); 434 | expect(chunk(womenRunningZWJ, 4, { charLengthMask: 2 })).toEqual([ 435 | womanRunningZWJ + womanRunningZWJ, 436 | womanRunningZWJ + womanRunningZWJ, 437 | womanRunningZWJ + womanRunningZWJ, 438 | ]); 439 | expect(chunk(womenRunningZWJ, 8, { charLengthMask: 4 })).toEqual([ 440 | womanRunningZWJ + womanRunningZWJ, 441 | womanRunningZWJ + womanRunningZWJ, 442 | womanRunningZWJ + womanRunningZWJ, 443 | ]); 444 | for (let i = 9; i < 100; i++) { 445 | expect(chunk(womenRunningZWJ, 11, { charLengthMask: i })).toEqual([ 446 | womanRunningZWJ + womanRunningZWJ, 447 | womanRunningZWJ + womanRunningZWJ, 448 | womanRunningZWJ + womanRunningZWJ, 449 | ]); 450 | } 451 | expect( 452 | chunk( 453 | `12123123 1231231 312312312 123 12 
${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ} ${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}`, 454 | 12, 455 | { charLengthMask: 2 } 456 | ) 457 | ).toEqual([ 458 | '12123123', 459 | '1231231', 460 | '312312312', 461 | '123 12', 462 | `${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}`, 463 | `${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}`, 464 | `${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}`, 465 | ]); 466 | 467 | // one woman runner emoji with a colour is seven bytes, or five characters 468 | // RUNNER(2) + COLOUR(2) + ZJW + GENDER + VS15 469 | const runner = '🏃🏽‍♀️'; 470 | expect(chunk(runner + runner + runner, 4, { charLengthMask: 2 })).toEqual([ 471 | runner + runner, 472 | runner, 473 | ]); 474 | expect( 475 | chunk( 476 | `12123123 1231231 312312312 123 12 ${runner}${runner}${runner}${runner}${runner}${runner} ${runner}${runner}${runner}${runner} ${runner} ${runner}${runner} ${runner}`, 477 | 12, 478 | { charLengthMask: 2 } 479 | ) 480 | ).toEqual([ 481 | '12123123', 482 | '1231231', 483 | '312312312', 484 | '123 12', 485 | `${runner}${runner}${runner}${runner}${runner}${runner}`, 486 | `${runner}${runner}${runner}${runner} ${runner}`, 487 | `${runner}${runner} ${runner}`, 488 | ]); 489 | }); 490 | 491 | it('should count N-byte characters with charLengthMask value 0 the same as charLengthMask value N', () => { 492 | // each of these characters is two bytes 493 | const camembert = '🧀🧀🧀🧀 🧀🧀🧀🧀'; 494 | expect(chunk(camembert, 8, { charLengthMask: 2 })).toEqual( 495 | chunk(camembert, 8, { charLengthMask: 0 }) 496 | ); 497 | 498 | // The Woman Running emoji is a ZWJ sequence combining 🏃 Person Running, ‍ Zero Width Joiner and ♀ Female Sign. 
499 | // each of these characters is five bytes 500 | const womanRunningZWJ = '🏃‍♀️'; 501 | const womenRunningZWJ = `${ 502 | womanRunningZWJ + womanRunningZWJ + womanRunningZWJ + womanRunningZWJ 503 | } ${womanRunningZWJ + womanRunningZWJ}`; 504 | expect(chunk(womenRunningZWJ, 2, { charLengthMask: 0 })).toEqual( 505 | chunk(womenRunningZWJ, 2, { charLengthMask: 5 }) 506 | ); 507 | 508 | // one woman runner emoji with a colour is seven bytes, or five characters 509 | // RUNNER(2) + COLOUR(2) + ZJW + GENDER + VS15 510 | const runner = '🏃🏽‍♀️'; 511 | const runners = runner + runner + runner; 512 | expect(chunk(runners, 2, { charLengthMask: 0 })).toEqual( 513 | chunk(runners, 2, { charLengthMask: 7 }) 514 | ); 515 | }); 516 | 517 | it('should count default charLengthMask the same as charLengthMask value -1', () => { 518 | // each of these characters is two bytes 519 | const chineseText = '𤻪𬜬𬜯'; 520 | expect(chunk(chineseText, 2)).toEqual( 521 | chunk(chineseText, 2, { charLengthMask: -1 }) 522 | ); 523 | expect(chunk(chineseText, 1)).toEqual( 524 | chunk(chineseText, 1, { charLengthMask: -1 }) 525 | ); 526 | 527 | // each of these characters is two bytes 528 | const camembert = '🧀🧀🧀🧀 🧀🧀🧀🧀'; 529 | expect(chunk(camembert, 4)).toEqual( 530 | chunk(camembert, 4, { charLengthMask: -1 }) 531 | ); 532 | 533 | // The Woman Running emoji is a ZWJ sequence combining 🏃 Person Running, ‍ Zero Width Joiner and ♀ Female Sign. 
534 | // each of these characters is five bytes 535 | const womanRunningZWJ = '🏃‍♀️'; 536 | const womenRunningZWJ = `${ 537 | womanRunningZWJ + womanRunningZWJ + womanRunningZWJ + womanRunningZWJ 538 | } ${womanRunningZWJ + womanRunningZWJ}`; 539 | expect(chunk(womenRunningZWJ, 2)).toEqual( 540 | chunk(womenRunningZWJ, 2, { charLengthMask: -1 }) 541 | ); 542 | 543 | // one woman runner emoji with a colour is seven bytes, or five characters 544 | // RUNNER(2) + COLOUR(2) + ZJW + GENDER + VS15 545 | const runner = '🏃🏽‍♀️'; 546 | const runners = runner + runner + runner; 547 | expect(chunk(runners, 2)).toEqual(chunk(runners, 2, { charLengthMask: -1 })); 548 | }); 549 | 550 | it('should not cut combined characters', () => { 551 | // one woman runner emoji with a colour is seven bytes, or five characters 552 | // RUNNER(2) + COLOUR(2) + ZJW + GENDER + VS15 553 | const runner = '🏃🏽‍♀️'; 554 | const runners = runner + runner + runner; 555 | expect(chunk(runners, 3)).toEqual([runners]); 556 | expect(chunk(runners, 1)).toEqual([runner, runner, runner]); 557 | 558 | // FLAG + RAINBOW 559 | const flag = '🏳️‍🌈'; 560 | const flags = flag + flag; 561 | expect(chunk(flags, 1)).toEqual([flag, flag]); 562 | }); 563 | 564 | it('allows alternate TextEncoder', () => { 565 | // one woman runner emoji with a colour is seven bytes, or five characters 566 | // RUNNER(2) + COLOUR(2) + ZJW + GENDER + VS15 567 | // 7 each as length, 17 each as TextEncoder 568 | // 21 as length, 51 as TextEncoder 569 | const runners = '🏃🏽‍♀️🏃🏽‍♀️🏃🏽‍♀️'; 570 | 571 | expect(() => { 572 | chunk(runners, 14, { charLengthMask: 0, charLengthType: 'TextEncoder' }); 573 | }).toThrow( 574 | new ReferenceError( 575 | "TextEncoder is not natively defined, new TextEncoder must be passed in with the 'chunkOptions.textEncoder' property." 
576 | ) 577 | ); 578 | 579 | expect( 580 | chunk(runners, 51, { 581 | charLengthMask: 0, 582 | charLengthType: 'TextEncoder', 583 | textEncoder: new TextEncoder(), 584 | }) 585 | ).toEqual(chunk(runners, 21, { charLengthMask: 0 })); 586 | 587 | // FLAG + RAINBOW 588 | // 2 each as length, 4 each as TextEncoder 589 | // 4 as length, 8 as TextEncoder 590 | // Node v14.5.0 does not provide TextEncoder natively. 591 | const flags = '🏳️‍🌈🏳️‍🌈'; 592 | 593 | expect( 594 | chunk(flags, 4, { 595 | charLengthMask: 0, 596 | charLengthType: 'TextEncoder', 597 | textEncoder: new TextEncoder(), 598 | }) 599 | ).toEqual(chunk(flags, 2, { charLengthMask: 0 })); 600 | 601 | expect( 602 | chunk(flags, 999, { 603 | charLengthMask: 0, 604 | charLengthType: 'TextEncoder', 605 | textEncoder: { 606 | encode: () => ({ length: 999 }), 607 | }, 608 | }) 609 | ).toEqual(chunk(flags, 2, { charLengthMask: 0 })); 610 | }); 611 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import runes from 'runes'; 2 | 3 | const assertIsValidText = (text) => { 4 | if (typeof text !== 'string') { 5 | throw new TypeError( 6 | 'Text should be provided as first argument and be a string.' 7 | ); 8 | } 9 | }; 10 | 11 | const assertIsValidChunkSize = (chunkSize) => { 12 | if (Number.isNaN(chunkSize) || Number.parseInt(chunkSize, 10) <= 0) { 13 | throw new TypeError( 14 | 'Size should be provided as 2nd argument and parseInt to a value greater than zero.' 
15 | ); 16 | } 17 | }; 18 | 19 | const assertIsValidChunkOptions = (chunkOptions) => { 20 | if ( 21 | typeof chunkOptions !== 'object' && 22 | typeof chunkOptions !== 'undefined' && 23 | chunkOptions !== null && 24 | chunkOptions !== '' 25 | ) { 26 | throw new TypeError( 27 | 'Options should be provided as 3rd (optional) argument and be an object.\n' + 28 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'textEncoder']" 29 | ); 30 | } 31 | }; 32 | 33 | const assertIsValidCharLengthMask = ( 34 | charLengthMask, 35 | charLengthMaskIntParseIntNaN, 36 | charLengthMaskInt 37 | ) => { 38 | if (charLengthMaskIntParseIntNaN || charLengthMaskInt < -1) { 39 | throw new TypeError( 40 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.' 41 | ); 42 | } 43 | }; 44 | 45 | const assertIsValidTextEncoder = (textEncoder) => { 46 | if ( 47 | typeof textEncoder === 'string' || 48 | Array.isArray(textEncoder) || 49 | typeof textEncoder === 'undefined' || 50 | textEncoder === null 51 | ) { 52 | throw new TypeError( 53 | 'textEncoder should be provided as a chunkOptions property and be an object containing the .encode(text).length property.' 
54 | ); 55 | } 56 | }; 57 | 58 | const assertIsValidCharLengthType = (charLengthType) => { 59 | if ( 60 | typeof charLengthType !== 'string' || 61 | !(charLengthType === 'length' || charLengthType === 'TextEncoder') 62 | ) { 63 | throw new TypeError( 64 | "charLengthType should be provided as a chunkOptions property and be a value in ['length', 'TextEncoder']" 65 | ); 66 | } 67 | }; 68 | 69 | const chunkLength = ( 70 | characters, 71 | charLengthMask, 72 | charLengthType, 73 | textEncoder 74 | ) => { 75 | let length; 76 | if ( 77 | typeof characters === 'undefined' || 78 | characters === null || 79 | characters === '' 80 | ) { 81 | length = -1; 82 | } else { 83 | let charactersArray; 84 | if (typeof characters === 'string') { 85 | charactersArray = [characters]; 86 | } else if (Array.isArray(characters) && characters.length) { 87 | charactersArray = characters; 88 | } 89 | 90 | if ( 91 | !Array.isArray(charactersArray) || 92 | !charactersArray.length || 93 | charactersArray === null 94 | ) { 95 | length = -1; 96 | } else if (charLengthMask === 0) { 97 | length = charactersArray 98 | .map( 99 | (character) => 100 | (charLengthType === 'TextEncoder' 101 | ? textEncoder.encode(character) 102 | : character 103 | ).length 104 | ) 105 | .reduce((accumulator, currentValue) => accumulator + currentValue); 106 | } else if (charLengthMask > 0) { 107 | const arrayLength = charactersArray 108 | .map( 109 | (character) => 110 | (charLengthType === 'TextEncoder' 111 | ? textEncoder.encode(character) 112 | : character 113 | ).length 114 | ) 115 | .reduce( 116 | (accumulator, currentValue) => 117 | accumulator + 118 | (currentValue > charLengthMask ? charLengthMask : currentValue) 119 | ); 120 | const maxLength = charactersArray.length * charLengthMask; 121 | length = maxLength > arrayLength ? 
arrayLength : maxLength; 122 | } else { 123 | length = charactersArray.length; 124 | } 125 | } 126 | return length; 127 | }; 128 | 129 | const lastSpaceOrLength = (text, upTo) => { 130 | let lastIndex = text.lastIndexOf(' ', upTo); 131 | if (lastIndex === -1) { 132 | lastIndex = upTo; 133 | } 134 | if (lastIndex > text.length || upTo >= text.length) { 135 | lastIndex = text.length; 136 | } 137 | return lastIndex; 138 | }; 139 | 140 | const chunkIndexOf = ( 141 | characters, 142 | chunkSize, 143 | charLengthMask, 144 | charLengthType, 145 | textEncoder 146 | ) => { 147 | let splitAt = lastSpaceOrLength(characters, chunkSize); 148 | 149 | while ( 150 | splitAt > 0 && 151 | chunkSize < 152 | chunkLength( 153 | characters.slice(0, splitAt), 154 | charLengthMask, 155 | charLengthType, 156 | textEncoder 157 | ) 158 | ) { 159 | splitAt = splitAt - 1; 160 | } 161 | splitAt = lastSpaceOrLength(characters, splitAt); 162 | if ((splitAt > -2 && splitAt < 1) || characters[splitAt] === ' ') { 163 | splitAt = splitAt + 1; 164 | } 165 | if ( 166 | splitAt > characters.length || 167 | splitAt < 0 || 168 | (splitAt === 0 && characters.length === 1) 169 | ) { 170 | splitAt = characters.length; 171 | } 172 | return splitAt; 173 | }; 174 | 175 | export default (text, chunkSize, chunkOptions) => { 176 | assertIsValidText(text); 177 | const chunkSizeInt = Number.parseInt(chunkSize, 10); 178 | assertIsValidChunkSize(chunkSizeInt); 179 | assertIsValidChunkOptions(chunkOptions); 180 | 181 | let charLengthMaskInt = -1; 182 | let charLengthMaskIntParseInt = -1; 183 | let charLengthMaskIntParseIntNaN = true; 184 | let textEncoderObject; 185 | if (typeof chunkOptions === 'object') { 186 | if (Object.prototype.hasOwnProperty.call(chunkOptions, 'charLengthMask')) { 187 | charLengthMaskInt = chunkOptions.charLengthMask; 188 | charLengthMaskIntParseInt = Number.parseInt(charLengthMaskInt, 10); 189 | charLengthMaskIntParseIntNaN = Number.isNaN(charLengthMaskIntParseInt); 190 | 
assertIsValidCharLengthMask( 191 | charLengthMaskInt, 192 | charLengthMaskIntParseIntNaN, 193 | charLengthMaskIntParseInt 194 | ); 195 | } 196 | if (Object.prototype.hasOwnProperty.call(chunkOptions, 'charLengthType')) { 197 | assertIsValidCharLengthType(chunkOptions.charLengthType); 198 | if (chunkOptions.charLengthType === 'TextEncoder') { 199 | if (Object.prototype.hasOwnProperty.call(chunkOptions, 'textEncoder')) { 200 | assertIsValidTextEncoder(chunkOptions.textEncoder); 201 | textEncoderObject = chunkOptions.textEncoder; 202 | } 203 | } 204 | } 205 | } 206 | const charLengthMask = charLengthMaskIntParseIntNaN 207 | ? -1 208 | : charLengthMaskIntParseInt; 209 | const charLengthType = 210 | typeof chunkOptions === 'object' && chunkOptions.charLengthType 211 | ? chunkOptions.charLengthType 212 | : 'length'; 213 | try { 214 | if ( 215 | charLengthType === 'TextEncoder' && 216 | (typeof textEncoderObject === 'undefined' || 217 | textEncoderObject === null || 218 | textEncoderObject === '') 219 | ) { 220 | textEncoderObject = new TextEncoder(); 221 | } 222 | } catch (ex) { 223 | throw new ReferenceError( 224 | "TextEncoder is not natively defined, new TextEncoder must be passed in with the 'chunkOptions.textEncoder' property." 225 | ); 226 | } 227 | const textEncoder = textEncoderObject; 228 | const chunks = []; 229 | let characters = runes(text); 230 | while ( 231 | chunkLength(characters, charLengthMask, charLengthType, textEncoder) > 0 232 | ) { 233 | const splitAt = chunkIndexOf( 234 | characters, 235 | chunkSizeInt, 236 | charLengthMask, 237 | charLengthType, 238 | textEncoder 239 | ); 240 | const chunk = characters.slice(0, splitAt).join('').trim(); 241 | if (chunk !== '' && chunk !== null) { 242 | chunks.push(chunk); 243 | } 244 | characters = characters.slice(splitAt); 245 | } 246 | return chunks; 247 | }; 248 | --------------------------------------------------------------------------------