├── .babelrc
├── .eslintrc.js
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── README.md
├── bin
├── release.sh
└── server.js
├── package.json
├── src
├── __tests__
│ └── index.js
└── index.js
└── yarn.lock
/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 | "presets": ["@babel/preset-env"],
3 | "plugins": [
4 | "add-module-exports"
5 | ]
6 | }
7 |
--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | extends: 'algolia',
3 | "env": {
4 | "jest": true
5 | }
6 | };
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | npm-debug.log
3 | dist/
4 | coverage/
5 | package-lock.json
6 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | node_js:
3 | - node
4 | notifications:
5 | email: false
6 | cache:
7 | yarn: true
8 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 |
2 | ## [2.0.1](https://github.com/algolia/chunk-text/compare/v1.0.5...v2.0.1) (2020-09-14)
3 |
4 | ### Features
5 |
6 | * enhanced multi-byte character support using `TextEncoder` (#8) ([dab7672](https://github.com/algolia/chunk-text/commit/dab7672))
7 |
8 |
9 | ## [1.0.5](https://github.com/algolia/chunk-text/compare/v1.0.4...v1.0.5) (2017-07-19)
10 |
11 | Fixed the build.
12 |
13 |
14 | ## [1.0.4](https://github.com/algolia/chunk-text/compare/v1.0.3...v1.0.4) (2017-06-27)
15 |
16 |
17 | ### Bug Fixes
18 |
19 | * add support for multibyte characters (#2) ([1398956](https://github.com/algolia/chunk-text/commit/1398956))
20 |
21 |
22 | ### Features
23 |
24 | * **test:** add travis (#4) ([c88466b](https://github.com/algolia/chunk-text/commit/c88466b))
25 |
26 |
27 |
28 |
29 | ## [1.0.3](https://github.com/algolia/chunk-text/compare/v1.0.2...v1.0.3) (2017-06-19)
30 |
31 |
32 |
33 |
34 | ## [1.0.2](https://github.com/algolia/chunk-text/compare/v1.0.1...v1.0.2) (2017-06-18)
35 |
36 |
37 | ### Bug Fixes
38 |
39 | * remove default key when requiring in nodejs ([0414419](https://github.com/algolia/chunk-text/commit/0414419))
40 |
41 |
42 |
43 |
44 | ## [1.0.1](https://github.com/algolia/chunk-text/compare/v1.0.0...v1.0.1) (2017-06-18)
45 |
46 |
47 |
48 |
49 | # 1.0.0 (2017-06-18)
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Chunk Text
2 | ===
3 |
4 | > chunk/split a string by length without cutting/truncating words.
5 |
6 |
7 | ``` javascript
8 | const out = chunk('hello world how are you?', 7);
9 | /* ['hello', 'world', 'how are', 'you?'] */
10 | ```
11 |
12 |
13 | ## Installation
14 |
15 | ``` bash
16 | $ npm install chunk-text
17 | # yarn add chunk-text
18 | ```
19 |
20 |
21 | ## Usage
22 |
23 | All number values are parsed according to `Number.parseInt`.
24 |
25 | ``` javascript
26 | const chunk = require('chunk-text');
27 | ```
28 |
29 | #### chunk(text, chunkSize);
30 |
31 | Chunks the `text` string into an array of strings that each have a maximum length of `chunkSize`.
32 |
33 | ``` javascript
34 | const out = chunk('hello world how are you?', 7);
35 | /* ['hello', 'world', 'how are', 'you?'] */
36 | ```
37 |
38 | If no space is detected before `chunkSize` is reached, the word itself is split so that
39 | every resulting chunk has a length of at most `chunkSize`.
40 |
41 | ``` javascript
42 | const out = chunk('hello world', 4);
43 | /* ['hell', 'o', 'worl', 'd'] */
44 | ```
45 |
46 | #### chunk(text, chunkSize, chunkOptions);
47 |
48 | Chunks the `text` string into an array of strings that each have a maximum length of `chunkSize`, as determined by `chunkOptions.charLengthMask`.
49 |
50 | If `chunkOptions.charLengthMask` is omitted, the behavior is the same as with `chunkOptions.charLengthMask=-1`.
51 |
52 | For single-byte characters, `chunkOptions.charLengthMask` never changes the results.
53 |
54 | For multi-byte characters, `chunkOptions.charLengthMask` allows awareness of multi-byte glyphs according to the following table:
55 |
56 | | `chunkOptions.charLengthMask` | result |
57 | |-------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
58 | | -1 | - same as default, same as `chunkOptions.charLengthMask=1`<br>- each character counts as 1 towards length |
59 | | 0 | - each character counts as the number of bytes it contains |
60 | | >0 | - each character counts as the number of bytes it contains, up to a limit of `chunkOptions.charLengthMask=N`<br>- a 7-byte ZWJ emoji such as runningPerson+ZWJ+femaleSymbol (🏃🏽‍♀️) counts as 2, when `chunkOptions.charLengthMask=2` |
61 |
62 | You can also switch `chunkOptions.charLengthType` from its default value of `length` to `TextEncoder`.
63 |
64 | This lets you pass to `chunkOptions.textEncoder` any object that supports the call `chunkOptions.textEncoder.encode(text).length`.
65 |
66 | If your environment natively contains the `TextEncoder` prototype and `chunkOptions.textEncoder` isn't provided,
67 |
68 | the module attempts `new TextEncoder()` in order to use this `chunkOptions.charLengthType`.
69 |
70 | If
71 |
72 | - `chunkOptions.charLengthType` is set to `TextEncoder`.
73 | - `chunkOptions.textEncoder` isn't provided.
74 | - `TextEncoder` prototype isn't provided by the environment.
75 |
76 | Then
77 |
78 | - `ReferenceError` will occur.
79 |
80 | End If
81 |
82 | ``` javascript
83 | // one woman runner emoji with a colour is seven bytes, or five characters
84 | // RUNNER(2) + COLOUR(2) + ZWJ + GENDER + VS15
85 | // (actually encodes to 17)
86 | const runner = '🏃🏽♀️';
87 |
88 | const outDefault = chunk(runner+runner+runner, 4);
89 | /* [ '🏃🏽♀️🏃🏽♀️🏃🏽♀️' ] */
90 |
91 | const outZero = chunk(runner+runner+runner, 4, { charLengthMask: 0 });
92 | /* [ '🏃🏽♀️', '🏃🏽♀️', '🏃🏽♀️' ] */
93 |
94 | const outTwo = chunk(runner+runner+runner, 4, { charLengthMask: 2 });
95 | /* [ '🏃🏽♀️🏃🏽♀️', '🏃🏽♀️' ] */
96 |
97 | // FLAG + RAINBOW
98 | // 2 each as length, 4 each as TextEncoder
99 | // 4 as length, 8 as TextEncoder
100 | // Node v14.5.0 does not provide TextEncoder natively.
101 | const flags = '🏳️🌈🏳️🌈';
102 |
103 | // \/ will fail if your environment doesn't already have TextEncoder prototype \/
104 | chunk(flags, 8, { charLengthMask: 0, charLengthType: 'TextEncoder' });
105 | // [ '🏳️🌈', '🏳️🌈' ]
106 | // /\ will fail if your environment doesn't already have TextEncoder prototype /\
107 |
108 | chunk(flags, 4, {
109 | charLengthMask: 0,
110 | charLengthType: 'TextEncoder',
111 | textEncoder: new TextEncoder(),
112 | })
113 | // [ '🏳️🌈', '🏳️🌈' ]
114 |
115 | chunk(flags, 999, {
116 | charLengthMask: 0,
117 | charLengthType: 'TextEncoder',
118 | textEncoder: {
119 | encode: () => ({ length: 999 }),
120 | },
121 | })
122 | // [ '🏳️🌈', '🏳️🌈' ]
123 | ```
124 |
125 | ## Usage in Algolia context
126 |
127 | This library was created by [Algolia](https://www.algolia.com/) to make it easier
128 | to optimize record payload sizes, which results in faster search responses from the API.
129 |
130 | In general, each record has a single large "content attribute",
131 | and this package lets you split that content into small chunks of text.
132 |
133 | The text chunks can then be [distributed over multiple records](https://www.algolia.com/doc/faq/basics/how-do-i-reduce-the-size-of-my-records/#faq-section).
134 |
135 | Here is an example of how to split an existing record into several ones:
136 |
137 | ``` javascript
138 | var chunk = require('chunk-text');
139 | var record = {
140 | post_id: 100,
141 | content: 'A large chunk of text here'
142 | };
143 |
144 | var chunks = chunk(record.content, 600); // Limit the chunk size to a length of 600.
145 | var records = [];
146 | chunks.forEach(function(content) {
147 | records.push(Object.assign({}, record, {content: content}));
148 | });
149 | ```
150 |
--------------------------------------------------------------------------------
/bin/release.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -eu # release helper: stop at the first failing command or unset variable
3 |
4 | readonly CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD) # NOTE(review): `readonly VAR=$(...)` reports readonly's status, so a git failure here is not caught by `set -e`
5 | if [ "$CURRENT_BRANCH" != master ]; then
6 | echo "You must be on 'master' branch to publish a release, aborting..."
7 | exit 1
8 | fi
9 |
10 | if ! git diff-index --quiet HEAD --; then # non-zero when tracked files differ from HEAD
11 | echo "Working tree is not clean, aborting..."
12 | exit 1
13 | fi
14 |
15 | if ! yarn run build; then # make sure dist/ can be built before touching the version
16 | echo "Failed to build dist files, aborting..."
17 | exit 1
18 | fi
19 |
20 | if ! yarn test; then
21 | echo "Tests failed, aborting..."
22 | exit 1
23 | fi
24 |
25 | yarn run changelog:unreleased # runs conventional-changelog with --output-unreleased (see package.json)
26 |
27 | # Bump only the version field in package.json here (no git tag yet):
28 | # the changelog has to be regenerated before the release is tagged
29 | # and published.
30 | yarn version --no-git-tag-version
31 |
32 | if ! yarn run changelog; then
33 | echo "Failed to update changelog, aborting..."
34 | exit 1
35 | fi
36 |
37 | yarn # presumably refreshes the install after the version bump - confirm
38 | yarn build
39 |
40 | readonly PACKAGE_VERSION=$(< package.json grep version \
41 | | head -1 \
42 | | awk -F: '{ print $2 }' \
43 | | sed 's/[",]//g' \
44 | | tr -d '[:space:]') # value of the first "version" line, without quotes, commas or whitespace
45 |
46 | # Gives the user a chance to review each change and abort if needed.
47 | git add --patch
48 |
49 | git commit --message="chore(release): v${PACKAGE_VERSION}"
50 |
51 | git push origin HEAD
52 |
53 | npm publish # package.json's prepublishOnly script runs the build again at this point
54 |
55 | git tag "v$PACKAGE_VERSION" # the tag is created only after the publish step has succeeded
56 | git push --tags
57 |
58 | echo "Pushed package to npm, and also pushed 'v$PACKAGE_VERSION' tag to git repository."
59 |
--------------------------------------------------------------------------------
/bin/server.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 | const chunk = require('../dist/index.js');
3 | console.log(chunk(process.argv[2], Number.parseInt(process.argv[3], 10), typeof process.argv[4] !== 'undefined' && process.argv[4] !== null && process.argv[4] !== '' ? JSON.parse(process.argv[4]) : ''));
4 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "chunk-text",
3 | "version": "2.0.1",
4 | "description": "🔪 chunk/split a string by length without cutting/truncating words.",
5 | "type": "commonjs",
6 | "module": "./dist/index.js",
7 | "main": "./dist/index.js",
8 | "exports": {
9 | "import": "./dist/index.js",
10 | "default": "./dist/index.js",
11 | "chunk": "./dist/server.js",
12 | "chunk-text": "./dist/server.js"
13 | },
14 | "repository": "https://github.com/algolia/chunk-text",
15 | "author": "Raymond RUTJES ",
16 | "license": "MIT",
17 | "files": [
18 | "dist"
19 | ],
20 | "bin": {
21 | "chunk": "./bin/server.js",
22 | "chunk-text": "./bin/server.js"
23 | },
24 | "scripts": {
25 | "test": "npm run-script lint && jest",
26 | "build": "babel src --out-dir dist --delete-dir-on-start --ignore \"**/__tests__/*\"",
27 | "lint": "eslint src",
28 | "lint:fix": "npm run-script lint --fix",
29 | "changelog": "conventional-changelog --preset angular --infile CHANGELOG.md --same-file",
30 | "changelog:unreleased": "conventional-changelog --preset angular --output-unreleased",
31 | "start": "node --unhandled-rejections=strict --trace-warnings ./bin/server.js",
32 | "chunk": "npm run-script start",
33 | "chunk-text": "npm run-script start",
34 | "prepublishOnly": "npm run-script build"
35 | },
36 | "devDependencies": {
37 | "@babel/cli": "^7.10.5",
38 | "@babel/core": "^7.10.5",
39 | "@babel/preset-env": "^7.10.4",
40 | "@babel/runtime": "^7.10.5",
41 | "babel-core": "6.26.3",
42 | "babel-eslint": "10.1.0",
43 | "babel-jest": "^26.1.0",
44 | "babel-loader": "^8.1.0",
45 | "babel-plugin-add-module-exports": "^1.0.2",
46 | "conventional-changelog-cli": "^2.0.34",
47 | "eslint": "7.5.0",
48 | "eslint-config-algolia": "^16.0.0",
49 | "eslint-config-prettier": "^6.11.0",
50 | "eslint-plugin-eslint-comments": "^3.2.0",
51 | "eslint-plugin-import": "^2.3.0",
52 | "eslint-plugin-jest": "^23.18.0",
53 | "eslint-plugin-prettier": "^3.1.4",
54 | "fastestsmallesttextencoderdecoder-encodeinto": "^1.0.22",
55 | "jest": "^26.1.0",
56 | "prettier": "^2.0.5"
57 | },
58 | "dependencies": {
59 | "runes": "^0.4.3"
60 | },
61 | "keywords": [
62 | "chunk-text",
63 | "split",
64 | "chunk",
65 | "algolia",
66 | "text",
67 | "string",
68 | "array",
69 | "length",
70 | "index",
71 | "size",
72 | "splice",
73 | "slice",
74 | "text-processing",
75 | "text processing",
76 | "multi-byte",
77 | "multibyte",
78 | "multi",
79 | "byte",
80 | "runes",
81 | "rune",
82 | "glyphs",
83 | "glyph",
84 | "encoding",
85 | "emoji",
86 | "MIT"
87 | ]
88 | }
89 |
--------------------------------------------------------------------------------
/src/__tests__/index.js:
--------------------------------------------------------------------------------
1 | import chunk from '../index';
2 | import { TextEncoder } from 'fastestsmallesttextencoderdecoder-encodeinto';
3 | it("should throw if 'text' is missing or its type or value are invalid.", () => { // argument validation: 'text'
4 | expect(() => {
5 | chunk(); // called with no arguments, so 'text' is undefined
6 | }).toThrow(
7 | new TypeError('Text should be provided as first argument and be a string.')
8 | );
9 | });
10 |
11 | it("should throw if 'size' is missing or its type or value are invalid.", () => { // argument validation: 'size'
12 | expect(() => {
13 | chunk('hello world'); // size omitted entirely
14 | }).toThrow(
15 | new TypeError(
16 | 'Size should be provided as 2nd argument and parseInt to a value greater than zero.'
17 | )
18 | );
19 | expect(() => {
20 | chunk('hello world', 0); // zero is rejected: the size must be greater than zero
21 | }).toThrow(
22 | new TypeError(
23 | 'Size should be provided as 2nd argument and parseInt to a value greater than zero.'
24 | )
25 | );
26 | });
27 |
28 | it("should throw if 'type' argument's type or value is invalid.", () => { // exercises the charLengthMask option (the title calls it 'type')
29 | expect(() => {
30 | chunk('hello world', 1, { charLengthMask: 'one' }); // non-numeric string
31 | }).toThrow(
32 | new TypeError(
33 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.'
34 | )
35 | );
36 | expect(() => {
37 | chunk('hello world', 1, { charLengthMask: -2.001 }); // fractional value below -1
38 | }).toThrow(
39 | new TypeError(
40 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.'
41 | )
42 | );
43 | expect(() => {
44 | chunk('hello world', 1, { charLengthMask: -2 }); // integer below -1
45 | }).toThrow(
46 | new TypeError(
47 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.'
48 | )
49 | );
50 | expect(() => {
51 | chunk('hello world', 1, { charLengthMask: 3 }); // positive integer: must not raise the charLengthMask error
52 | }).not.toThrow(
53 | new TypeError(
54 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.'
55 | )
56 | );
57 | expect(() => {
58 | chunk('hello world', 1, { charLengthMask: '3' }); // numeric string: must not raise the charLengthMask error either
59 | }).not.toThrow(
60 | new TypeError(
61 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.'
62 | )
63 | );
64 | });
65 |
66 | it("should not throw if 'type' type and value are missing or valid.", () => { // NOTE(review): despite the title, the first three cases below expect a throw
67 | expect(() => {
68 | chunk('hello world', 1, { charLengthMask: '' }); // key present, empty string
69 | }).toThrow(
70 | new TypeError(
71 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.'
72 | )
73 | );
74 | expect(() => {
75 | chunk('hello world', 1, { charLengthMask: null }); // key present, null
76 | }).toThrow(
77 | new TypeError(
78 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.'
79 | )
80 | );
81 | expect(() => {
82 | chunk('hello world', 1, { charLengthMask: undefined }); // key present but explicitly undefined
83 | }).toThrow(
84 | new TypeError(
85 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.'
86 | )
87 | );
88 | expect(() => {
89 | chunk('hello world', 1, {}); // key absent from the options object
90 | }).not.toThrow(
91 | new TypeError(
92 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.'
93 | )
94 | );
95 | expect(() => {
96 | chunk('hello world', 1, { charLengthType: 'length' }); // only charLengthType supplied
97 | }).not.toThrow(
98 | new TypeError(
99 | 'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.'
100 | )
101 | );
102 | expect(() => {
103 | chunk('hello world', 1); // no options argument at all
104 | }).not.toThrow( // NOTE(review): from here on only the 'Options ...' message is ruled out; a charLengthMask TypeError would still satisfy these assertions - confirm intent
105 | new TypeError(
106 | 'Options should be provided as 3rd (optional) argument and be an object.\n' +
107 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']"
108 | )
109 | );
110 | expect(() => {
111 | chunk('hello world', 1, { charLengthMask: -1.999 }); // fractional value just above -2
112 | }).not.toThrow(
113 | new TypeError(
114 | 'Options should be provided as 3rd (optional) argument and be an object.\n' +
115 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']"
116 | )
117 | );
118 | expect(() => {
119 | chunk('hello world', 1, { charLengthMask: -0.001 }); // tiny negative fraction
120 | }).not.toThrow(
121 | new TypeError(
122 | 'Options should be provided as 3rd (optional) argument and be an object.\n' +
123 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']"
124 | )
125 | );
126 | expect(() => {
127 | chunk('hello world', 1, { charLengthMask: 0.0 }); // zero written as a float literal
128 | }).not.toThrow(
129 | new TypeError(
130 | 'Options should be provided as 3rd (optional) argument and be an object.\n' +
131 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']"
132 | )
133 | );
134 | expect(() => {
135 | chunk('hello world', 1, { charLengthMask: 1.0 }); // one written as a float literal
136 | }).not.toThrow(
137 | new TypeError(
138 | 'Options should be provided as 3rd (optional) argument and be an object.\n' +
139 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']"
140 | )
141 | );
142 | expect(() => {
143 | chunk('hello world', 1, { charLengthMask: new Number.BigInt(2.0) }); // NOTE(review): Number.BigInt is not a standard property, so this expression throws its own TypeError before chunk is called; BigInt(2) was presumably intended
144 | }).not.toThrow(
145 | new TypeError(
146 | 'Options should be provided as 3rd (optional) argument and be an object.\n' +
147 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']"
148 | )
149 | );
150 | expect(() => {
151 | chunk('hello world', 1, { charLengthMask: 2.999 }); // positive fraction
152 | }).not.toThrow(
153 | new TypeError(
154 | 'Options should be provided as 3rd (optional) argument and be an object.\n' +
155 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']"
156 | )
157 | );
158 | expect(() => {
159 | chunk('hello world', 1, { charLengthMask: '2.99999 years' }); // string with a numeric prefix and trailing text
160 | }).not.toThrow(
161 | new TypeError(
162 | 'Options should be provided as 3rd (optional) argument and be an object.\n' +
163 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']"
164 | )
165 | );
166 | expect(() => {
167 | chunk('hello world', 1, { charLengthMask: '2' }); // plain numeric string
168 | }).not.toThrow(
169 | new TypeError(
170 | 'Options should be provided as 3rd (optional) argument and be an object.\n' +
171 | "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'TextEncoder']"
172 | )
173 | );
174 | });
175 |
176 | it('should return an array of strings.', () => { // happy path: two words that each fit the chunk size exactly
177 | const pieces = chunk('hello world', 5); // 'hello' and 'world' are both 5 characters long
178 | expect(pieces).toEqual(['hello', 'world']); // the separating space does not appear in either chunk
179 | });
180 |
181 | it('should not cut in the middle of words', () => { // chunks break at spaces when the next word would not fit
182 | const pieces = chunk('hello world how are you?', 7); // 'how are' is exactly 7 characters, 'hello world' is 11
183 | expect(pieces).toEqual(['hello', 'world', 'how are', 'you?']); // words are grouped only while the group stays within 7
184 | });
185 |
186 | it('should truncate a word if longer than size', () => { // a word longer than the chunk size is split, not dropped
187 | const pieces = chunk('hello you', 4); // 'hello' has 5 characters, one more than the limit
188 | expect(pieces).toEqual(['hell', 'o', 'you']); // the leftover 'o' becomes its own chunk rather than joining 'you'
189 | });
190 |
191 | it('should count multi-byte characters as single characters by default', () => {
192 | // each of these characters is two bytes
193 | const chineseTextA = '𤻪';
194 | const chineseTextB = '𬜬';
195 | const chineseTextC = '𬜯';
196 | const chineseText = chineseTextA + chineseTextB + chineseTextC;
197 | expect(chunk(chineseText, 2)).toEqual([
198 | chineseTextA + chineseTextB,
199 | chineseTextC,
200 | ]);
201 | expect(chunk(chineseText, 1)).toEqual([
202 | chineseTextA,
203 | chineseTextB,
204 | chineseTextC,
205 | ]);
206 |
207 | // each of these characters is two bytes
208 | const fourCheese = '🧀🧀🧀🧀';
209 | const camembert = `${fourCheese} ${fourCheese}`;
210 | expect(chunk(camembert, 4)).toEqual([fourCheese, fourCheese]);
211 |
212 | // one woman runner emoji with a colour is seven bytes, or five characters
213 | // RUNNER(2) + COLOUR(2) + ZWJ + GENDER + VS15
214 | const runner = '🏃🏽♀️';
215 | expect(
216 | chunk(runner + runner + runner + runner + runner + runner + runner, 3)
217 | ).toEqual([runner + runner + runner, runner + runner + runner, runner]);
218 | });
219 |
220 | it('should count all characters as single characters using charLengthMask -1 or 1 values', () => {
221 | // each of these characters is two bytes
222 | const chineseTextA = '𤻪';
223 | const chineseTextB = '𬜬';
224 | const chineseTextC = '𬜯';
225 | const chineseText = chineseTextA + chineseTextB + chineseTextC;
226 | expect(chunk(chineseText, 2, { charLengthMask: -1 })).toEqual([
227 | chineseTextA + chineseTextB,
228 | chineseTextC,
229 | ]);
230 | expect(chunk(chineseText, 1, { charLengthMask: -1 })).toEqual([
231 | chineseTextA,
232 | chineseTextB,
233 | chineseTextC,
234 | ]);
235 | expect(chunk(chineseText, 2, { charLengthMask: 1 })).toEqual([
236 | chineseTextA + chineseTextB,
237 | chineseTextC,
238 | ]);
239 | expect(chunk(chineseText, 1, { charLengthMask: 1 })).toEqual([
240 | chineseTextA,
241 | chineseTextB,
242 | chineseTextC,
243 | ]);
244 |
245 | // each of these characters is two bytes
246 | const fourCheese = '🧀🧀🧀🧀';
247 | const camembert = `${fourCheese} ${fourCheese}`;
248 | expect(chunk(camembert, 4, { charLengthMask: -1 })).toEqual([
249 | fourCheese,
250 | fourCheese,
251 | ]);
252 | expect(chunk(camembert, 4, { charLengthMask: 1 })).toEqual([
253 | fourCheese,
254 | fourCheese,
255 | ]);
256 |
257 | // The Woman Running emoji is a ZWJ sequence combining 🏃 Person Running, Zero Width Joiner and ♀ Female Sign.
258 | // each of these characters is five bytes
259 | const womanRunningZWJ = '🏃♀️';
260 | const womenRunningZWJ = `${
261 | womanRunningZWJ + womanRunningZWJ + womanRunningZWJ + womanRunningZWJ
262 | } ${womanRunningZWJ + womanRunningZWJ}`;
263 | expect(chunk(womenRunningZWJ, 2, { charLengthMask: -1 })).toEqual([
264 | womanRunningZWJ + womanRunningZWJ,
265 | womanRunningZWJ + womanRunningZWJ,
266 | womanRunningZWJ + womanRunningZWJ,
267 | ]);
268 | expect(chunk(womenRunningZWJ, 2, { charLengthMask: 1 })).toEqual([
269 | womanRunningZWJ + womanRunningZWJ,
270 | womanRunningZWJ + womanRunningZWJ,
271 | womanRunningZWJ + womanRunningZWJ,
272 | ]);
273 | });
274 |
275 | it('should count characters as bytes using charLengthMask value 0', () => {
276 | // each of these characters is two bytes
277 | const chineseTextA = '𤻪';
278 | const chineseTextB = '𬜬';
279 | const chineseTextC = '𬜯';
280 | const chineseText = chineseTextA + chineseTextB + chineseTextC;
281 | expect(chunk(chineseText, 2, { charLengthMask: 0 })).toEqual([
282 | chineseTextA,
283 | chineseTextB,
284 | chineseTextC,
285 | ]);
286 | expect(chunk(chineseText, 1, { charLengthMask: 0 })).toEqual([
287 | chineseTextA,
288 | chineseTextB,
289 | chineseTextC,
290 | ]);
291 | expect(chunk(chineseText, 4, { charLengthMask: 0 })).toEqual([
292 | chineseTextA + chineseTextB,
293 | chineseTextC,
294 | ]);
295 | expect(chunk(chineseText, 6, { charLengthMask: 0 })).toEqual([
296 | chineseTextA + chineseTextB + chineseTextC,
297 | ]);
298 |
299 | // each of these characters is two bytes
300 | const twoCheese = '🧀🧀';
301 | const camembert = `${twoCheese + twoCheese} ${twoCheese + twoCheese}`;
302 | expect(chunk(camembert, 4, { charLengthMask: 0 })).toEqual([
303 | twoCheese,
304 | twoCheese,
305 | twoCheese,
306 | twoCheese,
307 | ]);
308 |
309 | // The Woman Running emoji is a ZWJ sequence combining 🏃 Person Running, Zero Width Joiner and ♀ Female Sign.
310 | // each of these characters is five bytes
311 | const womanRunningZWJ = '🏃♀️';
312 | const womenRunningZWJ = `${
313 | womanRunningZWJ + womanRunningZWJ + womanRunningZWJ + womanRunningZWJ
314 | } ${womanRunningZWJ + womanRunningZWJ}`;
315 | expect(chunk(womenRunningZWJ, 10, { charLengthMask: 0 })).toEqual([
316 | womanRunningZWJ + womanRunningZWJ,
317 | womanRunningZWJ + womanRunningZWJ,
318 | womanRunningZWJ + womanRunningZWJ,
319 | ]);
320 | expect(
321 | chunk(
322 | `12123123 1231231 312312312 123 12 ${womanRunningZWJ} ${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ} ${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}`,
323 | 44,
324 | { charLengthMask: 0 }
325 | )
326 | ).toEqual([
327 | `12123123 1231231 312312312 123 12 ${womanRunningZWJ}`,
328 | `${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}`,
329 | `${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ} ${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}`,
330 | ]);
331 |
332 | // one woman runner emoji with a colour is seven bytes, or five characters
333 | // RUNNER(2) + COLOUR(2) + ZWJ + GENDER + VS15
334 | const runner = '🏃🏽♀️';
335 | expect(chunk(runner + runner + runner, 17, { charLengthMask: 0 })).toEqual([
336 | runner + runner,
337 | runner,
338 | ]);
339 | expect(
340 | chunk(
341 | `12123123 1231231 312312312 123 12 ${runner}${runner}${runner} ${runner}${runner}${runner} ${runner}${runner}${runner}${runner} ${runner} ${runner}${runner} ${runner}`,
342 | 28,
343 | { charLengthMask: 0 }
344 | )
345 | ).toEqual([
346 | `12123123 1231231 312312312`,
347 | `123 12 ${runner}${runner}${runner}`,
348 | `${runner}${runner}${runner}`,
349 | `${runner}${runner}${runner}${runner}`,
350 | `${runner} ${runner}${runner}`,
351 | `${runner}`,
352 | ]);
353 | });
354 |
355 | it('should count single width characters the same with all charLengthMask values', () => { // plain ASCII input must chunk identically whatever the mask
356 | for (let i = 0; i < 100; i++) { // masks 0 through 99
357 | expect(chunk('hello you', 4, { charLengthMask: i })).toEqual([
358 | 'hell',
359 | 'o',
360 | 'you',
361 | ]);
362 | }
363 | });
364 |
365 | it('should count characters as bytes up to maximum N charLengthMask value > 0', () => {
366 | // each of these characters is two bytes
367 | const chineseTextA = '𤻪';
368 | const chineseTextB = '𬜬';
369 | const chineseTextC = '𬜯';
370 | const chineseText = chineseTextA + chineseTextB + chineseTextC;
371 | expect(chunk(chineseText, 2, { charLengthMask: 2 })).toEqual([
372 | chineseTextA,
373 | chineseTextB,
374 | chineseTextC,
375 | ]);
376 | expect(chunk(chineseText, 4, { charLengthMask: 2 })).toEqual([
377 | chineseTextA + chineseTextB,
378 | chineseTextC,
379 | ]);
380 | expect(chunk(chineseText, 2, { charLengthMask: 1 })).toEqual([
381 | chineseTextA + chineseTextB,
382 | chineseTextC,
383 | ]);
384 |
385 | // each of these characters is two bytes
386 | const cheese = '🧀';
387 | const twoCheese = cheese + cheese;
388 | const camembert = `${twoCheese + twoCheese} ${twoCheese + twoCheese}`;
389 | expect(chunk(camembert, 4, { charLengthMask: 2 })).toEqual([
390 | twoCheese,
391 | twoCheese,
392 | twoCheese,
393 | twoCheese,
394 | ]);
395 | expect(chunk(camembert, 2, { charLengthMask: 4 })).toEqual([
396 | cheese,
397 | cheese,
398 | cheese,
399 | cheese,
400 | cheese,
401 | cheese,
402 | cheese,
403 | cheese,
404 | ]);
405 |
406 | // The Woman Running emoji is a ZWJ sequence combining 🏃 Person Running, Zero Width Joiner and ♀ Female Sign.
407 | // each of these characters is five bytes
408 | const womanRunningZWJ = '🏃♀️';
409 | const womenRunningZWJ = `${
410 | womanRunningZWJ + womanRunningZWJ + womanRunningZWJ + womanRunningZWJ
411 | } ${womanRunningZWJ + womanRunningZWJ}`;
412 | expect(chunk(womenRunningZWJ, 2, { charLengthMask: 0 })).toEqual([
413 | womanRunningZWJ,
414 | womanRunningZWJ,
415 | womanRunningZWJ,
416 | womanRunningZWJ,
417 | womanRunningZWJ,
418 | womanRunningZWJ,
419 | ]);
420 | for (let i = 2; i < 100; i++) {
421 | expect(chunk(womenRunningZWJ, 2, { charLengthMask: i })).toEqual([
422 | womanRunningZWJ,
423 | womanRunningZWJ,
424 | womanRunningZWJ,
425 | womanRunningZWJ,
426 | womanRunningZWJ,
427 | womanRunningZWJ,
428 | ]);
429 | }
430 | expect(chunk(womenRunningZWJ, 4, { charLengthMask: 1 })).toEqual([
431 | womanRunningZWJ + womanRunningZWJ + womanRunningZWJ + womanRunningZWJ,
432 | womanRunningZWJ + womanRunningZWJ,
433 | ]);
434 | expect(chunk(womenRunningZWJ, 4, { charLengthMask: 2 })).toEqual([
435 | womanRunningZWJ + womanRunningZWJ,
436 | womanRunningZWJ + womanRunningZWJ,
437 | womanRunningZWJ + womanRunningZWJ,
438 | ]);
439 | expect(chunk(womenRunningZWJ, 8, { charLengthMask: 4 })).toEqual([
440 | womanRunningZWJ + womanRunningZWJ,
441 | womanRunningZWJ + womanRunningZWJ,
442 | womanRunningZWJ + womanRunningZWJ,
443 | ]);
444 | for (let i = 9; i < 100; i++) {
445 | expect(chunk(womenRunningZWJ, 11, { charLengthMask: i })).toEqual([
446 | womanRunningZWJ + womanRunningZWJ,
447 | womanRunningZWJ + womanRunningZWJ,
448 | womanRunningZWJ + womanRunningZWJ,
449 | ]);
450 | }
451 | expect(
452 | chunk(
453 | `12123123 1231231 312312312 123 12 ${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ} ${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}`,
454 | 12,
455 | { charLengthMask: 2 }
456 | )
457 | ).toEqual([
458 | '12123123',
459 | '1231231',
460 | '312312312',
461 | '123 12',
462 | `${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}`,
463 | `${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}`,
464 | `${womanRunningZWJ}${womanRunningZWJ} ${womanRunningZWJ}`,
465 | ]);
466 |
467 | // one woman runner emoji with a colour is seven bytes, or five characters
468 | // RUNNER(2) + COLOUR(2) + ZWJ + GENDER + VS15
469 | const runner = '🏃🏽♀️';
470 | expect(chunk(runner + runner + runner, 4, { charLengthMask: 2 })).toEqual([
471 | runner + runner,
472 | runner,
473 | ]);
474 | expect(
475 | chunk(
476 | `12123123 1231231 312312312 123 12 ${runner}${runner}${runner}${runner}${runner}${runner} ${runner}${runner}${runner}${runner} ${runner} ${runner}${runner} ${runner}`,
477 | 12,
478 | { charLengthMask: 2 }
479 | )
480 | ).toEqual([
481 | '12123123',
482 | '1231231',
483 | '312312312',
484 | '123 12',
485 | `${runner}${runner}${runner}${runner}${runner}${runner}`,
486 | `${runner}${runner}${runner}${runner} ${runner}`,
487 | `${runner}${runner} ${runner}`,
488 | ]);
489 | });
490 |
it('should count N-byte characters with charLengthMask value 0 the same as charLengthMask value N', () => {
  // Builds a fresh options object for the given mask.
  const masked = (charLengthMask) => ({ charLengthMask });

  // Cheese emoji: the original note counts each one as two units.
  const cheeses = '🧀🧀🧀🧀 🧀🧀🧀🧀';
  const cheesesAtTwo = chunk(cheeses, 8, masked(2));
  expect(cheesesAtTwo).toEqual(chunk(cheeses, 8, masked(0)));

  // Woman Running is a ZWJ sequence (Person Running + Zero Width Joiner +
  // Female Sign); the original note counts each one as five units.
  const womanRunner = '🏃♀️';
  const womenRunners = `${womanRunner.repeat(4)} ${womanRunner.repeat(2)}`;
  const womenUnmasked = chunk(womenRunners, 2, masked(0));
  expect(womenUnmasked).toEqual(chunk(womenRunners, 2, masked(5)));

  // Woman runner with a skin tone: seven units, five code points
  // (RUNNER(2) + COLOUR(2) + ZWJ + GENDER + variation selector).
  const tonedRunner = '🏃🏽♀️';
  const tonedRunners = tonedRunner.repeat(3);
  const tonedUnmasked = chunk(tonedRunners, 2, masked(0));
  expect(tonedUnmasked).toEqual(chunk(tonedRunners, 2, masked(7)));
});
516 |
it('should count default charLengthMask the same as charLengthMask value -1', () => {
  // Asserts that omitting the options gives the same chunks as an explicit
  // charLengthMask of -1 for the given text and chunk size.
  const expectDefaultEqualsMinusOne = (sample, size) => {
    const withDefault = chunk(sample, size);
    const withMinusOne = chunk(sample, size, { charLengthMask: -1 });
    expect(withDefault).toEqual(withMinusOne);
  };

  // CJK characters outside the BMP (two units each per the original note).
  const chineseText = '𤻪𬜬𬜯';
  expectDefaultEqualsMinusOne(chineseText, 2);
  expectDefaultEqualsMinusOne(chineseText, 1);

  // Cheese emoji (two units each per the original note).
  expectDefaultEqualsMinusOne('🧀🧀🧀🧀 🧀🧀🧀🧀', 4);

  // Woman Running ZWJ sequence (five units each per the original note).
  const womanRunner = '🏃♀️';
  expectDefaultEqualsMinusOne(
    `${womanRunner.repeat(4)} ${womanRunner.repeat(2)}`,
    2
  );

  // Woman runner with a skin tone: seven units, five code points
  // (RUNNER(2) + COLOUR(2) + ZWJ + GENDER + variation selector).
  const tonedRunner = '🏃🏽♀️';
  expectDefaultEqualsMinusOne(tonedRunner.repeat(3), 2);
});
549 |
it('should not cut combined characters', () => {
  // Woman runner with a skin tone: seven units, five code points
  // (RUNNER(2) + COLOUR(2) + ZWJ + GENDER + variation selector).
  // With the default mask each combined emoji must stay in one piece.
  const tonedRunner = '🏃🏽♀️';
  const threeRunners = tonedRunner.repeat(3);
  const allTogether = chunk(threeRunners, 3);
  const oneByOne = chunk(threeRunners, 1);
  expect(allTogether).toEqual([threeRunners]);
  expect(oneByOne).toEqual([tonedRunner, tonedRunner, tonedRunner]);

  // FLAG + RAINBOW
  const prideFlag = '🏳️🌈';
  const flagPerChunk = chunk(prideFlag.repeat(2), 1);
  expect(flagPerChunk).toEqual([prideFlag, prideFlag]);
});
563 |
it('allows alternate TextEncoder', () => {
  // Option factories: unmasked counting by String.length, and unmasked
  // counting through a caller-supplied encoder.
  const byLength = () => ({ charLengthMask: 0 });
  const byEncoder = (textEncoder) => ({
    charLengthMask: 0,
    charLengthType: 'TextEncoder',
    textEncoder,
  });

  // Woman runner with a skin tone, three times over.
  // RUNNER(2) + COLOUR(2) + ZWJ + GENDER + variation selector:
  // 7 each as length, 17 each as TextEncoder -> 21 and 51 in total.
  const runners = '🏃🏽♀️🏃🏽♀️🏃🏽♀️';

  // FLAG + RAINBOW, twice.
  // Original note: "2 each as length, 4 each as TextEncoder".
  // NOTE(review): those counts look low for this sequence; confirm.
  const flags = '🏳️🌈🏳️🌈';

  // Without a `textEncoder` option the library must construct its own; this
  // expectation assumes no global TextEncoder is visible to the code under
  // test (original note: "Node v14.5.0 does not provide TextEncoder natively").
  const withoutEncoder = () => {
    chunk(runners, 14, { charLengthMask: 0, charLengthType: 'TextEncoder' });
  };
  expect(withoutEncoder).toThrow(
    new ReferenceError(
      "TextEncoder is not natively defined, new TextEncoder must be passed in with the 'chunkOptions.textEncoder' property."
    )
  );

  // A real encoder with the encoded total as the size behaves like plain
  // length counting with the length total as the size.
  // NOTE(review): `TextEncoder` is presumably imported at the top of this test
  // file, outside the visible chunk.
  const runnersEncoded = chunk(runners, 51, byEncoder(new TextEncoder()));
  expect(runnersEncoded).toEqual(chunk(runners, 21, byLength()));

  const flagsEncoded = chunk(flags, 4, byEncoder(new TextEncoder()));
  expect(flagsEncoded).toEqual(chunk(flags, 2, byLength()));

  // Any object exposing `.encode(text).length` is accepted as an encoder.
  const constantEncoder = {
    encode: () => ({ length: 999 }),
  };
  const flagsStubbed = chunk(flags, 999, byEncoder(constantEncoder));
  expect(flagsStubbed).toEqual(chunk(flags, 2, byLength()));
});
611 |
--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 | import runes from 'runes';
2 |
/**
 * Guards the first argument of the chunker.
 *
 * @param {*} text - value supplied as the text to split.
 * @throws {TypeError} when `text` is not a primitive string.
 */
const assertIsValidText = (text) => {
  const isPrimitiveString = typeof text === 'string';
  if (isPrimitiveString) {
    return;
  }
  throw new TypeError(
    'Text should be provided as first argument and be a string.'
  );
};
10 |
/**
 * Guards the chunk size argument.
 *
 * The value is parsed with `Number.parseInt(…, 10)` before any check, so the
 * NaN test applies to the parsed number. Testing the raw argument with
 * `Number.isNaN` would let non-numeric strings such as `'abc'` through,
 * because `Number.isNaN` never coerces and `NaN <= 0` is false.
 *
 * @param {*} chunkSize - requested chunk size (number or numeric string).
 * @throws {TypeError} when the value does not parse to an integer > 0.
 */
const assertIsValidChunkSize = (chunkSize) => {
  const parsedSize = Number.parseInt(chunkSize, 10);
  if (Number.isNaN(parsedSize) || parsedSize <= 0) {
    throw new TypeError(
      'Size should be provided as 2nd argument and parseInt to a value greater than zero.'
    );
  }
};
18 |
/**
 * Guards the optional third argument of the chunker.
 *
 * Anything whose `typeof` is `'object'` (plain objects, arrays, `null`), as
 * well as `undefined` and the empty string, is let through; every other value
 * is rejected.
 *
 * @param {*} chunkOptions - value supplied as the options argument.
 * @throws {TypeError} when the value is none of the accepted kinds.
 */
const assertIsValidChunkOptions = (chunkOptions) => {
  const kind = typeof chunkOptions;
  const isAccepted =
    kind === 'object' ||
    kind === 'undefined' ||
    chunkOptions === null ||
    chunkOptions === '';
  if (isAccepted) {
    return;
  }
  throw new TypeError(
    'Options should be provided as 3rd (optional) argument and be an object.\n' +
      "Potential chunkOptions object properties include: ['charLengthMask', 'charLengthType', 'textEncoder']"
  );
};
32 |
/**
 * Guards the `charLengthMask` option using values pre-computed by the caller.
 *
 * @param {*} charLengthMask - raw option value (not inspected here).
 * @param {boolean} charLengthMaskIntParseIntNaN - whether parsing the raw
 *   value produced NaN.
 * @param {number} charLengthMaskInt - the parsed value.
 * @throws {TypeError} when parsing failed or the parsed value is below -1.
 */
const assertIsValidCharLengthMask = (
  charLengthMask,
  charLengthMaskIntParseIntNaN,
  charLengthMaskInt
) => {
  const isBelowMinimum = charLengthMaskInt < -1;
  if (!charLengthMaskIntParseIntNaN && !isBelowMinimum) {
    return;
  }
  throw new TypeError(
    'charLengthMask should be provided as a chunkOptions property and parseInt to a value >= -1.'
  );
};
44 |
/**
 * Guards the `textEncoder` option.
 *
 * Only the obviously unusable kinds are rejected (strings, arrays,
 * `undefined`, `null`); the presence of an `encode` method is not verified.
 *
 * @param {*} textEncoder - value supplied as `chunkOptions.textEncoder`.
 * @throws {TypeError} when the value is one of the rejected kinds.
 */
const assertIsValidTextEncoder = (textEncoder) => {
  const isMissing = textEncoder === null || typeof textEncoder === 'undefined';
  const isWrongKind =
    typeof textEncoder === 'string' || Array.isArray(textEncoder);
  if (isMissing || isWrongKind) {
    throw new TypeError(
      'textEncoder should be provided as a chunkOptions property and be an object containing the .encode(text).length property.'
    );
  }
};
57 |
/**
 * Guards the `charLengthType` option.
 *
 * @param {*} charLengthType - value supplied as `chunkOptions.charLengthType`.
 * @throws {TypeError} unless the value is exactly the string `'length'` or
 *   `'TextEncoder'`.
 */
const assertIsValidCharLengthType = (charLengthType) => {
  // `switch` compares with strict equality, so only primitive strings match.
  switch (charLengthType) {
    case 'length':
    case 'TextEncoder':
      return;
    default:
      throw new TypeError(
        "charLengthType should be provided as a chunkOptions property and be a value in ['length', 'TextEncoder']"
      );
  }
};
68 |
/**
 * Measures a string, or an array of runes, under the configured counting rule.
 *
 * Counting rules, selected by `charLengthMask`:
 *  - `0`: every rune contributes its full measured length;
 *  - `> 0`: every rune contributes at most `charLengthMask`;
 *  - anything else (the default `-1`): every rune counts as 1, so the result
 *    is simply the number of runes.
 *
 * A rune's measured length is `rune.length`, or `textEncoder.encode(rune).length`
 * when `charLengthType` is `'TextEncoder'`.
 *
 * The sum is seeded with 0 so that the FIRST rune is clamped like every other
 * one. Without a seed, `reduce` uses the first rune's raw length as the
 * starting accumulator and over-counts whenever that rune is longer than the
 * mask.
 *
 * @param {string|string[]} characters - a string (measured as a single rune)
 *   or an array of runes.
 * @param {number} charLengthMask - counting rule, see above.
 * @param {string} charLengthType - `'length'` or `'TextEncoder'`.
 * @param {{encode: function(string): {length: number}}} [textEncoder] -
 *   only used when `charLengthType` is `'TextEncoder'` and the mask is >= 0.
 * @returns {number} the measured length, or `-1` when `characters` is empty,
 *   `undefined`, `null`, or neither a string nor an array.
 */
const chunkLength = (
  characters,
  charLengthMask,
  charLengthType,
  textEncoder
) => {
  let runesToMeasure;
  if (typeof characters === 'string' && characters !== '') {
    runesToMeasure = [characters];
  } else if (Array.isArray(characters) && characters.length > 0) {
    runesToMeasure = characters;
  } else {
    // Nothing measurable: callers use a non-positive result as "done".
    return -1;
  }

  const measure = (rune) =>
    (charLengthType === 'TextEncoder' ? textEncoder.encode(rune) : rune).length;

  if (charLengthMask === 0) {
    return runesToMeasure.reduce((total, rune) => total + measure(rune), 0);
  }

  if (charLengthMask > 0) {
    // Each term is already <= charLengthMask, so the total can never exceed
    // runes * charLengthMask and no extra cap is needed.
    return runesToMeasure.reduce(
      (total, rune) => total + Math.min(measure(rune), charLengthMask),
      0
    );
  }

  return runesToMeasure.length;
};
128 |
/**
 * Finds where a chunk may end at or before `upTo`.
 *
 * Works on anything exposing `lastIndexOf` and `length` (a string or an array
 * of runes). The result is the index of the last `' '` found when searching
 * backwards from `upTo`, or `upTo` itself when there is none. When `upTo`
 * reaches the end of `text` (or the candidate lies past it) the result is
 * `text.length`.
 *
 * @param {string|string[]} text - sequence to search.
 * @param {number} upTo - index to search backwards from.
 * @returns {number} the candidate split index.
 */
const lastSpaceOrLength = (text, upTo) => {
  const spaceIndex = text.lastIndexOf(' ', upTo);
  const candidate = spaceIndex === -1 ? upTo : spaceIndex;
  const reachesEnd = candidate > text.length || upTo >= text.length;
  return reachesEnd ? text.length : candidate;
};
139 |
/**
 * Computes how many leading runes of `characters` form the next chunk.
 *
 * Steps:
 *  1. start from the last space at or before `chunkSize` (or `chunkSize`
 *     itself, or the end of the sequence);
 *  2. step back one rune at a time while the measured length of the candidate
 *     prefix still exceeds `chunkSize`;
 *  3. snap back to the last space at or before that point;
 *  4. consume at least one rune, and swallow the space the split lands on;
 *  5. clamp out-of-range results to the full length.
 *
 * @param {string[]} characters - remaining runes.
 * @param {number} chunkSize - maximum measured size of a chunk.
 * @param {number} charLengthMask - counting rule passed on to `chunkLength`.
 * @param {string} charLengthType - `'length'` or `'TextEncoder'`.
 * @param {object} [textEncoder] - encoder passed on to `chunkLength`.
 * @returns {number} index at which to slice `characters`.
 */
const chunkIndexOf = (
  characters,
  chunkSize,
  charLengthMask,
  charLengthType,
  textEncoder
) => {
  const overflows = (end) =>
    chunkSize <
    chunkLength(
      characters.slice(0, end),
      charLengthMask,
      charLengthType,
      textEncoder
    );

  let cut = lastSpaceOrLength(characters, chunkSize);
  for (; cut > 0 && overflows(cut); cut -= 1) {
    // shrinking the candidate prefix until it fits
  }

  cut = lastSpaceOrLength(characters, cut);

  const isAtStart = cut < 1 && cut > -2;
  if (isAtStart || characters[cut] === ' ') {
    cut += 1;
  }

  const total = characters.length;
  const isOutOfRange = cut > total || cut < 0;
  const isStuckOnSingle = cut === 0 && total === 1;
  return isOutOfRange || isStuckOnSingle ? total : cut;
};
174 |
/**
 * Splits `text` into an array of chunks no larger than `chunkSize`.
 *
 * The text is first broken into runes with the `runes` package, then chunks
 * are cut off the front one at a time at the index chosen by `chunkIndexOf`.
 * Each chunk is trimmed and empty chunks are dropped.
 *
 * @param {string} text - text to split.
 * @param {number|string} chunkSize - maximum chunk size; parsed with
 *   `Number.parseInt(…, 10)` and must be greater than zero.
 * @param {object|null} [chunkOptions] - optional settings; `null`,
 *   `undefined` and `''` all mean "no options".
 * @param {number|string} [chunkOptions.charLengthMask=-1] - `-1` counts each
 *   rune as 1, `0` counts each rune at its full measured length, `N > 0`
 *   counts each rune as at most `N`.
 * @param {string} [chunkOptions.charLengthType='length'] - `'length'` or
 *   `'TextEncoder'`.
 * @param {object} [chunkOptions.textEncoder] - object exposing
 *   `.encode(text).length`; read only when `charLengthType` is
 *   `'TextEncoder'`. When it is absent a global `TextEncoder` is constructed.
 * @returns {string[]} the trimmed, non-empty chunks in order.
 * @throws {TypeError} when an argument or option fails validation.
 * @throws {ReferenceError} when a `TextEncoder` is required but neither
 *   supplied nor constructible.
 */
export default (text, chunkSize, chunkOptions) => {
  assertIsValidText(text);
  const chunkSizeInt = Number.parseInt(chunkSize, 10);
  assertIsValidChunkSize(chunkSizeInt);
  assertIsValidChunkOptions(chunkOptions);

  // `typeof null` is 'object', so `null` passes the validation above; treat it
  // (like `undefined` and '') as an empty options bag instead of letting the
  // property look-ups below throw on it.
  const options =
    typeof chunkOptions === 'object' && chunkOptions !== null
      ? chunkOptions
      : {};
  const hasOption = (key) =>
    Object.prototype.hasOwnProperty.call(options, key);

  let charLengthMask = -1;
  if (hasOption('charLengthMask')) {
    const parsedMask = Number.parseInt(options.charLengthMask, 10);
    assertIsValidCharLengthMask(
      options.charLengthMask,
      Number.isNaN(parsedMask),
      parsedMask
    );
    charLengthMask = parsedMask;
  }

  let textEncoder;
  if (hasOption('charLengthType')) {
    assertIsValidCharLengthType(options.charLengthType);
    if (options.charLengthType === 'TextEncoder' && hasOption('textEncoder')) {
      assertIsValidTextEncoder(options.textEncoder);
      textEncoder = options.textEncoder;
    }
  }
  const charLengthType = options.charLengthType
    ? options.charLengthType
    : 'length';

  const needsDefaultEncoder =
    charLengthType === 'TextEncoder' &&
    (typeof textEncoder === 'undefined' ||
      textEncoder === null ||
      textEncoder === '');
  if (needsDefaultEncoder) {
    try {
      textEncoder = new TextEncoder();
    } catch (ex) {
      // Typically a ReferenceError because no global TextEncoder exists.
      throw new ReferenceError(
        "TextEncoder is not natively defined, new TextEncoder must be passed in with the 'chunkOptions.textEncoder' property."
      );
    }
  }

  const chunks = [];
  let characters = runes(text);
  while (
    chunkLength(characters, charLengthMask, charLengthType, textEncoder) > 0
  ) {
    const splitAt = chunkIndexOf(
      characters,
      chunkSizeInt,
      charLengthMask,
      charLengthType,
      textEncoder
    );
    // Trimming removes the space the split landed on; whitespace-only pieces
    // are dropped rather than emitted as empty chunks.
    const chunk = characters.slice(0, splitAt).join('').trim();
    if (chunk !== '') {
      chunks.push(chunk);
    }
    characters = characters.slice(splitAt);
  }
  return chunks;
};
248 |
--------------------------------------------------------------------------------