├── examples
├── rollup
│ ├── .gitignore
│ ├── index.html
│ ├── rollup.config.js
│ ├── package.json
│ ├── index.js
│ └── package-lock.json
├── thai.txt
├── node.js
└── elixir.ex
├── .gitignore
├── dist
├── index.html
├── break_iterator.wasm
├── break_iterator_cja.wasm
├── bundled.d.ts
├── bundled_cja.d.ts
├── index.d.ts
├── index.js
└── module.js
├── .babelrc
├── src
├── break_iterator.wasm
├── types.d.ts
├── bundled.js
├── bundled.d.ts
├── index.d.ts
└── index.js
├── .babelrc.bundled
├── rollup.config.js
├── filters.json
├── .github
└── workflows
│ ├── test.yml
│ ├── build.yml
│ └── build-emsdk.yml
├── Dockerfile.emsdk
├── Dockerfile.icu
├── Dockerfile
├── rollup.config.bundled.js
├── package.json
├── icu.py
├── break_iterator.c
├── README.md
└── test
└── index.test.js
/examples/rollup/.gitignore:
--------------------------------------------------------------------------------
1 | out.js
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | node_modules/
3 |
--------------------------------------------------------------------------------
/dist/index.html:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/examples/thai.txt:
--------------------------------------------------------------------------------
1 | ยังมีอาสาสมัครน้อยมากเมื่อเทียบกับประชากรที่เข้าถึงอินเทอร์เน็ตได้
2 |
--------------------------------------------------------------------------------
/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 | "presets": ["@babel/preset-env"],
3 | "plugins": ["@babel/plugin-transform-runtime"]
4 | }
5 |
--------------------------------------------------------------------------------
/dist/break_iterator.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/surferseo/intl-segmenter-polyfill/HEAD/dist/break_iterator.wasm
--------------------------------------------------------------------------------
/src/break_iterator.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/surferseo/intl-segmenter-polyfill/HEAD/src/break_iterator.wasm
--------------------------------------------------------------------------------
/examples/rollup/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/dist/break_iterator_cja.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/surferseo/intl-segmenter-polyfill/HEAD/dist/break_iterator_cja.wasm
--------------------------------------------------------------------------------
/src/types.d.ts:
--------------------------------------------------------------------------------
/**
 * Ambient typing for importing `break_iterator.wasm`: the default export is a
 * factory that takes an imports object and yields the instantiated module.
 */
declare module 'break_iterator.wasm' {
  const exports: (imports: object) => { instance: WebAssembly.Instance }
  export default exports
}
5 |
--------------------------------------------------------------------------------
/.babelrc.bundled:
--------------------------------------------------------------------------------
1 | {
2 | "presets": [
3 | [
4 | "@babel/preset-env",
5 | { "modules": false, "useBuiltIns": "usage", "corejs": 3 }
6 | ]
7 | ]
8 | }
9 |
--------------------------------------------------------------------------------
/src/bundled.js:
--------------------------------------------------------------------------------
1 | import break_iterator from './break_iterator.wasm'
2 | import { createIntlSegmenterPolyfillFromFactory } from './index'
3 |
/**
 * Creates the Segmenter class from the wasm factory imported by this entry
 * point, so callers do not have to supply the binary themselves.
 *
 * @returns whatever `createIntlSegmenterPolyfillFromFactory` returns for the
 *   imported `break_iterator` factory.
 */
export const createIntlSegmenterPolyfill = () =>
  createIntlSegmenterPolyfillFromFactory(break_iterator)
7 |
--------------------------------------------------------------------------------
/rollup.config.js:
--------------------------------------------------------------------------------
1 | import babel from '@rollup/plugin-babel'
2 |
// Library build: compiles src/index.js into a single UMD file at
// dist/index.js, exposed under the `IntlSegmenterPolyfill` name.
export default {
  input: 'src/index.js',
  output: {
    file: 'dist/index.js',
    format: 'umd',
    name: 'IntlSegmenterPolyfill',
  },
  // 'runtime' helpers go together with @babel/plugin-transform-runtime in .babelrc
  plugins: [babel({ babelHelpers: 'runtime' })],
}
12 |
--------------------------------------------------------------------------------
/examples/rollup/rollup.config.js:
--------------------------------------------------------------------------------
1 | import commonjs from '@rollup/plugin-commonjs'
2 | import { wasm } from '@rollup/plugin-wasm'
3 |
// Example build: bundles index.js (which imports the .wasm file directly)
// into a self-executing out.js script.
export default {
  input: 'index.js',
  output: {
    file: 'out.js',
    format: 'iife',
  },
  // commonjs() handles the CommonJS/UMD library build; wasm() handles the .wasm import
  plugins: [commonjs(), wasm()],
}
12 |
--------------------------------------------------------------------------------
/filters.json:
--------------------------------------------------------------------------------
1 | {
2 | "strategy": "additive",
3 | "featureFilters": {
4 | "brkitr_rules": "include",
5 | "brkitr_tree": "include",
6 | "cnvalias": "include",
7 | "ulayout": "include",
8 | "brkitr_dictionaries": {
9 | "whitelist": ["thaidict"]
10 | }
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Test
2 |
3 | on:
4 | - push
5 | - pull_request
6 |
7 | jobs:
8 | test:
9 | runs-on: ubuntu-latest
10 |
11 | steps:
12 | - uses: actions/checkout@v2
13 | - name: Install modules
14 | run: npm install
15 | - name: Run tests
16 | run: npm run build && npm run test
17 |
--------------------------------------------------------------------------------
/examples/rollup/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "intl-segmenter-polyfill-rollup-example",
3 | "version": "1.0.0",
4 | "description": "",
5 | "scripts": {
6 | "test": "echo \"Error: no test specified\" && exit 1"
7 | },
8 | "author": "",
9 | "license": "ISC",
10 | "devDependencies": {
11 | "@rollup/plugin-commonjs": "^13.0.0",
12 | "@rollup/plugin-wasm": "^5.0.0",
13 | "rollup": "^2.16.1"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/dist/bundled.d.ts:
--------------------------------------------------------------------------------
/**
 * Resolves to the `Segmenter` constructor of the bundled build, which needs
 * no wasm argument.
 */
export declare const createIntlSegmenterPolyfill: () => Promise<{
  new (
    locale: string,
    options: { granularity: 'word' | 'grapheme' },
  ): {
    locale: string;
    options: { granularity: 'word' | 'grapheme' };
    /** Splits `input` into consecutive segments. */
    segment(input: string): Array<{
      /** Text of the segment. */
      segment: string;
      /** Offset of the segment within `input`. */
      index: number;
      isWordLike: boolean;
      breakType: 'none' | 'number' | 'word' | 'kana' | 'ideo';
    }>;
  };
}>;
17 |
--------------------------------------------------------------------------------
/src/bundled.d.ts:
--------------------------------------------------------------------------------
/**
 * Resolves to the `Segmenter` constructor of the bundled entry point
 * (`src/bundled.js`), which needs no wasm argument.
 */
export declare const createIntlSegmenterPolyfill: () => Promise<{
  new (
    locale: string,
    options: { granularity: 'word' | 'grapheme' },
  ): {
    locale: string;
    options: { granularity: 'word' | 'grapheme' };
    /** Splits `input` into consecutive segments. */
    segment(input: string): Array<{
      /** Text of the segment. */
      segment: string;
      /** Offset of the segment within `input`. */
      index: number;
      isWordLike: boolean;
      breakType: 'none' | 'number' | 'word' | 'kana' | 'ideo';
    }>;
  };
}>;
17 |
--------------------------------------------------------------------------------
/dist/bundled_cja.d.ts:
--------------------------------------------------------------------------------
/**
 * Resolves to the `Segmenter` constructor of the `bundled_cja` build, which
 * needs no wasm argument.
 */
export declare const createIntlSegmenterPolyfill: () => Promise<{
  new (
    locale: string,
    options: { granularity: 'word' | 'grapheme' },
  ): {
    locale: string;
    options: { granularity: 'word' | 'grapheme' };
    /** Splits `input` into consecutive segments. */
    segment(input: string): Array<{
      /** Text of the segment. */
      segment: string;
      /** Offset of the segment within `input`. */
      index: number;
      isWordLike: boolean;
      breakType: 'none' | 'number' | 'word' | 'kana' | 'ideo';
    }>;
  };
}>;
17 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Build WASM
2 |
3 | on: push
4 |
5 | jobs:
6 | build:
7 | runs-on: ubuntu-latest
8 |
9 | steps:
10 | - uses: actions/checkout@v2
11 | - name: Build WASM
12 | run: ./build.sh
13 | - name: Install modules
14 | run: npm install
15 | - name: Run tests
16 | run: npm run build && npm run test
17 | - name: Archive production artifacts
18 | uses: actions/upload-artifact@v1
19 | with:
20 | name: break_iterator.wasm
21 | path: dist/break_iterator.wasm
22 |
--------------------------------------------------------------------------------
/.github/workflows/build-emsdk.yml:
--------------------------------------------------------------------------------
1 | name: Publish EMSDK docker to Registry
2 |
3 | on:
4 | push:
5 | branches: [master]
6 | paths: 'Dockerfile.emsdk'
7 |
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - uses: actions/checkout@v2
14 | - name: Publish EMSDK docker to Registry
15 | uses: elgohr/Publish-Docker-Github-Action@master
16 | with:
17 | name: surferseo/emsdk
18 | username: ${{ secrets.DOCKER_USERNAME }}
19 | password: ${{ secrets.DOCKER_PASSWORD }}
20 | dockerfile: Dockerfile.emsdk
21 |
--------------------------------------------------------------------------------
/Dockerfile.emsdk:
--------------------------------------------------------------------------------
1 | FROM debian:buster
2 |
3 | RUN apt-get update && apt-get install -y build-essential git python clang llvm cmake libxml2 wget python-pip python3 python3-pip zip unzip ca-certificates
4 |
5 | RUN mkdir -p /emsdk
6 |
7 | WORKDIR /
8 | RUN git clone https://github.com/emscripten-core/emsdk.git
9 | WORKDIR /emsdk
10 |
11 | RUN git checkout 6b0d151917fe508007d9d76791369ec94c4eb304
12 | RUN ./emsdk install sdk-upstream-master-64bit
13 |
14 | FROM debian:buster
15 |
16 | RUN apt-get update && apt-get install -y python
17 | COPY --from=0 /emsdk /emsdk
18 | WORKDIR /emsdk
19 | RUN ./emsdk activate sdk-upstream-master-64bit
20 |
--------------------------------------------------------------------------------
/Dockerfile.icu:
--------------------------------------------------------------------------------
1 | FROM debian:buster
2 |
3 | RUN apt-get update && apt-get install -y build-essential git python
4 |
5 |
6 | WORKDIR /
7 | RUN git clone https://github.com/unicode-org/icu
8 | WORKDIR /icu/icu4c/source
9 | RUN git checkout bb7b8481bdce7eb8ac40b3dbfd0a567b3c754cd6
10 |
11 | RUN ./runConfigureICU Linux --with-data-packaging=archive
12 | RUN make -j$(nproc)
13 |
14 | COPY ./filters.json /
15 | RUN ICU_DATA_FILTER_FILE=/filters.json ./runConfigureICU Linux --with-data-packaging=archive
16 | RUN cd data && make clean && make
17 | RUN mkdir -p /artifacts
18 | RUN cp data/out/icu* /artifacts
19 |
20 | WORKDIR /artifacts
21 | RUN apt-get update && apt-get install -y xxd
22 | RUN xxd -i icudt67l.dat data.h
23 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM surferseo/emsdk
2 |
3 | RUN apt-get update && apt-get install -y git
4 |
5 | WORKDIR /
6 | RUN git clone https://github.com/unicode-org/icu
7 | RUN cd /icu && git checkout bb7b8481bdce7eb8ac40b3dbfd0a567b3c754cd6
8 | RUN mv /icu/icu4c /icu/icu
9 |
10 | COPY ./build /build
11 | WORKDIR /build
12 |
13 | # for `source /emsdk/emsdk_env.sh` to work
14 | SHELL ["/bin/bash", "-c"]
15 |
16 | RUN cp /build/icu.py /emsdk/emscripten/master/tools/ports
17 | RUN mkdir -p /artifacts
18 | RUN source /emsdk/emsdk_env.sh; EMCC_LOCAL_PORTS="icu=/icu" emcc break_iterator.c -s USE_ICU=1 -o /artifacts/break_iterator.wasm -s EXPORTED_FUNCTIONS='["_main", "_break_iterator", "_utf8_break_iterator", "_malloc", "_free"]' -s ERROR_ON_UNDEFINED_SYMBOLS=0
19 |
--------------------------------------------------------------------------------
/rollup.config.bundled.js:
--------------------------------------------------------------------------------
1 | import babel from '@rollup/plugin-babel'
2 | import wasm from '@rollup/plugin-wasm'
3 |
4 | import resolve from 'rollup-plugin-node-resolve'
5 | import commonjs from 'rollup-plugin-commonjs'
6 |
// Bundled build: compiles src/bundled.js (which imports the .wasm file) into
// dist/bundled.js, a UMD file exposed under `IntlSegmenterPolyfillBundled`.
export default {
  input: 'src/bundled.js',
  output: {
    file: 'dist/bundled.js',
    format: 'umd',
    name: 'IntlSegmenterPolyfillBundled',
  },
  plugins: [
    // turns the `.wasm` import into something the bundle can carry along
    wasm(),
    babel({
      // ignore the .babelrc files: this build brings its own preset options
      babelrc: false,
      babelHelpers: 'bundled',
      exclude: 'node_modules/**',
      presets: [
        [
          '@babel/preset-env',
          {
            // core-js 3 polyfills are injected where the code uses the feature
            corejs: 3,
            modules: false,
            useBuiltIns: 'usage',
            targets: {
              ie: '11',
            },
          },
        ],
      ],
    }),
    resolve(),
    commonjs(),
  ],
}
38 |
--------------------------------------------------------------------------------
/examples/rollup/index.js:
--------------------------------------------------------------------------------
1 | import break_iterator from '../../dist/break_iterator.wasm'
2 | import { createIntlSegmenterPolyfillFromFactory } from '../../dist/index'
3 |
// Demo entry point: segments the textarea content into words and lists every
// segment together with its break type.
;(async function () {
  const Segmenter = await createIntlSegmenterPolyfillFromFactory(break_iterator)

  const segmenter = new Segmenter('en', { granularity: 'word' })

  // Look the elements up once instead of on every keystroke.
  const textarea = document.querySelector('textarea')
  const list = document.querySelector('ul')

  // Rebuilds the list with one <li> per segment of `value`.
  const updateSegmentList = (value) => {
    const items = segmenter
      .segment(value)
      .map(({ segment, isWordLike, breakType }) => {
        const item = document.createElement('li')
        // textContent, not innerHTML: the text comes straight from the user
        // and must never be parsed as markup.
        item.textContent = `${segment} – ${breakType} (isWordLike=${isWordLike})`
        return item
      })

    list.textContent = ''
    items.forEach((item) => list.appendChild(item))
  }

  textarea.addEventListener('keyup', (e) => {
    updateSegmentList(e.currentTarget.value)
  })

  updateSegmentList(textarea.value)
})()
27 |
--------------------------------------------------------------------------------
/examples/node.js:
--------------------------------------------------------------------------------
// Node example: prints the word-like segments of examples/thai.txt and, when
// an optional examples/wikipedia.txt is present, times an English run over it.
const fs = require('fs')
const path = require('path')
const { createIntlSegmenterPolyfill } = require('../dist/index.js')

// Resolve data files relative to this script so it works from any cwd.
const fromHere = (...parts) => path.join(__dirname, ...parts)

const wasmBinary = new Uint8Array(
  fs.readFileSync(fromHere('..', 'dist', 'break_iterator.wasm')),
)

;(async () => {
  const Segmenter = await createIntlSegmenterPolyfill(wasmBinary)
  const thai = fs.readFileSync(fromHere('thai.txt'), 'utf-8')

  console.log(
    new Segmenter('th', { granularity: 'word' })
      .segment(thai)
      .filter(({ isWordLike }) => isWordLike),
  )

  // The benchmark input is not shipped with the examples; skip when missing.
  const wikiPath = fromHere('wikipedia.txt')
  if (!fs.existsSync(wikiPath)) {
    console.info('Skipping benchmark: %s not found', wikiPath)
    return
  }
  const wiki = fs.readFileSync(wikiPath, 'utf-8')

  const hrstart = process.hrtime()

  new Segmenter('en', { granularity: 'word' })
    .segment(wiki)
    .filter(({ isWordLike }) => isWordLike)
    .forEach(({ segment }) => console.log(segment))

  const hrend = process.hrtime(hrstart)
  console.info('Execution time (hr): %ds %dms', hrend[0], hrend[1] / 1000000)
})().catch((error) => {
  // Surface failures instead of leaving an unhandled rejection.
  console.error(error)
  process.exitCode = 1
})
31 |
--------------------------------------------------------------------------------
/src/index.d.ts:
--------------------------------------------------------------------------------
1 | import 'fast-text-encoding';
/**
 * Creates the `Segmenter` class from a wasm factory, e.g. the default export
 * a bundler's wasm loader generates for `break_iterator.wasm`.
 *
 * @param wasmFactory receives the import object and returns, or resolves to,
 *   the instantiated module (the implementation awaits the result).
 * @returns a promise for the `Segmenter` constructor.
 */
export declare const createIntlSegmenterPolyfillFromFactory: (
  wasmFactory: (imports: object) =>
    { instance: WebAssembly.Instance } | PromiseLike<{ instance: WebAssembly.Instance }>,
) => Promise<{
  new (locale: string, options: {
    granularity: 'word' | 'grapheme';
  }): {
    locale: string;
    options: {
      granularity: 'word' | 'grapheme';
    };
    segment(input: string): {
      segment: string;
      index: number;
      isWordLike: boolean;
      breakType: "number" | "none" | "word" | "kana" | "ideo";
    }[];
  };
}>;
/**
 * Creates the `Segmenter` class from the wasm binary itself.
 *
 * @param wasm either the module bytes (buffer or typed array), or a promise
 *   of a fetch `Response` for the `.wasm` file.
 * @returns a promise for the `Segmenter` constructor.
 */
export declare const createIntlSegmenterPolyfill: (
  wasm: ArrayBufferLike | ArrayBufferView | PromiseLike<Response>,
) => Promise<{
  new (locale: string, options: {
    granularity: 'word' | 'grapheme';
  }): {
    locale: string;
    options: {
      granularity: 'word' | 'grapheme';
    };
    segment(input: string): {
      segment: string;
      index: number;
      isWordLike: boolean;
      breakType: "number" | "none" | "word" | "kana" | "ideo";
    }[];
  };
}>;
36 |
--------------------------------------------------------------------------------
/dist/index.d.ts:
--------------------------------------------------------------------------------
1 | import 'fast-text-encoding';
/**
 * Creates the `Segmenter` class from a wasm factory, e.g. the default export
 * a bundler's wasm loader generates for `break_iterator.wasm`.
 *
 * @param wasmFactory receives the import object and returns, or resolves to,
 *   the instantiated module (the implementation awaits the result).
 * @returns a promise for the `Segmenter` constructor.
 */
export declare const createIntlSegmenterPolyfillFromFactory: (
  wasmFactory: (imports: object) =>
    { instance: WebAssembly.Instance } | PromiseLike<{ instance: WebAssembly.Instance }>,
) => Promise<{
  new (locale: string, options: {
    granularity: 'word' | 'grapheme';
  }): {
    locale: string;
    options: {
      granularity: 'word' | 'grapheme';
    };
    segment(input: string): {
      segment: string;
      index: number;
      isWordLike: boolean;
      breakType: "number" | "none" | "word" | "kana" | "ideo";
    }[];
  };
}>;
/**
 * Creates the `Segmenter` class from the wasm binary itself.
 *
 * @param wasm either the module bytes (buffer or typed array), or a promise
 *   of a fetch `Response` for the `.wasm` file.
 * @returns a promise for the `Segmenter` constructor.
 */
export declare const createIntlSegmenterPolyfill: (
  wasm: ArrayBufferLike | ArrayBufferView | PromiseLike<Response>,
) => Promise<{
  new (locale: string, options: {
    granularity: 'word' | 'grapheme';
  }): {
    locale: string;
    options: {
      granularity: 'word' | 'grapheme';
    };
    segment(input: string): {
      segment: string;
      index: number;
      isWordLike: boolean;
      breakType: "number" | "none" | "word" | "kana" | "ideo";
    }[];
  };
}>;
36 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "intl-segmenter-polyfill",
3 | "version": "0.4.4",
4 | "description": "This repo builds .wasm module using icu4c for breaking text into words, so that we can polyfill [Intl Segmenter Proposal](https://github.com/tc39/proposal-intl-segmenter) with full compatibility, even on browsers that do not expose v8BreakIterator api.",
5 | "main": "dist/index.js",
6 | "files": [
7 | "dist/",
8 | "src/"
9 | ],
10 | "scripts": {
11 | "prepublish": "npm run build",
12 | "build": "cp src/break_iterator.wasm dist/ && rollup -c rollup.config.js && rollup -c rollup.config.bundled.js",
13 | "test": "jest"
14 | },
15 | "repository": {
16 | "type": "git",
17 | "url": "git+https://github.com/surferseo/intl-segmenter-polyfill.git"
18 | },
19 | "author": "Lucjan Suski",
20 | "license": "ISC",
21 | "bugs": {
22 | "url": "https://github.com/surferseo/intl-segmenter-polyfill/issues"
23 | },
24 | "homepage": "https://github.com/surferseo/intl-segmenter-polyfill#readme",
25 | "devDependencies": {
26 | "@babel/plugin-transform-runtime": "^7.10.1",
27 | "@babel/preset-env": "^7.10.2",
28 | "@rollup/plugin-babel": "^5.0.3",
29 | "@rollup/plugin-wasm": "^5.0.0",
30 | "core-js": "^3.6.5",
31 | "jest": "^26.0.1",
32 | "rollup": "^2.16.1",
33 | "rollup-plugin-commonjs": "^10.1.0",
34 | "rollup-plugin-node-resolve": "^5.2.0"
35 | },
36 | "dependencies": {
37 | "fast-text-encoding": "^1.0.2"
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/icu.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The Emscripten Authors. All rights reserved.
2 | # Emscripten is available under two separate licenses, the MIT license and the
3 | # University of Illinois/NCSA Open Source License. Both these licenses can be
4 | # found in the LICENSE file.
5 |
6 | import logging
7 | import os
8 | import shutil
9 |
10 | TAG = 'release-67-1'
11 | VERSION = '67_1'
12 | HASH = 'e0c366e097d5cd9840e7c440a87f8f338cc59b1ed7ec527eecbb5671c6c48261b217424a7ee95870915c19b70b1afa2e486100e73acae3515d30bb3872661c11'
13 | SUBDIR = ''
14 |
def get(ports, settings, shared):
    """Fetch the ICU sources and return the cached ``libicuuc`` build.

    Args:
        ports: port helper object; used here for ``fetch_project``,
            ``get_lib_name``, ``get_dir``, ``build_port`` and
            ``install_header_dir``.
        settings: build settings; the port is active only when
            ``settings.USE_ICU == 1``.
        shared: object exposing ``Cache.get`` and ``Cache.get_path``.

    Returns:
        An empty list when ICU is not requested, otherwise a one-element list
        holding whatever ``shared.Cache.get`` returns for the library.
    """
    if settings.USE_ICU != 1:
        return []

    url = 'https://github.com/unicode-org/icu/releases/download/%s/icu4c-%s-src.zip' % (TAG, VERSION)
    ports.fetch_project('icu', url, 'icu', sha512hash=HASH)
    libname = ports.get_lib_name('libicuuc')

    def create():
        """Copy the fetched sources into the build cache and build the library."""
        logging.info('building port: icu')

        source_path = os.path.join(ports.get_dir(), 'icu', 'icu')
        dest_path = os.path.join(shared.Cache.get_path('ports-builds'), 'icu')
        # Log the paths instead of printing them: stdout is not ours to clutter.
        logging.debug('copying ICU sources from %s to %s', source_path, dest_path)

        # Start from a clean tree; copytree refuses to write into an existing one.
        shutil.rmtree(dest_path, ignore_errors=True)
        shutil.copytree(source_path, dest_path)

        common_dir = os.path.join(dest_path, 'source', 'common')
        final = os.path.join(dest_path, libname)
        ports.build_port(common_dir, final, [common_dir], ['-DU_COMMON_IMPLEMENTATION=1'])

        ports.install_header_dir(os.path.join(common_dir, 'unicode'))
        return final

    return [shared.Cache.get(libname, create)]
41 |
42 |
def clear(ports, shared):
    """Erase the ``libicuuc`` library file from the shared cache."""
    library_file = ports.get_lib_name('libicuuc')
    shared.Cache.erase_file(library_file)
45 |
46 |
def process_args(ports, args, settings, shared):
    """Make sure the ICU port is fetched/built when ``USE_ICU`` is 1.

    Returns:
        ``args`` itself, unmodified.
    """
    icu_requested = settings.USE_ICU == 1
    if icu_requested:
        get(ports, settings, shared)
    return args
51 |
52 |
def show():
    """Return the one-line description of this port."""
    description = 'icu (USE_ICU=1; Unicode License)'
    return description
55 |
--------------------------------------------------------------------------------
/break_iterator.c:
--------------------------------------------------------------------------------
/* NOTE(review): the <...> include targets were lost from this listing; the
 * headers below are reconstructed from the symbols this file uses — confirm
 * against the repository before merging. */
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include <unicode/ubrk.h>
#include <unicode/udata.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>

#include "data.h"
#include "unicode/utext.h"
11 |
/* Signature of the callback that receives one segment: its start and end
 * offsets, the rule status reported by the break iterator for that segment,
 * and the opaque callback_id handed through from the caller. */
typedef void array_push(int32_t start, int32_t end, int32_t type,
                        const void* callback_id);

/* Not defined in this file: supplied by the embedder (the JS side provides it
 * as the `env.push` import). */
extern array_push push;
16 |
17 | void utf8_break_iterator(int8_t break_type, const char* locale,
18 | const char* to_break, int32_t to_break_len,
19 | const void* callback_id) {
20 | UErrorCode status = U_ZERO_ERROR;
21 |
22 | udata_setCommonData(icudt67l_dat, &status);
23 |
24 | UBreakIterator* iter;
25 |
26 | UChar string_to_break[to_break_len + 1];
27 | u_uastrcpy(string_to_break, to_break);
28 | iter = ubrk_open(break_type, locale, string_to_break,
29 | u_strlen(string_to_break), &status);
30 |
31 | int32_t end;
32 | int32_t start = ubrk_first(iter);
33 | int32_t n = 0;
34 | for (end = ubrk_next(iter); end != UBRK_DONE;
35 | start = end, end = ubrk_next(iter)) {
36 | push(start, end, ubrk_getRuleStatus(iter), callback_id);
37 | }
38 |
39 | ubrk_close(iter);
40 | }
41 |
42 | // differs from utf8_break_iterator in that it operates on raw bytes
43 | // in case unicode implementation is not compatible (ie. in Elixir)
44 | void break_iterator(int8_t break_type, const char* locale, const char* to_break,
45 | const void* callback_id) {
46 | UErrorCode status = U_ZERO_ERROR;
47 | UText* utext_to_break = NULL;
48 |
49 | udata_setCommonData(icudt67l_dat, &status);
50 |
51 | UBreakIterator* iter;
52 |
53 | utext_to_break = utext_openUTF8(utext_to_break, to_break, -1, &status);
54 |
55 | iter = ubrk_open(break_type, locale, NULL, -1, &status);
56 | ubrk_setUText(iter, utext_to_break, &status);
57 |
58 | int32_t end;
59 | int32_t start = ubrk_first(iter);
60 | int32_t n = 0;
61 | for (end = ubrk_next(iter); end != UBRK_DONE;
62 | start = end, end = ubrk_next(iter)) {
63 | push(start, end, ubrk_getRuleStatus(iter), callback_id);
64 | }
65 |
66 | utext_close(utext_to_break);
67 | ubrk_close(iter);
68 | }
69 |
/* Empty entry point: it exists only so that the WASI `_start` function is
 * generated. */
int main(void) {
  return 0;
}
72 |
--------------------------------------------------------------------------------
/examples/elixir.ex:
--------------------------------------------------------------------------------
# Example GenServer that loads `break_iterator.wasm` through Wasmex and answers
# `{:break, locale, string}` calls with the segments reported by the module.
#
# NOTE(review): break_iterator.c declares `push(start, end, type, callback_id)`
# and `break_iterator(break_type, locale, to_break, callback_id)`, while this
# example registers `push` with three i32 parameters and calls
# `break_iterator` with three arguments — confirm it still matches the module
# you build.
defmodule WasmexTest do
  use GenServer

  # Reads the wasm binary (path relative to the cwd) and starts a Wasmex
  # instance. Every import is a stub returning 0, except `push`.
  def init([]) do
    {:ok, bytes} = File.read("break_iterator.wasm")
    {:ok, instance} = Wasmex.start_link(%{
      bytes: bytes,
      imports: %{
        wasi_snapshot_preview1: %{
          proc_exit: {:fn, [:i32], [], fn _ -> 0 end},
          fd_close: {:fn, [:i32], [:i32], fn _ -> 0 end},
          environ_sizes_get: {:fn, [:i32, :i32], [:i32], fn _ -> 0 end},
          environ_get: {:fn, [:i32, :i32], [:i32], fn _ -> 0 end}
        },
        env: %{
          __sys_stat64: {:fn, [:i32, :i32], [:i32], fn _ -> 0 end},
          # Called once per segment. The receiving pid is recovered from wasm
          # memory: its length sits at offset 100 and its Base64-encoded
          # term at offset 101 (written by handle_call below).
          push: {:fn, [:i32, :i32, :i32], [], fn (%{memory: memory}, slice_start, slice_end, type) ->
            pid_binary_length = Wasmex.Memory.get(memory, 100)
            pid = Wasmex.Memory.read_string(memory, 101, pid_binary_length) |> Base.decode64! |> :erlang.binary_to_term
            send(pid, {:received_value, {slice_start, slice_end, type}})
            nil
          end },
        }
      }
    })

    {:ok, instance}
  end

  # Breaks `string` and replies with `{segments, memory}`, where every segment
  # is `{slice_start, slice_end, type, binary_slice}`.
  def handle_call({:break, locale, string}, _from, instance) do
    # NUL-terminate the input before it is copied into wasm memory
    string = string <> <<0>>
    {:ok, memory} = Wasmex.memory(instance, :uint8, 0)

    breaks = Task.async(fn ->
      pid = self()
      # the pid travels through wasm memory as a Base64-encoded term
      pid_binary = :erlang.term_to_binary(self()) |> Base.encode64

      # fixed memory layout: locale at 0, pid length at 100, pid at 101,
      # text at 10000
      Wasmex.Memory.write_binary(memory, 0, locale <> <<0>>)
      Wasmex.Memory.set(memory, 100, byte_size(pid_binary))
      Wasmex.Memory.write_binary(memory, 101, pid_binary)
      Wasmex.Memory.write_binary(memory, 10000, string)

      # run the wasm call in its own task; `:done` tells the collector below
      # that no further segments will arrive
      Task.async(fn ->
        Wasmex.call_function(instance, "break_iterator", [0, 10000, byte_size(string)])
        send(pid, :done)
      end)

      receive_all_values()
    end) |> Task.await

    # offsets are applied with binary_part, i.e. they are treated as byte offsets
    reply = breaks |> Enum.map(fn {slice_start, slice_end, type} ->
      {slice_start, slice_end, type, string |> binary_part(slice_start, slice_end - slice_start)}
      # {string |> String.slice(slice_start, slice_end - slice_start), type}
    end)

    {:reply, {reply, memory}, instance}
  end

  # Collects `{:received_value, value}` messages until `:done` arrives and
  # returns the values in arrival order.
  def receive_all_values(values \\ []) do
    receive do
      :done -> values |> Enum.reverse
      {:received_value, value} -> receive_all_values( [value | values])
    end
  end
end
66 |
--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 | // polyfill TextEncoder and TextDecoder, which is missing on Edge 18
2 | import 'fast-text-encoding'
3 |
// Maps the `granularity` option to the break-type code that is passed as the
// first argument of the wasm `utf8_break_iterator` export.
const BREAK_TYPES = {
  grapheme: 0,
  word: 1,
  sentence: 3,
}
9 |
/**
 * Translates the numeric rule status reported for a segment into its name.
 *
 * Buckets are 100 wide: below 100 is 'none', then 'number', 'word', 'kana'
 * and 'ideo'. Anything from 500 upwards (or not comparable as a number) has
 * no name and yields `undefined`.
 */
const getSegmentType = (type) => {
  if (type < 100) return 'none'
  if (type < 200) return 'number'
  if (type < 300) return 'word'
  if (type < 400) return 'kana'
  if (type < 500) return 'ideo'
  return undefined
}
23 |
/**
 * Instantiates the wasm module from either of the two accepted inputs.
 *
 * @param wasm the module bytes, or a thenable resolving to a fetch-style
 *   response for the `.wasm` file.
 * @param imports import object handed to the module.
 * @returns the promise produced by the matching `WebAssembly` call.
 */
const instantiateWasmModule = (wasm, imports) => {
  const isThenable = typeof wasm.then === 'function'

  // Plain bytes: nothing to wait for.
  if (!isThenable) {
    return WebAssembly.instantiate(wasm, imports)
  }

  // Prefer streaming compilation where the runtime offers it.
  if (WebAssembly.instantiateStreaming != null) {
    return wasm.then((response) =>
      WebAssembly.instantiateStreaming(response, imports),
    )
  }

  // Otherwise download the whole body first and instantiate from the buffer.
  const bufferPromise = wasm.then((response) => response.arrayBuffer())
  return bufferPromise.then((buffer) => WebAssembly.instantiate(buffer, imports))
}
39 |
/**
 * Builds the `Segmenter` class on top of an instantiated break-iterator
 * module.
 *
 * @param wasmInstance instantiated module; its exports must provide `malloc`,
 *   `free`, `memory` and `utf8_break_iterator`.
 * @param values shared holder: while the wasm call runs, the `push` import
 *   appends one `[start, end, segmentType]` tuple per segment to
 *   `values.current`.
 * @returns a promise resolving to the `Segmenter` class.
 */
const createIntlSegmenterPolyfillFromInstance = async (
  wasmInstance,
  values,
) => {
  // One encoder serves every string copied into wasm memory.
  const encoder = new TextEncoder()

  // Copies `str` as NUL-terminated UTF-8 into freshly malloc'ed wasm memory.
  // Returns [pointer, encoded bytes]; the caller must free the pointer.
  const allocStr = (str) => {
    const view = encoder.encode(str + '\0')
    const exports = wasmInstance.exports

    const ptr = exports.malloc(view.length)
    if (ptr === 0) {
      // a null pointer means the allocation failed; writing there would
      // corrupt the module's memory
      throw new Error('intl-segmenter-polyfill: failed to allocate wasm memory')
    }
    new Uint8Array(exports.memory.buffer, ptr, view.length).set(view)
    return [ptr, view]
  }

  return class Segmenter {
    /**
     * @param locale locale id handed to the break iterator.
     * @param options `granularity` selects the break type; defaults to
     *   'grapheme' when missing.
     */
    constructor(locale, options) {
      this.locale = locale
      this.options = options || {}
    }

    /**
     * Splits `input` into segments.
     *
     * @returns an array of `{ segment, index, isWordLike, breakType }`
     *   (the last two are `undefined` unless granularity is 'word'), with an
     *   extra `containing(index)` lookup attached to the array.
     */
    segment(input) {
      const locale = this.locale
      const granularity = this.options.granularity || 'grapheme'
      const isWordGranularity = granularity === 'word'
      const exports = wasmInstance.exports

      values.current = []
      const [inputPtr, inputView] = allocStr(input)
      // try/finally so that both allocations are released even when the
      // second allocation or the wasm call throws
      try {
        const [localePtr] = allocStr(locale)
        try {
          exports.utf8_break_iterator(
            BREAK_TYPES[granularity],
            localePtr,
            inputPtr,
            inputView.length,
          )
        } finally {
          exports.free(localePtr)
        }
      } finally {
        exports.free(inputPtr)
      }

      let index = 0

      const segments = values.current.map(([start, end, segmentType]) => {
        const segment = input.slice(start, end)
        const breakType = isWordGranularity
          ? getSegmentType(segmentType)
          : undefined
        const returnValue = {
          segment,
          index: index,
          isWordLike: isWordGranularity ? breakType !== 'none' : undefined,
          breakType,
        }
        index += segment.length
        return returnValue
      })

      // Finds the segment that covers the given index of the input.
      segments.containing = (indexToFind) =>
        segments.find(
          ({ index, segment }) =>
            indexToFind >= index && indexToFind <= index + segment.length - 1,
        )

      return segments
    }
  }
}
104 |
/**
 * Builds the import object handed to the wasm module.
 *
 * Only `env.push` does real work: it forwards each reported segment to
 * `callback` as a `[start, end, segmentType]` array. Every other entry is an
 * empty stub.
 */
const getImports = (callback) => {
  const env = {
    push(start, end, segmentType) {
      callback([start, end, segmentType])
    },
    __sys_stat64() {},
  }

  const wasi_snapshot_preview1 = {
    proc_exit() {},
    fd_close() {},
    environ_sizes_get() {},
    environ_get() {},
  }

  return { env, wasi_snapshot_preview1 }
}
119 |
/**
 * Creates the Segmenter class from a wasm factory (e.g. what a bundler's
 * wasm loader exports for the `.wasm` file).
 *
 * @param wasmFactory called with the import object; its awaited result must
 *   carry the instantiated module as `instance`.
 */
export const createIntlSegmenterPolyfillFromFactory = async (wasmFactory) => {
  const values = { current: [] }
  // Always read `values.current` at call time: it is replaced on every segment() call.
  const collect = (value) => {
    values.current.push(value)
  }

  const { instance } = await wasmFactory(getImports(collect))

  return createIntlSegmenterPolyfillFromInstance(instance, values)
}
130 |
/**
 * Creates the Segmenter class from the wasm binary.
 *
 * @param wasm the module bytes, or a thenable resolving to a fetch-style
 *   response for the `.wasm` file (see `instantiateWasmModule`).
 */
export const createIntlSegmenterPolyfill = async (wasm) => {
  const values = { current: [] }
  // Always read `values.current` at call time: it is replaced on every segment() call.
  const collect = (value) => {
    values.current.push(value)
  }

  const { instance } = await instantiateWasmModule(wasm, getImports(collect))

  return createIntlSegmenterPolyfillFromInstance(instance, values)
}
143 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Intl Segmenter Polyfill
2 |
3 | [intl-segmenter-polyfill on npm](https://www.npmjs.com/package/intl-segmenter-polyfill)
4 | 
5 | 
6 |
7 | Provides .wasm module built with icu4c for breaking text into words, so that we can polyfill [Intl Segmenter Proposal](https://github.com/tc39/proposal-intl-segmenter) with full compatibility, even on browsers that do not expose v8BreakIterator api.
8 |
9 | **By default it bundles only Thai language dictionary. Modify `filters.json` if you need to support [other exotic languages](https://github.com/unicode-org/icu/tree/master/icu4c/source/data/brkitr/dictionaries).**
10 |
11 | ## Usage
12 |
13 | ```
14 | npm install --save intl-segmenter-polyfill
15 | ```
16 |
17 | ### Web – fetch
18 |
19 | This is the most efficient way as you can lazily load the wasm module only when you need it and use `instantiateStreaming` for the best performance. Serve `break_iterator.wasm` as a static asset with `application/wasm` content-type and you are good to go.
20 |
21 | #### index.js
22 |
23 | ```js
24 | import { createIntlSegmenterPolyfill } from 'intl-segmenter-polyfill'
25 | ;(async function () {
26 | const Segmenter = await createIntlSegmenterPolyfill(
27 | fetch('/path/to/break_iterator.wasm'),
28 | )
29 |
30 | const segmenter = new Segmenter('en', { granularity: 'word' })
31 | const segments = segmenter.segment('foo bar baz')
32 | })()
33 | ```
34 |
35 | ### Web – bundle with base64 encoded module
36 |
37 | This is the simplest way to use the polyfill, at the cost of base64 encoded module – it's ~33% bigger and cannot be loaded on demand.
38 |
39 | #### index.js
40 |
41 | ```js
42 | import { createIntlSegmenterPolyfill } from 'intl-segmenter-polyfill/dist/bundled'
43 | ;(async function () {
44 | const Segmenter = await createIntlSegmenterPolyfill()
45 | const segmenter = new Segmenter('en', { granularity: 'word' })
46 | const segments = segmenter.segment('foo bar baz')
47 | console.log(segments)
48 | })()
49 | ```
50 |
51 | #### OR using a plain old `<script>` tag
55 |
64 | ```
65 |
66 | ### Web – Rollup / Webpack wasm loader
67 |
68 | @rollup/plugin-wasm and webpack wasm-loader can be used with `createIntlSegmenterPolyfillFromFactory`
69 |
70 | #### rollup.config.js
71 |
72 | ```js
73 | import commonjs from '@rollup/plugin-commonjs'
74 | import { wasm } from '@rollup/plugin-wasm'
75 |
76 | export default {
77 | input: 'index.js',
78 | output: {
79 | file: 'out.js',
80 | format: 'iife',
81 | },
82 | plugins: [commonjs(), wasm()],
83 | }
84 | ```
85 |
86 | #### index.js
87 |
88 | ```js
89 | import { createIntlSegmenterPolyfillFromFactory } from 'intl-segmenter-polyfill'
90 | import break_iterator from 'intl-segmenter-polyfill/break_iterator.wasm'
91 | ;(async function () {
92 | const Segmenter = await createIntlSegmenterPolyfillFromFactory(break_iterator)
93 |
94 | const segmenter = new Segmenter('en', { granularity: 'word' })
95 | const segments = segmenter.segment('foo bar baz')
96 | })()
97 | ```
98 |
99 | ### Node
100 |
101 | ```js
102 | const {createIntlSegmenterPolyfill} = require('intl-segmenter-polyfill')
103 | const fs = require('fs')
104 |
105 | const wasmBuffer = fs.readFileSync('node_modules/intl-segmenter-polyfill/break_iterator.wasm')
106 | let wasmBinary = new Uint8Array(wasmBuffer)
107 |
108 | ;(async () => {
109 | const Segmenter = await createIntlSegmenterPolyfill(wasmBinary);
110 | const segmenter = new Segmenter("en", { granularity: 'word' });
111 | const segments = segmenter.segment("foo bar baz");
112 | })()
113 | ```
114 |
115 | ## Supported browsers
116 |
117 | The polyfill works in reasonably recent versions of Chrome, Firefox and Safari. It also polyfills TextEncoder/TextDecoder to support Edge 18 (non-Chromium).
118 |
119 | ## Building
120 |
121 | With Docker installed, running `./build.sh` should output a `break_iterator.wasm` that is ready to be used in Node, browsers or Wasmer without much special treatment (see the examples above or `examples/`).
122 |
--------------------------------------------------------------------------------
/test/index.test.js:
--------------------------------------------------------------------------------
/**
 * Jest suite for the built bundles in `dist/`.
 *
 * Every artifact is resolved relative to this file, so the suite does not depend on the
 * working directory Jest is started from. (The wasm binary used to be read through the
 * cwd-relative path "./dist/break_iterator.wasm", unlike the `require` calls next to it.)
 */
const fs = require("fs");
const path = require("path");

const DIST_DIR = path.join(__dirname, "..", "dist");

/**
 * Requires a built bundle from `dist/` and resolves to the Segmenter class it creates.
 * Extra arguments are forwarded to `createIntlSegmenterPolyfill` unchanged.
 */
const loadSegmenter = (bundleName, ...args) =>
  require(path.join(DIST_DIR, bundleName)).createIntlSegmenterPolyfill(...args);

/** Projects a segment list onto the segment strings only. */
const segmentTexts = (segments) => segments.map(({ segment }) => segment);

// Expected word segmentation of "foo bar", shared by the bundled and fs-loaded variants.
const FOO_BAR_WORD_SEGMENTS = [
  { breakType: "word", index: 0, isWordLike: true, segment: "foo" },
  { breakType: "none", index: 3, isWordLike: false, segment: " " },
  { breakType: "word", index: 4, isWordLike: true, segment: "bar" },
];

// Expected grapheme segmentation of "foo bar".
const FOO_BAR_GRAPHEMES = ["f", "o", "o", " ", "b", "a", "r"];

test("Bundled module", async () => {
  const Segmenter = await loadSegmenter("bundled.js");
  const segments = new Segmenter("en", { granularity: "word" }).segment(
    "foo bar"
  );
  expect(Array.from(segments)).toEqual(FOO_BAR_WORD_SEGMENTS);
});

test("FS loaded module", async () => {
  const wasmBuffer = fs.readFileSync(path.join(DIST_DIR, "break_iterator.wasm"));
  const wasmBinary = new Uint8Array(wasmBuffer);

  const Segmenter = await loadSegmenter("index.js", wasmBinary);
  const segments = new Segmenter("en", { granularity: "word" }).segment(
    "foo bar"
  );
  expect(Array.from(segments)).toEqual(FOO_BAR_WORD_SEGMENTS);
});

test("segments.containing() direct access", async () => {
  const Segmenter = await loadSegmenter("bundled.js");
  const segments = new Segmenter("en", { granularity: "word" }).segment(
    "foo bar"
  );
  expect(segments.containing(0).segment).toEqual("foo");
  expect(segments.containing(1).segment).toEqual("foo");
  expect(segments.containing(3).segment).toEqual(" ");
  expect(segments.containing(5).segment).toEqual("bar");
  // Index 8 is past the end of the 7-character input.
  expect(segments.containing(8)).toEqual(undefined);
});

test("segment by grapheme", async () => {
  const Segmenter = await loadSegmenter("bundled.js");
  const segments = new Segmenter("en", { granularity: "grapheme" }).segment(
    "foo bar"
  );
  expect(segmentTexts(segments)).toEqual(FOO_BAR_GRAPHEMES);
});

test("defaults to grapheme segmenting", async () => {
  const Segmenter = await loadSegmenter("bundled.js");
  const segments = new Segmenter("en").segment("foo bar");
  expect(segmentTexts(segments)).toEqual(FOO_BAR_GRAPHEMES);
});

test("segment by sentence", async () => {
  const Segmenter = await loadSegmenter("bundled.js");
  const segments = new Segmenter("en", { granularity: "sentence" }).segment(
    "Foo bar. Foo bar."
  );
  expect(segmentTexts(segments)).toEqual(["Foo bar. ", "Foo bar."]);
});

test("Segments Thai words", async () => {
  const Segmenter = await loadSegmenter("bundled.js");
  const segments = new Segmenter("en", { granularity: "word" }).segment(
    "ยังมีอาสาสมัครน้อยมากเมื่อเทียบกับประชากรที่เข้าถึงอินเทอร์เน็ตได้"
  );
  expect(segmentTexts(segments)).toEqual([
    "ยัง",
    "มี",
    "อาสา",
    "สมัคร",
    "น้อย",
    "มาก",
    "เมื่อ",
    "เทียบ",
    "กับ",
    "ประชากร",
    "ที่",
    "เข้า",
    "ถึง",
    "อินเทอร์เน็ต",
    "ได้",
  ]);
});

test("Segments Japanese words", async () => {
  // Uses the separate bundle named bundled_cja.js.
  const Segmenter = await loadSegmenter("bundled_cja.js");
  const segments = new Segmenter("en", { granularity: "word" }).segment(
    "チンドン屋は、チンドン太鼓と呼ばれる楽器を鳴らすなどして人目を集め、その地域の商品や店舗などの宣伝を行う日本の請負広告業である。披露目屋・広目屋・東西屋と呼ぶ地域もある。"
  );
  expect(segmentTexts(segments)).toEqual([
    "チン",
    "ドン",
    "屋",
    "は",
    "、",
    "チン",
    "ドン",
    "太鼓",
    "と",
    "呼ばれる",
    "楽器",
    "を",
    "鳴らす",
    "など",
    "し",
    "て",
    "人目",
    "を",
    "集め",
    "、",
    "その",
    "地域",
    "の",
    "商品",
    "や",
    "店舗",
    "など",
    "の",
    "宣伝",
    "を",
    "行う",
    "日本",
    "の",
    "請負",
    "広告",
    "業",
    "で",
    "ある",
    "。",
    "披露",
    "目",
    "屋",
    "・",
    "広目屋",
    "・",
    "東西",
    "屋",
    "と",
    "呼ぶ",
    "地域",
    "も",
    "ある",
    "。",
  ]);
});
175 |
--------------------------------------------------------------------------------
/examples/rollup/package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "intl-segmenter-polyfill-rollup-example",
3 | "version": "1.0.0",
4 | "lockfileVersion": 1,
5 | "requires": true,
6 | "dependencies": {
7 | "@rollup/plugin-commonjs": {
8 | "version": "13.0.0",
9 | "resolved": "https://registry.npmjs.org/@rollup/plugin-commonjs/-/plugin-commonjs-13.0.0.tgz",
10 | "integrity": "sha512-Anxc3qgkAi7peAyesTqGYidG5GRim9jtg8xhmykNaZkImtvjA7Wsqep08D2mYsqw1IF7rA3lYfciLgzUSgRoqw==",
11 | "dev": true,
12 | "requires": {
13 | "@rollup/pluginutils": "^3.0.8",
14 | "commondir": "^1.0.1",
15 | "estree-walker": "^1.0.1",
16 | "glob": "^7.1.2",
17 | "is-reference": "^1.1.2",
18 | "magic-string": "^0.25.2",
19 | "resolve": "^1.11.0"
20 | }
21 | },
22 | "@rollup/plugin-wasm": {
23 | "version": "5.0.0",
24 | "resolved": "https://registry.npmjs.org/@rollup/plugin-wasm/-/plugin-wasm-5.0.0.tgz",
25 | "integrity": "sha512-3yVc14qT1hX9Zs8qnieXkzLJL8hpdQyW/fwH3V5/doXDQfQPAHSoPirDLm8Fum4U1JZxnsOnk/6GF5gqKh7dsQ==",
26 | "dev": true
27 | },
28 | "@rollup/pluginutils": {
29 | "version": "3.1.0",
30 | "resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-3.1.0.tgz",
31 | "integrity": "sha512-GksZ6pr6TpIjHm8h9lSQ8pi8BE9VeubNT0OMJ3B5uZJ8pz73NPiqOtCog/x2/QzM1ENChPKxMDhiQuRHsqc+lg==",
32 | "dev": true,
33 | "requires": {
34 | "@types/estree": "0.0.39",
35 | "estree-walker": "^1.0.1",
36 | "picomatch": "^2.2.2"
37 | }
38 | },
39 | "@types/estree": {
40 | "version": "0.0.39",
41 | "resolved": "https://registry.npmjs.org/@types/estree/-/estree-0.0.39.tgz",
42 | "integrity": "sha512-EYNwp3bU+98cpU4lAWYYL7Zz+2gryWH1qbdDTidVd6hkiR6weksdbMadyXKXNPEkQFhXM+hVO9ZygomHXp+AIw==",
43 | "dev": true
44 | },
45 | "balanced-match": {
46 | "version": "1.0.0",
47 | "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz",
48 | "integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=",
49 | "dev": true
50 | },
51 | "brace-expansion": {
52 | "version": "1.1.11",
53 | "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
54 | "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
55 | "dev": true,
56 | "requires": {
57 | "balanced-match": "^1.0.0",
58 | "concat-map": "0.0.1"
59 | }
60 | },
61 | "commondir": {
62 | "version": "1.0.1",
63 | "resolved": "https://registry.npmjs.org/commondir/-/commondir-1.0.1.tgz",
64 | "integrity": "sha1-3dgA2gxmEnOTzKWVDqloo6rxJTs=",
65 | "dev": true
66 | },
67 | "concat-map": {
68 | "version": "0.0.1",
69 | "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
70 | "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=",
71 | "dev": true
72 | },
73 | "estree-walker": {
74 | "version": "1.0.1",
75 | "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-1.0.1.tgz",
76 | "integrity": "sha512-1fMXF3YP4pZZVozF8j/ZLfvnR8NSIljt56UhbZ5PeeDmmGHpgpdwQt7ITlGvYaQukCvuBRMLEiKiYC+oeIg4cg==",
77 | "dev": true
78 | },
79 | "fs.realpath": {
80 | "version": "1.0.0",
81 | "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
82 | "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=",
83 | "dev": true
84 | },
85 | "fsevents": {
86 | "version": "2.1.3",
87 | "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.1.3.tgz",
88 | "integrity": "sha512-Auw9a4AxqWpa9GUfj370BMPzzyncfBABW8Mab7BGWBYDj4Isgq+cDKtx0i6u9jcX9pQDnswsaaOTgTmA5pEjuQ==",
89 | "dev": true,
90 | "optional": true
91 | },
92 | "glob": {
93 | "version": "7.1.6",
94 | "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz",
95 | "integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==",
96 | "dev": true,
97 | "requires": {
98 | "fs.realpath": "^1.0.0",
99 | "inflight": "^1.0.4",
100 | "inherits": "2",
101 | "minimatch": "^3.0.4",
102 | "once": "^1.3.0",
103 | "path-is-absolute": "^1.0.0"
104 | }
105 | },
106 | "inflight": {
107 | "version": "1.0.6",
108 | "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
109 | "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=",
110 | "dev": true,
111 | "requires": {
112 | "once": "^1.3.0",
113 | "wrappy": "1"
114 | }
115 | },
116 | "inherits": {
117 | "version": "2.0.4",
118 | "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
119 | "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
120 | "dev": true
121 | },
122 | "is-reference": {
123 | "version": "1.2.0",
124 | "resolved": "https://registry.npmjs.org/is-reference/-/is-reference-1.2.0.tgz",
125 | "integrity": "sha512-ZVxq+5TkOx6GQdnoMm2aRdCKADdcrOWXLGzGT+vIA8DMpqEJaRk5AL1bS80zJ2bjHunVmjdzfCt0e4BymIEqKQ==",
126 | "dev": true,
127 | "requires": {
128 | "@types/estree": "0.0.44"
129 | },
130 | "dependencies": {
131 | "@types/estree": {
132 | "version": "0.0.44",
133 | "resolved": "https://registry.npmjs.org/@types/estree/-/estree-0.0.44.tgz",
134 | "integrity": "sha512-iaIVzr+w2ZJ5HkidlZ3EJM8VTZb2MJLCjw3V+505yVts0gRC4UMvjw0d1HPtGqI/HQC/KdsYtayfzl+AXY2R8g==",
135 | "dev": true
136 | }
137 | }
138 | },
139 | "magic-string": {
140 | "version": "0.25.7",
141 | "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.25.7.tgz",
142 | "integrity": "sha512-4CrMT5DOHTDk4HYDlzmwu4FVCcIYI8gauveasrdCu2IKIFOJ3f0v/8MDGJCDL9oD2ppz/Av1b0Nj345H9M+XIA==",
143 | "dev": true,
144 | "requires": {
145 | "sourcemap-codec": "^1.4.4"
146 | }
147 | },
148 | "minimatch": {
149 | "version": "3.0.4",
150 | "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz",
151 | "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==",
152 | "dev": true,
153 | "requires": {
154 | "brace-expansion": "^1.1.7"
155 | }
156 | },
157 | "once": {
158 | "version": "1.4.0",
159 | "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
160 | "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=",
161 | "dev": true,
162 | "requires": {
163 | "wrappy": "1"
164 | }
165 | },
166 | "path-is-absolute": {
167 | "version": "1.0.1",
168 | "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
169 | "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=",
170 | "dev": true
171 | },
172 | "path-parse": {
173 | "version": "1.0.6",
174 | "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz",
175 | "integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==",
176 | "dev": true
177 | },
178 | "picomatch": {
179 | "version": "2.2.2",
180 | "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.2.2.tgz",
181 | "integrity": "sha512-q0M/9eZHzmr0AulXyPwNfZjtwZ/RBZlbN3K3CErVrk50T2ASYI7Bye0EvekFY3IP1Nt2DHu0re+V2ZHIpMkuWg==",
182 | "dev": true
183 | },
184 | "resolve": {
185 | "version": "1.17.0",
186 | "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.17.0.tgz",
187 | "integrity": "sha512-ic+7JYiV8Vi2yzQGFWOkiZD5Z9z7O2Zhm9XMaTxdJExKasieFCr+yXZ/WmXsckHiKl12ar0y6XiXDx3m4RHn1w==",
188 | "dev": true,
189 | "requires": {
190 | "path-parse": "^1.0.6"
191 | }
192 | },
193 | "rollup": {
194 | "version": "2.16.1",
195 | "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.16.1.tgz",
196 | "integrity": "sha512-UYupMcbFtoWLB6ZtL4hPZNUTlkXjJfGT33Mmhz3hYLNmRj/cOvX2B26ZxDQuEpwtLdcyyyraBGQ7EfzmMJnXXg==",
197 | "dev": true,
198 | "requires": {
199 | "fsevents": "~2.1.2"
200 | }
201 | },
202 | "rollup-plugin-base64": {
203 | "version": "git+https://github.com/gzuidhof/rollup-plugin-base64.git#099d622f58c15f313dc1d8ca3cfe98434bc365ed",
204 | "from": "git+https://github.com/gzuidhof/rollup-plugin-base64.git",
205 | "dev": true,
206 | "requires": {
207 | "@rollup/pluginutils": "^3.1.0"
208 | }
209 | },
210 | "sourcemap-codec": {
211 | "version": "1.4.8",
212 | "resolved": "https://registry.npmjs.org/sourcemap-codec/-/sourcemap-codec-1.4.8.tgz",
213 | "integrity": "sha512-9NykojV5Uih4lgo5So5dtw+f0JgJX30KCNI8gwhz2J9A15wD0Ml6tjHKwf6fTSa6fAdVBdZeNOs9eJ71qCk8vA==",
214 | "dev": true
215 | },
216 | "wrappy": {
217 | "version": "1.0.2",
218 | "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
219 | "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=",
220 | "dev": true
221 | }
222 | }
223 | }
224 |
--------------------------------------------------------------------------------
/dist/index.js:
--------------------------------------------------------------------------------
1 | (function (global, factory) {
2 | typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('@babel/runtime/regenerator'), require('@babel/runtime/helpers/slicedToArray'), require('@babel/runtime/helpers/classCallCheck'), require('@babel/runtime/helpers/createClass'), require('@babel/runtime/helpers/asyncToGenerator'), require('fast-text-encoding')) :
3 | typeof define === 'function' && define.amd ? define(['exports', '@babel/runtime/regenerator', '@babel/runtime/helpers/slicedToArray', '@babel/runtime/helpers/classCallCheck', '@babel/runtime/helpers/createClass', '@babel/runtime/helpers/asyncToGenerator', 'fast-text-encoding'], factory) :
4 | (global = global || self, factory(global.IntlSegmenterPolyfill = {}, global._regeneratorRuntime, global._slicedToArray, global._classCallCheck, global._createClass, global._asyncToGenerator));
5 | }(this, (function (exports, _regeneratorRuntime, _slicedToArray, _classCallCheck, _createClass, _asyncToGenerator) { 'use strict';
6 |
7 | _regeneratorRuntime = _regeneratorRuntime && Object.prototype.hasOwnProperty.call(_regeneratorRuntime, 'default') ? _regeneratorRuntime['default'] : _regeneratorRuntime;
8 | _slicedToArray = _slicedToArray && Object.prototype.hasOwnProperty.call(_slicedToArray, 'default') ? _slicedToArray['default'] : _slicedToArray;
9 | _classCallCheck = _classCallCheck && Object.prototype.hasOwnProperty.call(_classCallCheck, 'default') ? _classCallCheck['default'] : _classCallCheck;
10 | _createClass = _createClass && Object.prototype.hasOwnProperty.call(_createClass, 'default') ? _createClass['default'] : _createClass;
11 | _asyncToGenerator = _asyncToGenerator && Object.prototype.hasOwnProperty.call(_asyncToGenerator, 'default') ? _asyncToGenerator['default'] : _asyncToGenerator;
12 |
// Numeric codes handed to the wasm break iterator for each supported granularity.
var BREAK_TYPES = {
  grapheme: 0,
  word: 1,
  sentence: 3
};

/**
 * Maps a numeric segment type reported by the break iterator onto a label.
 *
 * Bands: below 100 -> 'none', 100-199 -> 'number', 200-299 -> 'word',
 * 300-399 -> 'kana', 400-499 -> 'ideo'. Anything else (500 and above, NaN)
 * yields undefined.
 * NOTE(review): the bands look like ICU word-break rule-status ranges; confirm
 * against break_iterator.c.
 */
var getSegmentType = function getSegmentType(type) {
  if (type < 100) {
    return 'none';
  }

  if (type >= 100 && type < 200) {
    return 'number';
  }

  if (type >= 200 && type < 300) {
    return 'word';
  }

  if (type >= 300 && type < 400) {
    return 'kana';
  }

  if (type >= 400 && type < 500) {
    return 'ideo';
  }

  return undefined;
};
32 |
33 | var instantiateWasmModule = function instantiateWasmModule(wasm, imports) {
34 | if (typeof wasm.then === 'function') {
35 | if (WebAssembly.instantiateStreaming != null) {
36 | return wasm.then(function (response) {
37 | return WebAssembly.instantiateStreaming(response, imports);
38 | });
39 | }
40 |
41 | return wasm.then(function (response) {
42 | return response.arrayBuffer();
43 | }).then(function (buffer) {
44 | return WebAssembly.instantiate(buffer, imports);
45 | });
46 | } else {
47 | return WebAssembly.instantiate(wasm, imports);
48 | }
49 | };
50 |
/**
 * Builds the Segmenter class on top of an already instantiated break-iterator module.
 *
 * @param wasmInstance Instance whose exports provide `malloc`, `free`, `memory` and
 *   `utf8_break_iterator`.
 * @param values Mutable holder (`{ current: [] }`). It is reset at the start of every
 *   `segment()` call and afterwards read as a list of `[start, end, segmentType]`
 *   entries; the caller is expected to fill it from the wasm `push` import.
 * @returns Promise resolving to the Segmenter class.
 */
var createIntlSegmenterPolyfillFromInstance = /*#__PURE__*/function () {
  var _build = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee(wasmInstance, values) {
    var copyStringToWasm;
    return _regeneratorRuntime.wrap(function _callee$(_context) {
      while (1) {
        switch (_context.prev = _context.next) {
          case 0:
            // Encodes `str` as NUL-terminated UTF-8, copies it into freshly malloc'ed
            // wasm memory and returns [pointer, encoded bytes].
            copyStringToWasm = function copyStringToWasm(str) {
              var bytes = new TextEncoder().encode(str + '\0');
              var wasmExports = wasmInstance.exports;
              var ptr = wasmExports.malloc(bytes.length);
              // The view over wasm memory is created only after malloc has returned.
              var target = new Uint8Array(wasmExports.memory.buffer, ptr, bytes.length);
              target.set(bytes);
              return [ptr, bytes];
            };

            return _context.abrupt("return", /*#__PURE__*/function () {
              function Segmenter(locale, options) {
                _classCallCheck(this, Segmenter);

                this.locale = locale;
                this.options = options || {};
              }

              _createClass(Segmenter, [{
                key: "segment",
                value: function segment(input) {
                  var locale = this.locale;
                  var granularity = this.options.granularity || 'grapheme';
                  var wasmExports = wasmInstance.exports;
                  values.current = [];

                  // Input is allocated before the locale, as in the original call order.
                  var inputAllocation = copyStringToWasm(input);
                  var inputPtr = inputAllocation[0];
                  var inputBytes = inputAllocation[1];
                  var localePtr = copyStringToWasm(locale)[0];

                  wasmExports.utf8_break_iterator(BREAK_TYPES[granularity], localePtr, inputPtr, inputBytes.length);
                  wasmExports.free(localePtr);
                  wasmExports.free(inputPtr);

                  var wordGranularity = granularity === 'word';
                  var runningIndex = 0;
                  var segments = values.current.map(function (entry) {
                    var bounds = _slicedToArray(entry, 3);

                    // start/end are applied directly to the JS string.
                    var text = input.slice(bounds[0], bounds[1]);
                    var label = wordGranularity ? getSegmentType(bounds[2]) : undefined;
                    var result = {
                      segment: text,
                      index: runningIndex,
                      isWordLike: wordGranularity ? label !== 'none' : undefined,
                      breakType: label
                    };
                    // The next segment's index is the sum of the lengths seen so far.
                    runningIndex += text.length;
                    return result;
                  });

                  // Returns the segment covering `indexToFind`, or undefined when none does.
                  segments.containing = function (indexToFind) {
                    return segments.find(function (candidate) {
                      var first = candidate.index;
                      var last = first + candidate.segment.length - 1;
                      return indexToFind >= first && indexToFind <= last;
                    });
                  };

                  return segments;
                }
              }]);

              return Segmenter;
            }());

          case 2:
          case "end":
            return _context.stop();
        }
      }
    }, _callee);
  }));

  return function createIntlSegmenterPolyfillFromInstance(_x, _x2) {
    return _build.apply(this, arguments);
  };
}();
143 |
144 | var getImports = function getImports(callback) {
145 | return {
146 | env: {
147 | push: function push(start, end, segmentType) {
148 | callback([start, end, segmentType]);
149 | },
150 | __sys_stat64: function __sys_stat64() {}
151 | },
152 | wasi_snapshot_preview1: {
153 | proc_exit: function proc_exit() {},
154 | fd_close: function fd_close() {},
155 | environ_sizes_get: function environ_sizes_get() {},
156 | environ_get: function environ_get() {}
157 | }
158 | };
159 | };
160 |
/**
 * Creates the Segmenter class from a wasm factory function.
 *
 * @param wasmFactory Called with the import object; its awaited result must expose the
 *   instantiated module as `.instance`.
 * @returns Promise resolving to the Segmenter class.
 */
var createIntlSegmenterPolyfillFromFactory = /*#__PURE__*/function () {
  var _fromFactory = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee2(wasmFactory) {
    var collected, instantiated;
    return _regeneratorRuntime.wrap(function _callee2$(_context2) {
      while (1) {
        switch (_context2.prev = _context2.next) {
          case 0:
            // Shared holder: the `push` import appends to it, the Segmenter reads from it.
            collected = {
              current: []
            };
            _context2.next = 3;
            return wasmFactory(getImports(function (entry) {
              collected.current.push(entry);
            }));

          case 3:
            instantiated = _context2.sent;
            return _context2.abrupt("return", createIntlSegmenterPolyfillFromInstance(instantiated.instance, collected));

          case 6:
          case "end":
            return _context2.stop();
        }
      }
    }, _callee2);
  }));

  return function createIntlSegmenterPolyfillFromFactory(_x3) {
    return _fromFactory.apply(this, arguments);
  };
}();
/**
 * Creates the Segmenter class from a wasm source.
 *
 * @param wasm Passed to `instantiateWasmModule` together with the import object.
 * @returns Promise resolving to the Segmenter class.
 */
var createIntlSegmenterPolyfill = /*#__PURE__*/function () {
  var _fromWasm = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee3(wasm) {
    var collected, instantiated;
    return _regeneratorRuntime.wrap(function _callee3$(_context3) {
      while (1) {
        switch (_context3.prev = _context3.next) {
          case 0:
            // Shared holder: the `push` import appends to it, the Segmenter reads from it.
            collected = {
              current: []
            };
            _context3.next = 3;
            return instantiateWasmModule(wasm, getImports(function (entry) {
              collected.current.push(entry);
            }));

          case 3:
            instantiated = _context3.sent;
            return _context3.abrupt("return", createIntlSegmenterPolyfillFromInstance(instantiated.instance, collected));

          case 6:
          case "end":
            return _context3.stop();
        }
      }
    }, _callee3);
  }));

  return function createIntlSegmenterPolyfill(_x4) {
    return _fromWasm.apply(this, arguments);
  };
}();
227 |
228 | exports.createIntlSegmenterPolyfill = createIntlSegmenterPolyfill;
229 | exports.createIntlSegmenterPolyfillFromFactory = createIntlSegmenterPolyfillFromFactory;
230 |
231 | Object.defineProperty(exports, '__esModule', { value: true });
232 |
233 | })));
234 |
--------------------------------------------------------------------------------
/dist/module.js:
--------------------------------------------------------------------------------
1 |
2 | function _loadWasmModule (sync, src, imports) {
3 | var buf = null
4 | var isNode = typeof process !== 'undefined' && process.versions != null && process.versions.node != null
5 | if (isNode) {
6 | buf = Buffer.from(src, 'base64')
7 | } else {
8 | var raw = globalThis.atob(src)
9 | var rawLength = raw.length
10 | buf = new Uint8Array(new ArrayBuffer(rawLength))
11 | for(var i = 0; i < rawLength; i++) {
12 | buf[i] = raw.charCodeAt(i)
13 | }
14 | }
15 |
16 | if (imports && !sync) {
17 | return WebAssembly.instantiate(buf, imports)
18 | } else if (!imports && !sync) {
19 | return WebAssembly.compile(buf)
20 | } else {
21 | var mod = new WebAssembly.Module(buf)
22 | return imports ? new WebAssembly.Instance(mod, imports) : mod
23 | }
24 | }
25 | (function (global, factory) {
26 | typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory(require('break_iterator.wasm')) :
27 | typeof define === 'function' && define.amd ? define(['break_iterator.wasm'], factory) :
28 | (global = global || self, global.IntlSegmenterPolyfill = factory(global.break_iterator));
29 | }(this, (function (break_iterator) { 'use strict';
30 |
31 | break_iterator = break_iterator && Object.prototype.hasOwnProperty.call(break_iterator, 'default') ? break_iterator['default'] : break_iterator;
32 |
33 | /*! *****************************************************************************
34 | Copyright (c) Microsoft Corporation.
35 |
36 | Permission to use, copy, modify, and/or distribute this software for any
37 | purpose with or without fee is hereby granted.
38 |
39 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
40 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
41 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
42 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
43 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
44 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
45 | PERFORMANCE OF THIS SOFTWARE.
46 | ***************************************************************************** */
47 |
48 | function __awaiter(thisArg, _arguments, P, generator) {
49 | function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
50 | return new (P || (P = Promise))(function (resolve, reject) {
51 | function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
52 | function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
53 | function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
54 | step((generator = generator.apply(thisArg, _arguments || [])).next());
55 | });
56 | }
57 |
58 | function __generator(thisArg, body) {
59 | var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
60 | return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
61 | function verb(n) { return function (v) { return step([n, v]); }; }
62 | function step(op) {
63 | if (f) throw new TypeError("Generator is already executing.");
64 | while (_) try {
65 | if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
66 | if (y = 0, t) op = [op[0] & 2, t.value];
67 | switch (op[0]) {
68 | case 0: case 1: t = op; break;
69 | case 4: _.label++; return { value: op[1], done: false };
70 | case 5: _.label++; y = op[1]; op = [0]; continue;
71 | case 7: op = _.ops.pop(); _.trys.pop(); continue;
72 | default:
73 | if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
74 | if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
75 | if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
76 | if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
77 | if (t[2]) _.ops.pop();
78 | _.trys.pop(); continue;
79 | }
80 | op = body.call(thisArg, _);
81 | } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
82 | if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
83 | }
84 | }
85 |
86 | (function(l){function m(){}function k(c,a){c=void 0===c?"utf-8":c;a=void 0===a?{fatal:!1}:a;if(-1===n.indexOf(c.toLowerCase()))throw new RangeError("Failed to construct 'TextDecoder': The encoding label provided ('"+c+"') is invalid.");if(a.fatal)throw Error("Failed to construct 'TextDecoder': the 'fatal' option is unsupported.");}if(l.TextEncoder&&l.TextDecoder)return !1;var n=["utf-8","utf8","unicode-1-1-utf-8"];Object.defineProperty(m.prototype,"encoding",{value:"utf-8"});m.prototype.encode=function(c,
87 | a){a=void 0===a?{stream:!1}:a;if(a.stream)throw Error("Failed to encode: the 'stream' option is unsupported.");a=0;for(var g=c.length,f=0,b=Math.max(32,g+(g>>1)+7),e=new Uint8Array(b>>3<<3);a=d){if(a=d)continue}f+4>e.length&&(b+=8,b*=1+a/c.length*2,b=b>>3<<3,h=new Uint8Array(b),h.set(e),e=h);if(0===(d&4294967168))e[f++]=d;else {if(0===(d&4294965248))e[f++]=
88 | d>>6&31|192;else if(0===(d&4294901760))e[f++]=d>>12&15|224,e[f++]=d>>6&63|128;else if(0===(d&4292870144))e[f++]=d>>18&7|240,e[f++]=d>>12&63|128,e[f++]=d>>6&63|128;else continue;e[f++]=d&63|128;}}return e.slice?e.slice(0,f):e.subarray(0,f)};Object.defineProperty(k.prototype,"encoding",{value:"utf-8"});Object.defineProperty(k.prototype,"fatal",{value:!1});Object.defineProperty(k.prototype,"ignoreBOM",{value:!1});k.prototype.decode=function(c,a){a=void 0===a?{stream:!1}:a;if(a.stream)throw Error("Failed to decode: the 'stream' option is unsupported.");
89 | a=c;!(a instanceof Uint8Array)&&a.buffer instanceof ArrayBuffer&&(a=new Uint8Array(c.buffer));c=0;for(var g=[],f=[];;){var b=c>>10&1023|55296),b=56320|b&1023);g.push(b);}}};l.TextEncoder=m;l.TextDecoder=k;})("undefined"!==typeof window?window:"undefined"!==typeof global?global:undefined);
91 |
// Numeric codes handed to the wasm break iterator for each supported granularity.
var BREAK_TYPES = {
    grapheme: 0,
    word: 1,
    sentence: 3
};
/**
 * Maps a numeric segment type reported by the break iterator onto a label.
 *
 * Bands: below 100 -> 'none', 100-199 -> 'number', 200-299 -> 'word',
 * 300-399 -> 'kana', 400-499 -> 'ideo'. Anything else (500 and above, NaN)
 * yields undefined.
 */
var getSegmentType = function (type) {
    if (type < 100) {
        return 'none';
    }
    if (type >= 100 && type < 200) {
        return 'number';
    }
    if (type >= 200 && type < 300) {
        return 'word';
    }
    if (type >= 300 && type < 400) {
        return 'kana';
    }
    if (type >= 400 && type < 500) {
        return 'ideo';
    }
    return undefined;
};
/**
 * Builds a `Segmenter` class bound to an already-instantiated WebAssembly
 * module.
 *
 * @param wasmInstance Instance whose exports `malloc`, `free`, `memory` and
 *        `break_iterator` are used by the returned class.
 * @param values Mutable holder; `values.current` is reset to an empty array
 *        before each `break_iterator` call and read afterwards as a list of
 *        `[start, end, segmentType]` tuples. Presumably it is filled by the
 *        `push` import that the caller wired into the module — verify
 *        against the caller.
 * @returns The result of the `__awaiter` helper wrapping the `Segmenter`
 *          constructor (a promise-like resolving to the class).
 */
var createIntlSegmenterPolyfillFromInstance = function (wasmInstance, values) { return __awaiter(void 0, void 0, void 0, function () {
    var allocStr;
    return __generator(this, function (_a) {
        // Copies `str` (plus a trailing NUL) into module memory as UTF-8.
        // Returns [pointer, encoded bytes]; the caller owns the pointer and
        // must release it with `exports.free`.
        allocStr = function (str) {
            var encoder = new TextEncoder();
            var view = encoder.encode(str + '\0');
            // The exports object is untyped in the original TypeScript source.
            var exports = wasmInstance.exports;
            var ptr = exports.malloc(view.length);
            // Create the view after malloc so it refers to the current buffer.
            var memory = new Uint8Array(exports.memory.buffer, ptr, view.length);
            memory.set(view);
            return [ptr, view];
        };
        return [2 /*return*/, /** @class */ (function () {
                function Segmenter(locale, options) {
                    this.locale = locale;
                    this.options = options;
                }
                /**
                 * Splits `input` into segments.
                 *
                 * @param input Text to segment.
                 * @returns One record per reported boundary pair with the
                 *          segment text, its index in `input`, and — for
                 *          'word' granularity only — `isWordLike` and
                 *          `breakType` (both `undefined` otherwise).
                 */
                Segmenter.prototype.segment = function (input) {
                    var locale = this.locale;
                    // `options` is optional; fall back to 'grapheme' instead of
                    // throwing on `undefined.granularity`.
                    var granularity = (this.options && this.options.granularity) || 'grapheme';
                    var exports = wasmInstance.exports;
                    values.current = [];
                    var _a = allocStr(input), inputPtr = _a[0], inputView = _a[1];
                    // try/finally so module memory is released even when
                    // allocation or iteration throws.
                    try {
                        var localePtr = allocStr(locale)[0];
                        try {
                            exports.break_iterator(BREAK_TYPES[granularity], localePtr, inputPtr);
                        }
                        finally {
                            exports.free(localePtr);
                        }
                    }
                    finally {
                        exports.free(inputPtr);
                    }
                    var decoder = new TextDecoder();
                    var isWord = granularity === 'word';
                    return values.current.map(function (_a) {
                        var start = _a[0], end = _a[1], segmentType = _a[2];
                        var breakType = isWord ? getSegmentType(segmentType) : undefined;
                        return ({
                            // `start`/`end` are used as offsets into the UTF-8 bytes.
                            segment: decoder.decode(inputView.slice(start, end)),
                            // Decode the prefix to turn the byte offset into a
                            // JavaScript string index.
                            index: decoder.decode(inputView.slice(0, start)).length,
                            isWordLike: isWord ? breakType !== 'none' : undefined,
                            breakType: breakType
                        });
                    });
                };
                return Segmenter;
            }())];
    });
}); };
160 | var getImports = function (callback) { return ({
161 | env: {
162 | push: function (start, end, segmentType) {
163 | callback([start, end, segmentType]);
164 | },
165 | __sys_stat64: function () { }
166 | },
167 | wasi_snapshot_preview1: {
168 | proc_exit: function () { },
169 | fd_close: function () { },
170 | environ_sizes_get: function () { },
171 | environ_get: function () { }
172 | }
173 | }); };
/**
 * Instantiates the WebAssembly module through `wasmFactory` and returns the
 * `Segmenter` class bound to that instance.
 *
 * @param wasmFactory Function called with the import object from
 *        `getImports`; its awaited result must expose an `instance` property.
 * @returns The result of `createIntlSegmenterPolyfillFromInstance` for the
 *          new instance, wrapped by the `__awaiter` helper.
 */
var createIntlSegmenterPolyfillFromFactory = function (wasmFactory) { return __awaiter(void 0, void 0, void 0, function () {
    var values, instance;
    return __generator(this, function (_a) {
        switch (_a.label) {
            case 0:
                // Shared holder: the `push` import appends tuples here and the
                // Segmenter reads them back after each call.
                values = { current: [] };
                return [4 /*yield*/, wasmFactory(getImports(function (value) {
                        // Collect silently; no debug logging per boundary.
                        values.current.push(value);
                    }))];
            case 1:
                instance = (_a.sent()).instance;
                return [2 /*return*/, createIntlSegmenterPolyfillFromInstance(instance, values)];
        }
    });
}); };
190 |
/**
 * Public entry point: creates the segmenter polyfill from the
 * `break_iterator` WebAssembly factory available in this bundle.
 *
 * @returns Whatever `createIntlSegmenterPolyfillFromFactory` returns for
 *          that factory.
 */
var createIntlSegmenterPolyfill = function () {
    var wasmFactory = break_iterator;
    return createIntlSegmenterPolyfillFromFactory(wasmFactory);
};
194 |
195 | return createIntlSegmenterPolyfill;
196 |
197 | })));
198 |
--------------------------------------------------------------------------------