├── examples ├── rollup │ ├── .gitignore │ ├── index.html │ ├── rollup.config.js │ ├── package.json │ ├── index.js │ └── package-lock.json ├── thai.txt ├── node.js └── elixir.ex ├── .gitignore ├── dist ├── index.html ├── break_iterator.wasm ├── break_iterator_cja.wasm ├── bundled.d.ts ├── bundled_cja.d.ts ├── index.d.ts ├── index.js └── module.js ├── .babelrc ├── src ├── break_iterator.wasm ├── types.d.ts ├── bundled.js ├── bundled.d.ts ├── index.d.ts └── index.js ├── .babelrc.bundled ├── rollup.config.js ├── filters.json ├── .github └── workflows │ ├── test.yml │ ├── build.yml │ └── build-emsdk.yml ├── Dockerfile.emsdk ├── Dockerfile.icu ├── Dockerfile ├── rollup.config.bundled.js ├── package.json ├── icu.py ├── break_iterator.c ├── README.md └── test └── index.test.js /examples/rollup/.gitignore: -------------------------------------------------------------------------------- 1 | out.js 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | node_modules/ 3 | -------------------------------------------------------------------------------- /dist/index.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /examples/thai.txt: -------------------------------------------------------------------------------- 1 | ยังมีอาสาสมัครน้อยมากเมื่อเทียบกับประชากรที่เข้าถึงอินเทอร์เน็ตได้ 2 | -------------------------------------------------------------------------------- /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": ["@babel/preset-env"], 3 | "plugins": ["@babel/plugin-transform-runtime"] 4 | } 5 | -------------------------------------------------------------------------------- /dist/break_iterator.wasm: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/surferseo/intl-segmenter-polyfill/HEAD/dist/break_iterator.wasm -------------------------------------------------------------------------------- /src/break_iterator.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/surferseo/intl-segmenter-polyfill/HEAD/src/break_iterator.wasm -------------------------------------------------------------------------------- /examples/rollup/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /dist/break_iterator_cja.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/surferseo/intl-segmenter-polyfill/HEAD/dist/break_iterator_cja.wasm -------------------------------------------------------------------------------- /src/types.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'break_iterator.wasm' { 2 | const exports: (imports: object) => { instance: WebAssembly.Instance } 3 | export default exports 4 | } 5 | -------------------------------------------------------------------------------- /.babelrc.bundled: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | [ 4 | "@babel/preset-env", 5 | { "modules": false, "useBuiltIns": "usage", "corejs": 3 } 6 | ] 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /src/bundled.js: -------------------------------------------------------------------------------- 1 | import break_iterator from './break_iterator.wasm' 2 | import { createIntlSegmenterPolyfillFromFactory } from './index' 3 | 4 | export const createIntlSegmenterPolyfill = () => { 5 | return 
createIntlSegmenterPolyfillFromFactory(break_iterator) 6 | } 7 | -------------------------------------------------------------------------------- /rollup.config.js: -------------------------------------------------------------------------------- 1 | import babel from '@rollup/plugin-babel' 2 | 3 | export default { 4 | input: 'src/index.js', 5 | output: { 6 | file: 'dist/index.js', 7 | format: 'umd', 8 | name: 'IntlSegmenterPolyfill', 9 | }, 10 | plugins: [babel({ babelHelpers: 'runtime' })], 11 | } 12 | -------------------------------------------------------------------------------- /examples/rollup/rollup.config.js: -------------------------------------------------------------------------------- 1 | import commonjs from '@rollup/plugin-commonjs' 2 | import { wasm } from '@rollup/plugin-wasm' 3 | 4 | export default { 5 | input: 'index.js', 6 | output: { 7 | file: 'out.js', 8 | format: 'iife', 9 | }, 10 | plugins: [commonjs(), wasm()], 11 | } 12 | -------------------------------------------------------------------------------- /filters.json: -------------------------------------------------------------------------------- 1 | { 2 | "strategy": "additive", 3 | "featureFilters": { 4 | "brkitr_rules": "include", 5 | "brkitr_tree": "include", 6 | "cnvalias": "include", 7 | "ulayout": "include", 8 | "brkitr_dictionaries": { 9 | "whitelist": ["thaidict"] 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Install modules 14 | run: npm install 15 | - name: Run tests 16 | run: npm run build && npm run test 17 | -------------------------------------------------------------------------------- /examples/rollup/package.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "intl-segmenter-polyfill-rollup-example", 3 | "version": "1.0.0", 4 | "description": "", 5 | "scripts": { 6 | "test": "echo \"Error: no test specified\" && exit 1" 7 | }, 8 | "author": "", 9 | "license": "ISC", 10 | "devDependencies": { 11 | "@rollup/plugin-commonjs": "^13.0.0", 12 | "@rollup/plugin-wasm": "^5.0.0", 13 | "rollup": "^2.16.1" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /dist/bundled.d.ts: -------------------------------------------------------------------------------- 1 | export declare const createIntlSegmenterPolyfill: () => Promise<{ 2 | new (locale: string, options: { 3 | granularity: "word" | "grapheme"; 4 | }): { 5 | locale: string; 6 | options: { 7 | granularity: "word" | "grapheme"; 8 | }; 9 | segment(input: string): { 10 | segment: string; 11 | index: number; 12 | isWordLike: boolean; 13 | breakType: "number" | "none" | "word" | "kana" | "ideo"; 14 | }[]; 15 | }; 16 | }>; 17 | -------------------------------------------------------------------------------- /src/bundled.d.ts: -------------------------------------------------------------------------------- 1 | export declare const createIntlSegmenterPolyfill: () => Promise<{ 2 | new (locale: string, options: { 3 | granularity: "word" | "grapheme"; 4 | }): { 5 | locale: string; 6 | options: { 7 | granularity: "word" | "grapheme"; 8 | }; 9 | segment(input: string): { 10 | segment: string; 11 | index: number; 12 | isWordLike: boolean; 13 | breakType: "number" | "none" | "word" | "kana" | "ideo"; 14 | }[]; 15 | }; 16 | }>; 17 | -------------------------------------------------------------------------------- /dist/bundled_cja.d.ts: -------------------------------------------------------------------------------- 1 | export declare const createIntlSegmenterPolyfill: () => Promise<{ 2 | new (locale: string, options: { 3 | granularity: "word" | 
"grapheme"; 4 | }): { 5 | locale: string; 6 | options: { 7 | granularity: "word" | "grapheme"; 8 | }; 9 | segment(input: string): { 10 | segment: string; 11 | index: number; 12 | isWordLike: boolean; 13 | breakType: "number" | "none" | "word" | "kana" | "ideo"; 14 | }[]; 15 | }; 16 | }>; 17 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build WASM 2 | 3 | on: push 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | - name: Build WASM 12 | run: ./build.sh 13 | - name: Install modules 14 | run: npm install 15 | - name: Run tests 16 | run: npm run build && npm run test 17 | - name: Archive production artifacts 18 | uses: actions/upload-artifact@v1 19 | with: 20 | name: break_iterator.wasm 21 | path: dist/break_iterator.wasm 22 | -------------------------------------------------------------------------------- /.github/workflows/build-emsdk.yml: -------------------------------------------------------------------------------- 1 | name: Publish EMSDK docker to Registry 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | paths: 'Dockerfile.emsdk' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Publish EMSDK docker to Registry 15 | uses: elgohr/Publish-Docker-Github-Action@master 16 | with: 17 | name: surferseo/emsdk 18 | username: ${{ secrets.DOCKER_USERNAME }} 19 | password: ${{ secrets.DOCKER_PASSWORD }} 20 | dockerfile: Dockerfile.emsdk 21 | -------------------------------------------------------------------------------- /Dockerfile.emsdk: -------------------------------------------------------------------------------- 1 | FROM debian:buster 2 | 3 | RUN apt-get update && apt-get install -y build-essential git python clang llvm cmake libxml2 wget python-pip python3 python3-pip zip unzip 
ca-certificates 4 | 5 | RUN mkdir -p /emsdk 6 | 7 | WORKDIR / 8 | RUN git clone https://github.com/emscripten-core/emsdk.git 9 | WORKDIR /emsdk 10 | 11 | RUN git checkout 6b0d151917fe508007d9d76791369ec94c4eb304 12 | RUN ./emsdk install sdk-upstream-master-64bit 13 | 14 | FROM debian:buster 15 | 16 | RUN apt-get update && apt-get install -y python 17 | COPY --from=0 /emsdk /emsdk 18 | WORKDIR /emsdk 19 | RUN ./emsdk activate sdk-upstream-master-64bit 20 | -------------------------------------------------------------------------------- /Dockerfile.icu: -------------------------------------------------------------------------------- 1 | FROM debian:buster 2 | 3 | RUN apt-get update && apt-get install -y build-essential git python 4 | 5 | 6 | WORKDIR / 7 | RUN git clone https://github.com/unicode-org/icu 8 | WORKDIR /icu/icu4c/source 9 | RUN git checkout bb7b8481bdce7eb8ac40b3dbfd0a567b3c754cd6 10 | 11 | RUN ./runConfigureICU Linux --with-data-packaging=archive 12 | RUN make -j$(nproc) 13 | 14 | COPY ./filters.json / 15 | RUN ICU_DATA_FILTER_FILE=/filters.json ./runConfigureICU Linux --with-data-packaging=archive 16 | RUN cd data && make clean && make 17 | RUN mkdir -p /artifacts 18 | RUN cp data/out/icu* /artifacts 19 | 20 | WORKDIR /artifacts 21 | RUN apt-get update && apt-get install -y xxd 22 | RUN xxd -i icudt67l.dat data.h 23 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM surferseo/emsdk 2 | 3 | RUN apt-get update && apt-get install -y git 4 | 5 | WORKDIR / 6 | RUN git clone https://github.com/unicode-org/icu 7 | RUN cd /icu && git checkout bb7b8481bdce7eb8ac40b3dbfd0a567b3c754cd6 8 | RUN mv /icu/icu4c /icu/icu 9 | 10 | COPY ./build /build 11 | WORKDIR /build 12 | 13 | # for `source /emsdk/emsdk_env.sh` to work 14 | SHELL ["/bin/bash", "-c"] 15 | 16 | RUN cp /build/icu.py /emsdk/emscripten/master/tools/ports 17 | RUN mkdir -p 
/artifacts 18 | RUN source /emsdk/emsdk_env.sh; EMCC_LOCAL_PORTS="icu=/icu" emcc break_iterator.c -s USE_ICU=1 -o /artifacts/break_iterator.wasm -s EXPORTED_FUNCTIONS='["_main", "_break_iterator", "_utf8_break_iterator", "_malloc", "_free"]' -s ERROR_ON_UNDEFINED_SYMBOLS=0 19 | -------------------------------------------------------------------------------- /rollup.config.bundled.js: -------------------------------------------------------------------------------- 1 | import babel from '@rollup/plugin-babel' 2 | import wasm from '@rollup/plugin-wasm' 3 | 4 | import resolve from 'rollup-plugin-node-resolve' 5 | import commonjs from 'rollup-plugin-commonjs' 6 | 7 | export default { 8 | input: 'src/bundled.js', 9 | output: { 10 | file: 'dist/bundled.js', 11 | format: 'umd', 12 | name: 'IntlSegmenterPolyfillBundled', 13 | }, 14 | plugins: [ 15 | wasm(), 16 | babel({ 17 | babelrc: false, 18 | babelHelpers: 'bundled', 19 | exclude: 'node_modules/**', 20 | presets: [ 21 | [ 22 | '@babel/preset-env', 23 | { 24 | corejs: 3, 25 | modules: false, 26 | useBuiltIns: 'usage', 27 | targets: { 28 | ie: '11', 29 | }, 30 | }, 31 | ], 32 | ], 33 | }), 34 | resolve(), 35 | commonjs(), 36 | ], 37 | } 38 | -------------------------------------------------------------------------------- /examples/rollup/index.js: -------------------------------------------------------------------------------- 1 | import break_iterator from '../../dist/break_iterator.wasm' 2 | import { createIntlSegmenterPolyfillFromFactory } from '../../dist/index' 3 | 4 | ;(async function () { 5 | const Segmenter = await createIntlSegmenterPolyfillFromFactory(break_iterator) 6 | 7 | const segmenter = new Segmenter('en', { granularity: 'word' }) 8 | 9 | const updateSegmentList = (value) => { 10 | const segments = segmenter 11 | .segment(value) 12 | .map( 13 | ({ segment, isWordLike, breakType }) => 14 | `${segment} – ${breakType} (isWordLike=${isWordLike})` 15 | ) 16 | document.querySelector('ul').innerHTML = segments 17 
| .map((segment) => `<li>${segment}</li>`) 18 | .join('\n') 19 | } 20 | 21 | document.querySelector('textarea').addEventListener('keyup', (e) => { 22 | updateSegmentList(e.currentTarget.value) 23 | }) 24 | 25 | updateSegmentList(document.querySelector('textarea').value) 26 | })() 27 | -------------------------------------------------------------------------------- /examples/node.js: -------------------------------------------------------------------------------- 1 | const { createIntlSegmenterPolyfill } = require('../dist/index.js') 2 | const fs = require('fs') 3 | 4 | const wasmBuffer = fs.readFileSync('../dist/break_iterator.wasm') 5 | let wasmBinary = new Uint8Array(wasmBuffer) 6 | 7 | ;(async () => { 8 | const Segmenter = await createIntlSegmenterPolyfill(wasmBinary) 9 | const thai = fs.readFileSync('./thai.txt', 'utf-8') 10 | 11 | console.log( 12 | new Segmenter('th', { granularity: 'word' }) 13 | .segment(thai) 14 | .filter(({ isWordLike }) => isWordLike), 15 | ) 16 | 17 | const wiki = fs.readFileSync('./wikipedia.txt', 'utf-8') 18 | 19 | const hrstart = process.hrtime() 20 | 21 | new Segmenter('en', { granularity: 'word' }) 22 | .segment(wiki) 23 | .filter(({ isWordLike }) => isWordLike) 24 | .forEach(({ segment }) => console.log(segment)) 25 | // console.log( 26 | // ) 27 | 28 | const hrend = process.hrtime(hrstart) 29 | console.info('Execution time (hr): %ds %dms', hrend[0], hrend[1] / 1000000) 30 | })() 31 | -------------------------------------------------------------------------------- /src/index.d.ts: -------------------------------------------------------------------------------- 1 | import 'fast-text-encoding'; 2 | export declare const createIntlSegmenterPolyfillFromFactory: (wasmFactory: (imports: Object) => { 3 | instance: WebAssembly.Instance; 4 | }) => Promise<{ 5 | new (locale: string, options: { 6 | granularity: 'word' | 'grapheme'; 7 | }): { 8 | locale: string; 9 | options: { 10 | granularity: 'word' | 'grapheme'; 11 | }; 12 | segment(input: string): { 13 | segment: string; 14 | 
index: number; 15 | isWordLike: boolean; 16 | breakType: "number" | "none" | "word" | "kana" | "ideo"; 17 | }[]; 18 | }; 19 | }>; 20 | export declare const createIntlSegmenterPolyfill: (wasm: ArrayBufferLike | PromiseLike<Response>) => Promise<{ 21 | new (locale: string, options: { 22 | granularity: 'word' | 'grapheme'; 23 | }): { 24 | locale: string; 25 | options: { 26 | granularity: 'word' | 'grapheme'; 27 | }; 28 | segment(input: string): { 29 | segment: string; 30 | index: number; 31 | isWordLike: boolean; 32 | breakType: "number" | "none" | "word" | "kana" | "ideo"; 33 | }[]; 34 | }; 35 | }>; 36 | -------------------------------------------------------------------------------- /dist/index.d.ts: -------------------------------------------------------------------------------- 1 | import 'fast-text-encoding'; 2 | export declare const createIntlSegmenterPolyfillFromFactory: (wasmFactory: (imports: Object) => { 3 | instance: WebAssembly.Instance; 4 | }) => Promise<{ 5 | new (locale: string, options: { 6 | granularity: 'word' | 'grapheme'; 7 | }): { 8 | locale: string; 9 | options: { 10 | granularity: 'word' | 'grapheme'; 11 | }; 12 | segment(input: string): { 13 | segment: string; 14 | index: number; 15 | isWordLike: boolean; 16 | breakType: "number" | "none" | "word" | "kana" | "ideo"; 17 | }[]; 18 | }; 19 | }>; 20 | export declare const createIntlSegmenterPolyfill: (wasm: ArrayBufferLike | PromiseLike<Response>) => Promise<{ 21 | new (locale: string, options: { 22 | granularity: 'word' | 'grapheme'; 23 | }): { 24 | locale: string; 25 | options: { 26 | granularity: 'word' | 'grapheme'; 27 | }; 28 | segment(input: string): { 29 | segment: string; 30 | index: number; 31 | isWordLike: boolean; 32 | breakType: "number" | "none" | "word" | "kana" | "ideo"; 33 | }[]; 34 | }; 35 | }>; 36 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": 
"intl-segmenter-polyfill", 3 | "version": "0.4.4", 4 | "description": "This repo builds .wasm module using icu4c for breaking text into words, so that we can polyfill [Intl Segmenter Proposal](https://github.com/tc39/proposal-intl-segmenter) with full compatibility, even on browsers that do not expose v8BreakIterator api.", 5 | "main": "dist/index.js", 6 | "files": [ 7 | "dist/", 8 | "src/" 9 | ], 10 | "scripts": { 11 | "prepublish": "npm run build", 12 | "build": "cp src/break_iterator.wasm dist/ && rollup -c rollup.config.js && rollup -c rollup.config.bundled.js", 13 | "test": "jest" 14 | }, 15 | "repository": { 16 | "type": "git", 17 | "url": "git+https://github.com/surferseo/intl-segmenter-polyfill.git" 18 | }, 19 | "author": "Lucjan Suski", 20 | "license": "ISC", 21 | "bugs": { 22 | "url": "https://github.com/surferseo/intl-segmenter-polyfill/issues" 23 | }, 24 | "homepage": "https://github.com/surferseo/intl-segmenter-polyfill#readme", 25 | "devDependencies": { 26 | "@babel/plugin-transform-runtime": "^7.10.1", 27 | "@babel/preset-env": "^7.10.2", 28 | "@rollup/plugin-babel": "^5.0.3", 29 | "@rollup/plugin-wasm": "^5.0.0", 30 | "core-js": "^3.6.5", 31 | "jest": "^26.0.1", 32 | "rollup": "^2.16.1", 33 | "rollup-plugin-commonjs": "^10.1.0", 34 | "rollup-plugin-node-resolve": "^5.2.0" 35 | }, 36 | "dependencies": { 37 | "fast-text-encoding": "^1.0.2" 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /icu.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Emscripten Authors. All rights reserved. 2 | # Emscripten is available under two separate licenses, the MIT license and the 3 | # University of Illinois/NCSA Open Source License. Both these licenses can be 4 | # found in the LICENSE file. 
5 | 6 | import logging 7 | import os 8 | import shutil 9 | 10 | TAG = 'release-67-1' 11 | VERSION = '67_1' 12 | HASH = 'e0c366e097d5cd9840e7c440a87f8f338cc59b1ed7ec527eecbb5671c6c48261b217424a7ee95870915c19b70b1afa2e486100e73acae3515d30bb3872661c11' 13 | SUBDIR = '' 14 | 15 | def get(ports, settings, shared): 16 | if settings.USE_ICU != 1: 17 | return [] 18 | 19 | url = 'https://github.com/unicode-org/icu/releases/download/%s/icu4c-%s-src.zip' % (TAG, VERSION) 20 | ports.fetch_project('icu', url, 'icu', sha512hash=HASH) 21 | libname = ports.get_lib_name('libicuuc') 22 | 23 | def create(): 24 | logging.info('building port: icu') 25 | 26 | source_path = os.path.join(ports.get_dir(), 'icu', 'icu') 27 | dest_path = os.path.join(shared.Cache.get_path('ports-builds'), 'icu') 28 | 29 | shutil.rmtree(dest_path, ignore_errors=True) 30 | print(source_path) 31 | print(dest_path) 32 | shutil.copytree(source_path, dest_path) 33 | 34 | final = os.path.join(dest_path, libname) 35 | ports.build_port(os.path.join(dest_path, 'source', 'common'), final, [os.path.join(dest_path, 'source', 'common')], ['-DU_COMMON_IMPLEMENTATION=1']) 36 | 37 | ports.install_header_dir(os.path.join(dest_path, 'source', 'common', 'unicode')) 38 | return final 39 | 40 | return [shared.Cache.get(libname, create)] 41 | 42 | 43 | def clear(ports, shared): 44 | shared.Cache.erase_file(ports.get_lib_name('libicuuc')) 45 | 46 | 47 | def process_args(ports, args, settings, shared): 48 | if settings.USE_ICU == 1: 49 | get(ports, settings, shared) 50 | return args 51 | 52 | 53 | def show(): 54 | return 'icu (USE_ICU=1; Unicode License)' 55 | -------------------------------------------------------------------------------- /break_iterator.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "data.h" 10 | #include "unicode/utext.h" 11 | 12 | typedef void array_push(int32_t start, 
int32_t end, int32_t type, 13 | const void* callback_id); 14 | 15 | extern array_push push; 16 | 17 | void utf8_break_iterator(int8_t break_type, const char* locale, 18 | const char* to_break, int32_t to_break_len, 19 | const void* callback_id) { 20 | UErrorCode status = U_ZERO_ERROR; 21 | 22 | udata_setCommonData(icudt67l_dat, &status); 23 | 24 | UBreakIterator* iter; 25 | 26 | UChar string_to_break[to_break_len + 1]; 27 | u_uastrcpy(string_to_break, to_break); 28 | iter = ubrk_open(break_type, locale, string_to_break, 29 | u_strlen(string_to_break), &status); 30 | 31 | int32_t end; 32 | int32_t start = ubrk_first(iter); 33 | int32_t n = 0; 34 | for (end = ubrk_next(iter); end != UBRK_DONE; 35 | start = end, end = ubrk_next(iter)) { 36 | push(start, end, ubrk_getRuleStatus(iter), callback_id); 37 | } 38 | 39 | ubrk_close(iter); 40 | } 41 | 42 | // differs from utf8_break_iterator in that it operates on raw bytes 43 | // in case unicode implementation is not compatible (ie. in Elixir) 44 | void break_iterator(int8_t break_type, const char* locale, const char* to_break, 45 | const void* callback_id) { 46 | UErrorCode status = U_ZERO_ERROR; 47 | UText* utext_to_break = NULL; 48 | 49 | udata_setCommonData(icudt67l_dat, &status); 50 | 51 | UBreakIterator* iter; 52 | 53 | utext_to_break = utext_openUTF8(utext_to_break, to_break, -1, &status); 54 | 55 | iter = ubrk_open(break_type, locale, NULL, -1, &status); 56 | ubrk_setUText(iter, utext_to_break, &status); 57 | 58 | int32_t end; 59 | int32_t start = ubrk_first(iter); 60 | int32_t n = 0; 61 | for (end = ubrk_next(iter); end != UBRK_DONE; 62 | start = end, end = ubrk_next(iter)) { 63 | push(start, end, ubrk_getRuleStatus(iter), callback_id); 64 | } 65 | 66 | utext_close(utext_to_break); 67 | ubrk_close(iter); 68 | } 69 | 70 | // for WASI _start function to be generated 71 | int main() { return 0; } 72 | -------------------------------------------------------------------------------- /examples/elixir.ex: 
-------------------------------------------------------------------------------- 1 | defmodule WasmexTest do 2 | use GenServer 3 | 4 | def init([]) do 5 | {:ok, bytes} = File.read("break_iterator.wasm") 6 | {:ok, instance} = Wasmex.start_link(%{ 7 | bytes: bytes, 8 | imports: %{ 9 | wasi_snapshot_preview1: %{ 10 | proc_exit: {:fn, [:i32], [], fn _ -> 0 end}, 11 | fd_close: {:fn, [:i32], [:i32], fn _ -> 0 end}, 12 | environ_sizes_get: {:fn, [:i32, :i32], [:i32], fn _ -> 0 end}, 13 | environ_get: {:fn, [:i32, :i32], [:i32], fn _ -> 0 end} 14 | }, 15 | env: %{ 16 | __sys_stat64: {:fn, [:i32, :i32], [:i32], fn _ -> 0 end}, 17 | push: {:fn, [:i32, :i32, :i32], [], fn (%{memory: memory}, slice_start, slice_end, type) -> 18 | pid_binary_length = Wasmex.Memory.get(memory, 100) 19 | pid = Wasmex.Memory.read_string(memory, 101, pid_binary_length) |> Base.decode64! |> :erlang.binary_to_term 20 | send(pid, {:received_value, {slice_start, slice_end, type}}) 21 | nil 22 | end }, 23 | } 24 | } 25 | }) 26 | 27 | {:ok, instance} 28 | end 29 | 30 | def handle_call({:break, locale, string}, _from, instance) do 31 | string = string <> <<0>> 32 | {:ok, memory} = Wasmex.memory(instance, :uint8, 0) 33 | 34 | breaks = Task.async(fn -> 35 | pid = self() 36 | pid_binary = :erlang.term_to_binary(self()) |> Base.encode64 37 | 38 | Wasmex.Memory.write_binary(memory, 0, locale <> <<0>>) 39 | Wasmex.Memory.set(memory, 100, byte_size(pid_binary)) 40 | Wasmex.Memory.write_binary(memory, 101, pid_binary) 41 | Wasmex.Memory.write_binary(memory, 10000, string) 42 | 43 | Task.async(fn -> 44 | Wasmex.call_function(instance, "break_iterator", [0, 10000, byte_size(string)]) 45 | send(pid, :done) 46 | end) 47 | 48 | receive_all_values() 49 | end) |> Task.await 50 | 51 | reply = breaks |> Enum.map(fn {slice_start, slice_end, type} -> 52 | {slice_start, slice_end, type, string |> binary_part(slice_start, slice_end - slice_start)} 53 | # {string |> String.slice(slice_start, slice_end - slice_start), type} 54 
| end) 55 | 56 | {:reply, {reply, memory}, instance} 57 | end 58 | 59 | def receive_all_values(values \\ []) do 60 | receive do 61 | :done -> values |> Enum.reverse 62 | {:received_value, value} -> receive_all_values( [value | values]) 63 | end 64 | end 65 | end 66 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | // polyfill TextEncoder and TextDecoder, which is missing on Edge 18 2 | import 'fast-text-encoding' 3 | 4 | const BREAK_TYPES = { 5 | grapheme: 0, 6 | word: 1, 7 | sentence: 3, 8 | } 9 | 10 | const getSegmentType = (type) => { 11 | if (type < 100) { 12 | return 'none' 13 | } else if (type >= 100 && type < 200) { 14 | return 'number' 15 | } else if (type >= 200 && type < 300) { 16 | return 'word' 17 | } else if (type >= 300 && type < 400) { 18 | return 'kana' 19 | } else if (type >= 400 && type < 500) { 20 | return 'ideo' 21 | } 22 | } 23 | 24 | const instantiateWasmModule = (wasm, imports) => { 25 | if (typeof wasm.then === 'function') { 26 | if (WebAssembly.instantiateStreaming != null) { 27 | return wasm.then((response) => 28 | WebAssembly.instantiateStreaming(response, imports), 29 | ) 30 | } 31 | 32 | return wasm 33 | .then((response) => response.arrayBuffer()) 34 | .then((buffer) => WebAssembly.instantiate(buffer, imports)) 35 | } else { 36 | return WebAssembly.instantiate(wasm, imports) 37 | } 38 | } 39 | 40 | const createIntlSegmenterPolyfillFromInstance = async ( 41 | wasmInstance, 42 | values, 43 | ) => { 44 | const allocStr = (str) => { 45 | const encoder = new TextEncoder() 46 | const view = encoder.encode(str + '\0') 47 | // typescript does not play well with webassembly 48 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 49 | const exports = wasmInstance.exports 50 | 51 | const ptr = exports.malloc(view.length) 52 | const memory = new Uint8Array(exports.memory.buffer, ptr, view.length) 53 | 
memory.set(view) 54 | return [ptr, view] 55 | } 56 | 57 | return class Segmenter { 58 | constructor(locale, options) { 59 | this.locale = locale 60 | this.options = options || {} 61 | } 62 | 63 | segment(input) { 64 | const locale = this.locale 65 | const granularity = this.options.granularity || 'grapheme' 66 | const exports = wasmInstance.exports 67 | 68 | values.current = [] 69 | const [inputPtr, inputView] = allocStr(input) 70 | const [localePtr] = allocStr(locale) 71 | exports.utf8_break_iterator(BREAK_TYPES[granularity], localePtr, inputPtr, inputView.length) 72 | 73 | exports.free(localePtr) 74 | exports.free(inputPtr) 75 | 76 | let index = 0 77 | 78 | const segments = values.current.map(([start, end, segmentType]) => { 79 | const segment = input.slice(start, end) 80 | const returnValue = { 81 | segment, 82 | index: index, 83 | isWordLike: 84 | granularity === 'word' 85 | ? getSegmentType(segmentType) !== 'none' 86 | : undefined, 87 | breakType: 88 | granularity === 'word' ? getSegmentType(segmentType) : undefined, 89 | } 90 | index += segment.length 91 | return returnValue 92 | }) 93 | 94 | segments.containing = (indexToFind) => 95 | segments.find( 96 | ({ index, segment }) => 97 | indexToFind >= index && indexToFind <= index + segment.length - 1, 98 | ) 99 | 100 | return segments 101 | } 102 | } 103 | } 104 | 105 | const getImports = (callback) => ({ 106 | env: { 107 | push: (start, end, segmentType) => { 108 | callback([start, end, segmentType]) 109 | }, 110 | __sys_stat64: () => { }, 111 | }, 112 | wasi_snapshot_preview1: { 113 | proc_exit: () => { }, 114 | fd_close: () => { }, 115 | environ_sizes_get: () => { }, 116 | environ_get: () => { }, 117 | }, 118 | }) 119 | 120 | export const createIntlSegmenterPolyfillFromFactory = async (wasmFactory) => { 121 | let values = { current: [] } 122 | const { instance } = await wasmFactory( 123 | getImports((value) => { 124 | values.current.push(value) 125 | }), 126 | ) 127 | 128 | return 
createIntlSegmenterPolyfillFromInstance(instance, values) 129 | } 130 | 131 | export const createIntlSegmenterPolyfill = async (wasm) => { 132 | let values = { current: [] } 133 | 134 | const { instance } = await instantiateWasmModule( 135 | wasm, 136 | getImports((value) => { 137 | values.current.push(value) 138 | }), 139 | ) 140 | 141 | return createIntlSegmenterPolyfillFromInstance(instance, values) 142 | } 143 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intl Segmenter Polyfill 2 | 3 | [![npm version](https://badge.fury.io/js/intl-segmenter-polyfill.svg)](https://www.npmjs.com/package/intl-segmenter-polyfill) 4 | ![Build WASM](https://github.com/surferseo/intl-segmenter-polyfill/workflows/Build%20WASM/badge.svg) 5 | ![Test](https://github.com/surferseo/intl-segmenter-polyfill/workflows/Test/badge.svg) 6 | 7 | Provides .wasm module built with icu4c for breaking text into words, so that we can polyfill [Intl Segmenter Proposal](https://github.com/tc39/proposal-intl-segmenter) with full compatibility, even on browsers that do not expose v8BreakIterator api. 8 | 9 | **By default it bundles only Thai language dictionary. Modify `filters.json` if you need to support [other exotic languages](https://github.com/unicode-org/icu/tree/master/icu4c/source/data/brkitr/dictionaries).** 10 | 11 | ## Usage 12 | 13 | ``` 14 | npm install --save intl-segmenter-polyfill 15 | ``` 16 | 17 | ### Web – fetch 18 | 19 | This is the most efficient way as you can lazily load the wasm module only when you need it and use `instantiateStreaming` for the best performance. Serve `break_iterator.wasm` as a static asset with `application/wasm` content-type and you are good to go. 
20 | 21 | #### index.js 22 | 23 | ```js 24 | import { createIntlSegmenterPolyfill } from 'intl-segmenter-polyfill' 25 | ;(async function () { 26 | const Segmenter = await createIntlSegmenterPolyfill( 27 | fetch('/path/to/break_iterator.wasm'), 28 | ) 29 | 30 | const segmenter = new Segmenter('en', { granularity: 'word' }) 31 | const segments = segmenter.segment('foo bar baz') 32 | })() 33 | ``` 34 | 35 | ### Web – bundle with base64 encoded module 36 | 37 | This is the simplest way to use the polyfill, at the cost of a base64-encoded module – it is ~33% bigger and cannot be loaded on demand. 38 | 39 | #### index.js 40 | 41 | ```js 42 | import { createIntlSegmenterPolyfill } from 'intl-segmenter-polyfill/dist/bundled' 43 | ;(async function () { 44 | const Segmenter = await createIntlSegmenterPolyfill() 45 | const segmenter = new Segmenter('en', { granularity: 'word' }) 46 | const segments = segmenter.segment('foo bar baz') 47 | console.log(segments) 48 | })() 49 | ``` 50 | 51 | #### OR using plain old `<script>` tag 52 | 53 | ```html 54 | <script src="https://unpkg.com/intl-segmenter-polyfill@0.4.4/dist/bundled.js"></script> 55 | 56 | <script> 57 | // IntlSegmenterPolyfillBundled is the UMD global exposed by dist/bundled.js 58 | IntlSegmenterPolyfillBundled.createIntlSegmenterPolyfill().then(function (Segmenter) { 59 | var segmenter = new Segmenter('en', { granularity: 'word' }) 60 | var segments = segmenter.segment('foo bar baz') 61 | console.log(segments) 62 | }) 63 | </script> 64 | ``` 65 | 66 | ### Web – Rollup / Webpack wasm loader 67 | 68 | `@rollup/plugin-wasm` and the webpack `wasm-loader` can be used with `createIntlSegmenterPolyfillFromFactory`. 69 | 70 | #### rollup.config.js 71 | 72 | ```js 73 | import commonjs from '@rollup/plugin-commonjs' 74 | import { wasm } from '@rollup/plugin-wasm' 75 | 76 | export default { 77 | input: 'index.js', 78 | output: { 79 | file: 'out.js', 80 | format: 'iife', 81 | }, 82 | plugins: [commonjs(), wasm()], 83 | } 84 | ``` 85 | 86 | #### index.js 87 | 88 | ```js 89 | import { createIntlSegmenterPolyfillFromFactory } from 'intl-segmenter-polyfill' 90 | import break_iterator from 'intl-segmenter-polyfill/dist/break_iterator.wasm' 91 | ;(async function () { 92 | const Segmenter = await createIntlSegmenterPolyfillFromFactory(break_iterator) 93 | 94 | const segmenter = new Segmenter('en', { granularity: 'word' }) 95 | const segments = segmenter.segment('foo bar baz') 96 | })() 97 | ``` 98 | 99 | ### Node 100 | 101 | ```js 
102 | const {createIntlSegmenterPolyfill} = require('intl-segmenter-polyfill') 103 | const fs = require('fs') 104 | 105 | const wasmBuffer = fs.readFileSync('node_modules/intl-segmenter-polyfill/break_iterator.wasm') 106 | let wasmBinary = new Uint8Array(wasmBuffer) 107 | 108 | ;(async () => { 109 | const Segmenter = await createIntlSegmenterPolyfill(wasmBinary); 110 | const segmenter = new Segmenter("en", { granularity: 'word' }); 111 | const segments = segmenter.segment("foo bar baz"); 112 | })() 113 | ``` 114 | 115 | ## Supported browsers 116 | 117 | Besides Chrome, Firefox and Safari with reasonable versions, it polyfills TextEncoder/TextDecoder to support Edge 18 (non-chromium). 118 | 119 | ## Building 120 | 121 | Running `./build.sh` while having docker installed should output `break_iterator.wasm` ready to be used in Node, browsers or Wasmer without a lot of special treatment (see examples above or `examples/`). 122 | -------------------------------------------------------------------------------- /test/index.test.js: -------------------------------------------------------------------------------- 1 | test("Bundled module", async () => { 2 | const Segmenter = 3 | await require("../dist/bundled.js").createIntlSegmenterPolyfill(); 4 | const segments = new Segmenter("en", { granularity: "word" }).segment( 5 | "foo bar" 6 | ); 7 | expect(Array.from(segments)).toEqual([ 8 | { breakType: "word", index: 0, isWordLike: true, segment: "foo" }, 9 | { breakType: "none", index: 3, isWordLike: false, segment: " " }, 10 | { breakType: "word", index: 4, isWordLike: true, segment: "bar" }, 11 | ]); 12 | }); 13 | 14 | test("FS loaded module", async () => { 15 | const fs = require("fs"); 16 | const wasmBuffer = fs.readFileSync("./dist/break_iterator.wasm"); 17 | const wasmBinary = new Uint8Array(wasmBuffer); 18 | 19 | const Segmenter = 20 | await require("../dist/index.js").createIntlSegmenterPolyfill(wasmBinary); 21 | const segments = new Segmenter("en", { granularity: "word"
}).segment( 22 | "foo bar" 23 | ); 24 | expect(Array.from(segments)).toEqual([ 25 | { breakType: "word", index: 0, isWordLike: true, segment: "foo" }, 26 | { breakType: "none", index: 3, isWordLike: false, segment: " " }, 27 | { breakType: "word", index: 4, isWordLike: true, segment: "bar" }, 28 | ]); 29 | }); 30 | 31 | test("segments.containing() direct access", async () => { 32 | const Segmenter = 33 | await require("../dist/bundled.js").createIntlSegmenterPolyfill(); 34 | const segments = new Segmenter("en", { granularity: "word" }).segment( 35 | "foo bar" 36 | ); 37 | expect(segments.containing(0).segment).toEqual("foo"); 38 | expect(segments.containing(1).segment).toEqual("foo"); 39 | expect(segments.containing(3).segment).toEqual(" "); 40 | expect(segments.containing(5).segment).toEqual("bar"); 41 | expect(segments.containing(8)).toEqual(undefined); 42 | }); 43 | 44 | test("segment by grapheme", async () => { 45 | const Segmenter = 46 | await require("../dist/bundled.js").createIntlSegmenterPolyfill(); 47 | const segments = new Segmenter("en", { granularity: "grapheme" }).segment( 48 | "foo bar" 49 | ); 50 | expect(segments.map(({ segment }) => segment)).toEqual([ 51 | "f", 52 | "o", 53 | "o", 54 | " ", 55 | "b", 56 | "a", 57 | "r", 58 | ]); 59 | }); 60 | 61 | test("defaults to grapheme segmenting", async () => { 62 | const Segmenter = 63 | await require("../dist/bundled.js").createIntlSegmenterPolyfill(); 64 | const segments = new Segmenter("en").segment("foo bar"); 65 | expect(segments.map(({ segment }) => segment)).toEqual([ 66 | "f", 67 | "o", 68 | "o", 69 | " ", 70 | "b", 71 | "a", 72 | "r", 73 | ]); 74 | }); 75 | 76 | test("segment by sentence", async () => { 77 | const Segmenter = 78 | await require("../dist/bundled.js").createIntlSegmenterPolyfill(); 79 | const segments = new Segmenter("en", { granularity: "sentence" }).segment( 80 | "Foo bar. Foo bar." 81 | ); 82 | expect(segments.map(({ segment }) => segment)).toEqual([ 83 | "Foo bar. 
", 84 | "Foo bar.", 85 | ]); 86 | }); 87 | 88 | test("Segments Thai words", async () => { 89 | const Segmenter = 90 | await require("../dist/bundled.js").createIntlSegmenterPolyfill(); 91 | const segments = new Segmenter("en", { granularity: "word" }).segment( 92 | "ยังมีอาสาสมัครน้อยมากเมื่อเทียบกับประชากรที่เข้าถึงอินเทอร์เน็ตได้" 93 | ); 94 | expect(segments.map(({ segment }) => segment)).toEqual([ 95 | "ยัง", 96 | "มี", 97 | "อาสา", 98 | "สมัคร", 99 | "น้อย", 100 | "มาก", 101 | "เมื่อ", 102 | "เทียบ", 103 | "กับ", 104 | "ประชากร", 105 | "ที่", 106 | "เข้า", 107 | "ถึง", 108 | "อินเทอร์เน็ต", 109 | "ได้", 110 | ]); 111 | }); 112 | 113 | test("Segments Japanese words", async () => { 114 | const Segmenter = 115 | await require("../dist/bundled_cja.js").createIntlSegmenterPolyfill(); 116 | const segments = new Segmenter("en", { granularity: "word" }).segment( 117 | "チンドン屋は、チンドン太鼓と呼ばれる楽器を鳴らすなどして人目を集め、その地域の商品や店舗などの宣伝を行う日本の請負広告業である。披露目屋・広目屋・東西屋と呼ぶ地域もある。" 118 | ); 119 | expect(segments.map(({ segment }) => segment)).toEqual([ 120 | "チン", 121 | "ドン", 122 | "屋", 123 | "は", 124 | "、", 125 | "チン", 126 | "ドン", 127 | "太鼓", 128 | "と", 129 | "呼ばれる", 130 | "楽器", 131 | "を", 132 | "鳴らす", 133 | "など", 134 | "し", 135 | "て", 136 | "人目", 137 | "を", 138 | "集め", 139 | "、", 140 | "その", 141 | "地域", 142 | "の", 143 | "商品", 144 | "や", 145 | "店舗", 146 | "など", 147 | "の", 148 | "宣伝", 149 | "を", 150 | "行う", 151 | "日本", 152 | "の", 153 | "請負", 154 | "広告", 155 | "業", 156 | "で", 157 | "ある", 158 | "。", 159 | "披露", 160 | "目", 161 | "屋", 162 | "・", 163 | "広目屋", 164 | "・", 165 | "東西", 166 | "屋", 167 | "と", 168 | "呼ぶ", 169 | "地域", 170 | "も", 171 | "ある", 172 | "。", 173 | ]); 174 | }); 175 | -------------------------------------------------------------------------------- /examples/rollup/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "intl-segmenter-polyfill-rollup-example", 3 | "version": "1.0.0", 4 | "lockfileVersion": 1, 5 | 
"requires": true, 6 | "dependencies": { 7 | "@rollup/plugin-commonjs": { 8 | "version": "13.0.0", 9 | "resolved": "https://registry.npmjs.org/@rollup/plugin-commonjs/-/plugin-commonjs-13.0.0.tgz", 10 | "integrity": "sha512-Anxc3qgkAi7peAyesTqGYidG5GRim9jtg8xhmykNaZkImtvjA7Wsqep08D2mYsqw1IF7rA3lYfciLgzUSgRoqw==", 11 | "dev": true, 12 | "requires": { 13 | "@rollup/pluginutils": "^3.0.8", 14 | "commondir": "^1.0.1", 15 | "estree-walker": "^1.0.1", 16 | "glob": "^7.1.2", 17 | "is-reference": "^1.1.2", 18 | "magic-string": "^0.25.2", 19 | "resolve": "^1.11.0" 20 | } 21 | }, 22 | "@rollup/plugin-wasm": { 23 | "version": "5.0.0", 24 | "resolved": "https://registry.npmjs.org/@rollup/plugin-wasm/-/plugin-wasm-5.0.0.tgz", 25 | "integrity": "sha512-3yVc14qT1hX9Zs8qnieXkzLJL8hpdQyW/fwH3V5/doXDQfQPAHSoPirDLm8Fum4U1JZxnsOnk/6GF5gqKh7dsQ==", 26 | "dev": true 27 | }, 28 | "@rollup/pluginutils": { 29 | "version": "3.1.0", 30 | "resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-3.1.0.tgz", 31 | "integrity": "sha512-GksZ6pr6TpIjHm8h9lSQ8pi8BE9VeubNT0OMJ3B5uZJ8pz73NPiqOtCog/x2/QzM1ENChPKxMDhiQuRHsqc+lg==", 32 | "dev": true, 33 | "requires": { 34 | "@types/estree": "0.0.39", 35 | "estree-walker": "^1.0.1", 36 | "picomatch": "^2.2.2" 37 | } 38 | }, 39 | "@types/estree": { 40 | "version": "0.0.39", 41 | "resolved": "https://registry.npmjs.org/@types/estree/-/estree-0.0.39.tgz", 42 | "integrity": "sha512-EYNwp3bU+98cpU4lAWYYL7Zz+2gryWH1qbdDTidVd6hkiR6weksdbMadyXKXNPEkQFhXM+hVO9ZygomHXp+AIw==", 43 | "dev": true 44 | }, 45 | "balanced-match": { 46 | "version": "1.0.0", 47 | "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", 48 | "integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=", 49 | "dev": true 50 | }, 51 | "brace-expansion": { 52 | "version": "1.1.11", 53 | "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", 54 | "integrity": 
"sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", 55 | "dev": true, 56 | "requires": { 57 | "balanced-match": "^1.0.0", 58 | "concat-map": "0.0.1" 59 | } 60 | }, 61 | "commondir": { 62 | "version": "1.0.1", 63 | "resolved": "https://registry.npmjs.org/commondir/-/commondir-1.0.1.tgz", 64 | "integrity": "sha1-3dgA2gxmEnOTzKWVDqloo6rxJTs=", 65 | "dev": true 66 | }, 67 | "concat-map": { 68 | "version": "0.0.1", 69 | "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", 70 | "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", 71 | "dev": true 72 | }, 73 | "estree-walker": { 74 | "version": "1.0.1", 75 | "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-1.0.1.tgz", 76 | "integrity": "sha512-1fMXF3YP4pZZVozF8j/ZLfvnR8NSIljt56UhbZ5PeeDmmGHpgpdwQt7ITlGvYaQukCvuBRMLEiKiYC+oeIg4cg==", 77 | "dev": true 78 | }, 79 | "fs.realpath": { 80 | "version": "1.0.0", 81 | "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", 82 | "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", 83 | "dev": true 84 | }, 85 | "fsevents": { 86 | "version": "2.1.3", 87 | "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.1.3.tgz", 88 | "integrity": "sha512-Auw9a4AxqWpa9GUfj370BMPzzyncfBABW8Mab7BGWBYDj4Isgq+cDKtx0i6u9jcX9pQDnswsaaOTgTmA5pEjuQ==", 89 | "dev": true, 90 | "optional": true 91 | }, 92 | "glob": { 93 | "version": "7.1.6", 94 | "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz", 95 | "integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==", 96 | "dev": true, 97 | "requires": { 98 | "fs.realpath": "^1.0.0", 99 | "inflight": "^1.0.4", 100 | "inherits": "2", 101 | "minimatch": "^3.0.4", 102 | "once": "^1.3.0", 103 | "path-is-absolute": "^1.0.0" 104 | } 105 | }, 106 | "inflight": { 107 | "version": "1.0.6", 108 | "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", 109 | "integrity": 
"sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", 110 | "dev": true, 111 | "requires": { 112 | "once": "^1.3.0", 113 | "wrappy": "1" 114 | } 115 | }, 116 | "inherits": { 117 | "version": "2.0.4", 118 | "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", 119 | "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", 120 | "dev": true 121 | }, 122 | "is-reference": { 123 | "version": "1.2.0", 124 | "resolved": "https://registry.npmjs.org/is-reference/-/is-reference-1.2.0.tgz", 125 | "integrity": "sha512-ZVxq+5TkOx6GQdnoMm2aRdCKADdcrOWXLGzGT+vIA8DMpqEJaRk5AL1bS80zJ2bjHunVmjdzfCt0e4BymIEqKQ==", 126 | "dev": true, 127 | "requires": { 128 | "@types/estree": "0.0.44" 129 | }, 130 | "dependencies": { 131 | "@types/estree": { 132 | "version": "0.0.44", 133 | "resolved": "https://registry.npmjs.org/@types/estree/-/estree-0.0.44.tgz", 134 | "integrity": "sha512-iaIVzr+w2ZJ5HkidlZ3EJM8VTZb2MJLCjw3V+505yVts0gRC4UMvjw0d1HPtGqI/HQC/KdsYtayfzl+AXY2R8g==", 135 | "dev": true 136 | } 137 | } 138 | }, 139 | "magic-string": { 140 | "version": "0.25.7", 141 | "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.25.7.tgz", 142 | "integrity": "sha512-4CrMT5DOHTDk4HYDlzmwu4FVCcIYI8gauveasrdCu2IKIFOJ3f0v/8MDGJCDL9oD2ppz/Av1b0Nj345H9M+XIA==", 143 | "dev": true, 144 | "requires": { 145 | "sourcemap-codec": "^1.4.4" 146 | } 147 | }, 148 | "minimatch": { 149 | "version": "3.0.4", 150 | "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", 151 | "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", 152 | "dev": true, 153 | "requires": { 154 | "brace-expansion": "^1.1.7" 155 | } 156 | }, 157 | "once": { 158 | "version": "1.4.0", 159 | "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", 160 | "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", 161 | "dev": true, 162 | "requires": { 163 | "wrappy": "1" 164 | } 165 | }, 166 | 
"path-is-absolute": { 167 | "version": "1.0.1", 168 | "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", 169 | "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", 170 | "dev": true 171 | }, 172 | "path-parse": { 173 | "version": "1.0.6", 174 | "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz", 175 | "integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==", 176 | "dev": true 177 | }, 178 | "picomatch": { 179 | "version": "2.2.2", 180 | "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.2.2.tgz", 181 | "integrity": "sha512-q0M/9eZHzmr0AulXyPwNfZjtwZ/RBZlbN3K3CErVrk50T2ASYI7Bye0EvekFY3IP1Nt2DHu0re+V2ZHIpMkuWg==", 182 | "dev": true 183 | }, 184 | "resolve": { 185 | "version": "1.17.0", 186 | "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.17.0.tgz", 187 | "integrity": "sha512-ic+7JYiV8Vi2yzQGFWOkiZD5Z9z7O2Zhm9XMaTxdJExKasieFCr+yXZ/WmXsckHiKl12ar0y6XiXDx3m4RHn1w==", 188 | "dev": true, 189 | "requires": { 190 | "path-parse": "^1.0.6" 191 | } 192 | }, 193 | "rollup": { 194 | "version": "2.16.1", 195 | "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.16.1.tgz", 196 | "integrity": "sha512-UYupMcbFtoWLB6ZtL4hPZNUTlkXjJfGT33Mmhz3hYLNmRj/cOvX2B26ZxDQuEpwtLdcyyyraBGQ7EfzmMJnXXg==", 197 | "dev": true, 198 | "requires": { 199 | "fsevents": "~2.1.2" 200 | } 201 | }, 202 | "rollup-plugin-base64": { 203 | "version": "git+https://github.com/gzuidhof/rollup-plugin-base64.git#099d622f58c15f313dc1d8ca3cfe98434bc365ed", 204 | "from": "git+https://github.com/gzuidhof/rollup-plugin-base64.git", 205 | "dev": true, 206 | "requires": { 207 | "@rollup/pluginutils": "^3.1.0" 208 | } 209 | }, 210 | "sourcemap-codec": { 211 | "version": "1.4.8", 212 | "resolved": "https://registry.npmjs.org/sourcemap-codec/-/sourcemap-codec-1.4.8.tgz", 213 | "integrity": "sha512-9NykojV5Uih4lgo5So5dtw+f0JgJX30KCNI8gwhz2J9A15wD0Ml6tjHKwf6fTSa6fAdVBdZeNOs9eJ71qCk8vA==", 
214 | "dev": true 215 | }, 216 | "wrappy": { 217 | "version": "1.0.2", 218 | "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", 219 | "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", 220 | "dev": true 221 | } 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /dist/index.js: -------------------------------------------------------------------------------- 1 | (function (global, factory) { 2 | typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('@babel/runtime/regenerator'), require('@babel/runtime/helpers/slicedToArray'), require('@babel/runtime/helpers/classCallCheck'), require('@babel/runtime/helpers/createClass'), require('@babel/runtime/helpers/asyncToGenerator'), require('fast-text-encoding')) : 3 | typeof define === 'function' && define.amd ? define(['exports', '@babel/runtime/regenerator', '@babel/runtime/helpers/slicedToArray', '@babel/runtime/helpers/classCallCheck', '@babel/runtime/helpers/createClass', '@babel/runtime/helpers/asyncToGenerator', 'fast-text-encoding'], factory) : 4 | (global = global || self, factory(global.IntlSegmenterPolyfill = {}, global._regeneratorRuntime, global._slicedToArray, global._classCallCheck, global._createClass, global._asyncToGenerator)); 5 | }(this, (function (exports, _regeneratorRuntime, _slicedToArray, _classCallCheck, _createClass, _asyncToGenerator) { 'use strict'; 6 | 7 | _regeneratorRuntime = _regeneratorRuntime && Object.prototype.hasOwnProperty.call(_regeneratorRuntime, 'default') ? _regeneratorRuntime['default'] : _regeneratorRuntime; 8 | _slicedToArray = _slicedToArray && Object.prototype.hasOwnProperty.call(_slicedToArray, 'default') ? _slicedToArray['default'] : _slicedToArray; 9 | _classCallCheck = _classCallCheck && Object.prototype.hasOwnProperty.call(_classCallCheck, 'default') ? 
_classCallCheck['default'] : _classCallCheck; 10 | _createClass = _createClass && Object.prototype.hasOwnProperty.call(_createClass, 'default') ? _createClass['default'] : _createClass; 11 | _asyncToGenerator = _asyncToGenerator && Object.prototype.hasOwnProperty.call(_asyncToGenerator, 'default') ? _asyncToGenerator['default'] : _asyncToGenerator; 12 | 13 | var BREAK_TYPES = { 14 | grapheme: 0, 15 | word: 1, 16 | sentence: 3 17 | }; 18 | 19 | var getSegmentType = function getSegmentType(type) { 20 | if (type < 100) { 21 | return 'none'; 22 | } else if (type >= 100 && type < 200) { 23 | return 'number'; 24 | } else if (type >= 200 && type < 300) { 25 | return 'word'; 26 | } else if (type >= 300 && type < 400) { 27 | return 'kana'; 28 | } else if (type >= 400 && type < 500) { 29 | return 'ideo'; 30 | } 31 | }; 32 | 33 | var instantiateWasmModule = function instantiateWasmModule(wasm, imports) { 34 | if (typeof wasm.then === 'function') { 35 | if (WebAssembly.instantiateStreaming != null) { 36 | return wasm.then(function (response) { 37 | return WebAssembly.instantiateStreaming(response, imports); 38 | }); 39 | } 40 | 41 | return wasm.then(function (response) { 42 | return response.arrayBuffer(); 43 | }).then(function (buffer) { 44 | return WebAssembly.instantiate(buffer, imports); 45 | }); 46 | } else { 47 | return WebAssembly.instantiate(wasm, imports); 48 | } 49 | }; 50 | 51 | var createIntlSegmenterPolyfillFromInstance = /*#__PURE__*/function () { 52 | var _ref = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee(wasmInstance, values) { 53 | var allocStr; 54 | return _regeneratorRuntime.wrap(function _callee$(_context) { 55 | while (1) { 56 | switch (_context.prev = _context.next) { 57 | case 0: 58 | allocStr = function allocStr(str) { 59 | var encoder = new TextEncoder(); 60 | var view = encoder.encode(str + '\0'); // typescript does not play well with webassembly 61 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 62 | 63 
| var exports = wasmInstance.exports; 64 | var ptr = exports.malloc(view.length); 65 | var memory = new Uint8Array(exports.memory.buffer, ptr, view.length); 66 | memory.set(view); 67 | return [ptr, view]; 68 | }; 69 | 70 | return _context.abrupt("return", /*#__PURE__*/function () { 71 | function Segmenter(locale, options) { 72 | _classCallCheck(this, Segmenter); 73 | 74 | this.locale = locale; 75 | this.options = options || {}; 76 | } 77 | 78 | _createClass(Segmenter, [{ 79 | key: "segment", 80 | value: function segment(input) { 81 | var locale = this.locale; 82 | var granularity = this.options.granularity || 'grapheme'; 83 | var exports = wasmInstance.exports; 84 | values.current = []; 85 | 86 | var _allocStr = allocStr(input), 87 | _allocStr2 = _slicedToArray(_allocStr, 2), 88 | inputPtr = _allocStr2[0], 89 | inputView = _allocStr2[1]; 90 | 91 | var _allocStr3 = allocStr(locale), 92 | _allocStr4 = _slicedToArray(_allocStr3, 1), 93 | localePtr = _allocStr4[0]; 94 | 95 | exports.utf8_break_iterator(BREAK_TYPES[granularity], localePtr, inputPtr, inputView.length); 96 | exports.free(localePtr); 97 | exports.free(inputPtr); 98 | var index = 0; 99 | var segments = values.current.map(function (_ref2) { 100 | var _ref3 = _slicedToArray(_ref2, 3), 101 | start = _ref3[0], 102 | end = _ref3[1], 103 | segmentType = _ref3[2]; 104 | 105 | var segment = input.slice(start, end); 106 | var returnValue = { 107 | segment: segment, 108 | index: index, 109 | isWordLike: granularity === 'word' ? getSegmentType(segmentType) !== 'none' : undefined, 110 | breakType: granularity === 'word' ? 
getSegmentType(segmentType) : undefined 111 | }; 112 | index += segment.length; 113 | return returnValue; 114 | }); 115 | 116 | segments.containing = function (indexToFind) { 117 | return segments.find(function (_ref4) { 118 | var index = _ref4.index, 119 | segment = _ref4.segment; 120 | return indexToFind >= index && indexToFind <= index + segment.length - 1; 121 | }); 122 | }; 123 | 124 | return segments; 125 | } 126 | }]); 127 | 128 | return Segmenter; 129 | }()); 130 | 131 | case 2: 132 | case "end": 133 | return _context.stop(); 134 | } 135 | } 136 | }, _callee); 137 | })); 138 | 139 | return function createIntlSegmenterPolyfillFromInstance(_x, _x2) { 140 | return _ref.apply(this, arguments); 141 | }; 142 | }(); 143 | 144 | var getImports = function getImports(callback) { 145 | return { 146 | env: { 147 | push: function push(start, end, segmentType) { 148 | callback([start, end, segmentType]); 149 | }, 150 | __sys_stat64: function __sys_stat64() {} 151 | }, 152 | wasi_snapshot_preview1: { 153 | proc_exit: function proc_exit() {}, 154 | fd_close: function fd_close() {}, 155 | environ_sizes_get: function environ_sizes_get() {}, 156 | environ_get: function environ_get() {} 157 | } 158 | }; 159 | }; 160 | 161 | var createIntlSegmenterPolyfillFromFactory = /*#__PURE__*/function () { 162 | var _ref5 = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee2(wasmFactory) { 163 | var values, _yield$wasmFactory, instance; 164 | 165 | return _regeneratorRuntime.wrap(function _callee2$(_context2) { 166 | while (1) { 167 | switch (_context2.prev = _context2.next) { 168 | case 0: 169 | values = { 170 | current: [] 171 | }; 172 | _context2.next = 3; 173 | return wasmFactory(getImports(function (value) { 174 | values.current.push(value); 175 | })); 176 | 177 | case 3: 178 | _yield$wasmFactory = _context2.sent; 179 | instance = _yield$wasmFactory.instance; 180 | return _context2.abrupt("return", createIntlSegmenterPolyfillFromInstance(instance, values)); 181 
| 182 | case 6: 183 | case "end": 184 | return _context2.stop(); 185 | } 186 | } 187 | }, _callee2); 188 | })); 189 | 190 | return function createIntlSegmenterPolyfillFromFactory(_x3) { 191 | return _ref5.apply(this, arguments); 192 | }; 193 | }(); 194 | var createIntlSegmenterPolyfill = /*#__PURE__*/function () { 195 | var _ref6 = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee3(wasm) { 196 | var values, _yield$instantiateWas, instance; 197 | 198 | return _regeneratorRuntime.wrap(function _callee3$(_context3) { 199 | while (1) { 200 | switch (_context3.prev = _context3.next) { 201 | case 0: 202 | values = { 203 | current: [] 204 | }; 205 | _context3.next = 3; 206 | return instantiateWasmModule(wasm, getImports(function (value) { 207 | values.current.push(value); 208 | })); 209 | 210 | case 3: 211 | _yield$instantiateWas = _context3.sent; 212 | instance = _yield$instantiateWas.instance; 213 | return _context3.abrupt("return", createIntlSegmenterPolyfillFromInstance(instance, values)); 214 | 215 | case 6: 216 | case "end": 217 | return _context3.stop(); 218 | } 219 | } 220 | }, _callee3); 221 | })); 222 | 223 | return function createIntlSegmenterPolyfill(_x4) { 224 | return _ref6.apply(this, arguments); 225 | }; 226 | }(); 227 | 228 | exports.createIntlSegmenterPolyfill = createIntlSegmenterPolyfill; 229 | exports.createIntlSegmenterPolyfillFromFactory = createIntlSegmenterPolyfillFromFactory; 230 | 231 | Object.defineProperty(exports, '__esModule', { value: true }); 232 | 233 | }))); 234 | -------------------------------------------------------------------------------- /dist/module.js: -------------------------------------------------------------------------------- 1 | 2 | function _loadWasmModule (sync, src, imports) { 3 | var buf = null 4 | var isNode = typeof process !== 'undefined' && process.versions != null && process.versions.node != null 5 | if (isNode) { 6 | buf = Buffer.from(src, 'base64') 7 | } else { 8 | var raw = 
globalThis.atob(src) 9 | var rawLength = raw.length 10 | buf = new Uint8Array(new ArrayBuffer(rawLength)) 11 | for(var i = 0; i < rawLength; i++) { 12 | buf[i] = raw.charCodeAt(i) 13 | } 14 | } 15 | 16 | if (imports && !sync) { 17 | return WebAssembly.instantiate(buf, imports) 18 | } else if (!imports && !sync) { 19 | return WebAssembly.compile(buf) 20 | } else { 21 | var mod = new WebAssembly.Module(buf) 22 | return imports ? new WebAssembly.Instance(mod, imports) : mod 23 | } 24 | } 25 | (function (global, factory) { 26 | typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory(require('break_iterator.wasm')) : 27 | typeof define === 'function' && define.amd ? define(['break_iterator.wasm'], factory) : 28 | (global = global || self, global.IntlSegmenterPolyfill = factory(global.break_iterator)); 29 | }(this, (function (break_iterator) { 'use strict'; 30 | 31 | break_iterator = break_iterator && Object.prototype.hasOwnProperty.call(break_iterator, 'default') ? break_iterator['default'] : break_iterator; 32 | 33 | /*! ***************************************************************************** 34 | Copyright (c) Microsoft Corporation. 35 | 36 | Permission to use, copy, modify, and/or distribute this software for any 37 | purpose with or without fee is hereby granted. 38 | 39 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 40 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 41 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 42 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 43 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 44 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 45 | PERFORMANCE OF THIS SOFTWARE. 
46 | ***************************************************************************** */ 47 | 48 | function __awaiter(thisArg, _arguments, P, generator) { 49 | function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } 50 | return new (P || (P = Promise))(function (resolve, reject) { 51 | function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } 52 | function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } 53 | function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } 54 | step((generator = generator.apply(thisArg, _arguments || [])).next()); 55 | }); 56 | } 57 | 58 | function __generator(thisArg, body) { 59 | var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; 60 | return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; 61 | function verb(n) { return function (v) { return step([n, v]); }; } 62 | function step(op) { 63 | if (f) throw new TypeError("Generator is already executing."); 64 | while (_) try { 65 | if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? 
y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; 66 | if (y = 0, t) op = [op[0] & 2, t.value]; 67 | switch (op[0]) { 68 | case 0: case 1: t = op; break; 69 | case 4: _.label++; return { value: op[1], done: false }; 70 | case 5: _.label++; y = op[1]; op = [0]; continue; 71 | case 7: op = _.ops.pop(); _.trys.pop(); continue; 72 | default: 73 | if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } 74 | if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } 75 | if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } 76 | if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } 77 | if (t[2]) _.ops.pop(); 78 | _.trys.pop(); continue; 79 | } 80 | op = body.call(thisArg, _); 81 | } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } 82 | if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; 83 | } 84 | } 85 | 86 | (function(l){function m(){}function k(c,a){c=void 0===c?"utf-8":c;a=void 0===a?{fatal:!1}:a;if(-1===n.indexOf(c.toLowerCase()))throw new RangeError("Failed to construct 'TextDecoder': The encoding label provided ('"+c+"') is invalid.");if(a.fatal)throw Error("Failed to construct 'TextDecoder': the 'fatal' option is unsupported.");}if(l.TextEncoder&&l.TextDecoder)return !1;var n=["utf-8","utf8","unicode-1-1-utf-8"];Object.defineProperty(m.prototype,"encoding",{value:"utf-8"});m.prototype.encode=function(c, 87 | a){a=void 0===a?{stream:!1}:a;if(a.stream)throw Error("Failed to encode: the 'stream' option is unsupported.");a=0;for(var g=c.length,f=0,b=Math.max(32,g+(g>>1)+7),e=new Uint8Array(b>>3<<3);a=d){if(a=d)continue}f+4>e.length&&(b+=8,b*=1+a/c.length*2,b=b>>3<<3,h=new Uint8Array(b),h.set(e),e=h);if(0===(d&4294967168))e[f++]=d;else {if(0===(d&4294965248))e[f++]= 88 | d>>6&31|192;else if(0===(d&4294901760))e[f++]=d>>12&15|224,e[f++]=d>>6&63|128;else 
if(0===(d&4292870144))e[f++]=d>>18&7|240,e[f++]=d>>12&63|128,e[f++]=d>>6&63|128;else continue;e[f++]=d&63|128;}}return e.slice?e.slice(0,f):e.subarray(0,f)};Object.defineProperty(k.prototype,"encoding",{value:"utf-8"});Object.defineProperty(k.prototype,"fatal",{value:!1});Object.defineProperty(k.prototype,"ignoreBOM",{value:!1});k.prototype.decode=function(c,a){a=void 0===a?{stream:!1}:a;if(a.stream)throw Error("Failed to decode: the 'stream' option is unsupported."); 89 | a=c;!(a instanceof Uint8Array)&&a.buffer instanceof ArrayBuffer&&(a=new Uint8Array(c.buffer));c=0;for(var g=[],f=[];;){var b=c>>10&1023|55296),b=56320|b&1023);g.push(b);}}};l.TextEncoder=m;l.TextDecoder=k;})("undefined"!==typeof window?window:"undefined"!==typeof global?global:undefined); 91 | 92 | var BREAK_TYPES = { 93 | grapheme: 0, 94 | word: 1, 95 | sentence: 3 96 | }; 97 | var getSegmentType = function (type) { 98 | if (type < 100) { 99 | return 'none'; 100 | } 101 | else if (type >= 100 && type < 200) { 102 | return 'number'; 103 | } 104 | else if (type >= 200 && type < 300) { 105 | return 'word'; 106 | } 107 | else if (type >= 300 && type < 400) { 108 | return 'kana'; 109 | } 110 | else if (type >= 400 && type < 500) { 111 | return 'ideo'; 112 | } 113 | }; 114 | var createIntlSegmenterPolyfillFromInstance = function (wasmInstance, values) { return __awaiter(void 0, void 0, void 0, function () { 115 | var allocStr; 116 | return __generator(this, function (_a) { 117 | allocStr = function (str) { 118 | var encoder = new TextEncoder(); 119 | var view = encoder.encode(str + '\0'); 120 | // typescript does not play well with webassembly 121 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 122 | var exports = wasmInstance.exports; 123 | var ptr = exports.malloc(view.length); 124 | var memory = new Uint8Array(exports.memory.buffer, ptr, view.length); 125 | memory.set(view); 126 | return [ptr, view]; 127 | }; 128 | return [2 /*return*/, /** @class */ (function () { 129 | function 
Segmenter(locale, options) { 130 | this.locale = locale; 131 | this.options = options; 132 | } 133 | Segmenter.prototype.segment = function (input) { 134 | var locale = this.locale; 135 | var granularity = this.options.granularity; 136 | var exports = wasmInstance.exports; 137 | values.current = []; 138 | var _a = allocStr(input), inputPtr = _a[0], inputView = _a[1]; 139 | var localePtr = allocStr(locale)[0]; 140 | exports.break_iterator(BREAK_TYPES[granularity], localePtr, inputPtr); 141 | exports.free(localePtr); 142 | exports.free(inputPtr); 143 | var decoder = new TextDecoder(); 144 | return values.current.map(function (_a) { 145 | var start = _a[0], end = _a[1], segmentType = _a[2]; 146 | return ({ 147 | segment: decoder.decode(inputView.slice(start, end)), 148 | index: decoder.decode(inputView.slice(0, start)).length, 149 | isWordLike: granularity === 'word' 150 | ? getSegmentType(segmentType) !== 'none' 151 | : undefined, 152 | breakType: granularity === 'word' ? getSegmentType(segmentType) : undefined 153 | }); 154 | }); 155 | }; 156 | return Segmenter; 157 | }())]; 158 | }); 159 | }); }; 160 | var getImports = function (callback) { return ({ 161 | env: { 162 | push: function (start, end, segmentType) { 163 | callback([start, end, segmentType]); 164 | }, 165 | __sys_stat64: function () { } 166 | }, 167 | wasi_snapshot_preview1: { 168 | proc_exit: function () { }, 169 | fd_close: function () { }, 170 | environ_sizes_get: function () { }, 171 | environ_get: function () { } 172 | } 173 | }); }; 174 | var createIntlSegmenterPolyfillFromFactory = function (wasmFactory) { return __awaiter(void 0, void 0, void 0, function () { 175 | var values, instance; 176 | return __generator(this, function (_a) { 177 | switch (_a.label) { 178 | case 0: 179 | values = { current: [] }; 180 | return [4 /*yield*/, wasmFactory(getImports(function (value) { 181 | console.log(value); 182 | values.current.push(value); 183 | }))]; 184 | case 1: 185 | instance = (_a.sent()).instance; 
186 | return [2 /*return*/, createIntlSegmenterPolyfillFromInstance(instance, values)]; 187 | } 188 | }); 189 | }); }; 190 | 191 | var createIntlSegmenterPolyfill = function () { 192 | return createIntlSegmenterPolyfillFromFactory(break_iterator); 193 | }; 194 | 195 | return createIntlSegmenterPolyfill; 196 | 197 | }))); 198 | --------------------------------------------------------------------------------