├── .prettierignore
├── .gitignore
├── src
    ├── hash
    │   ├── index.ts
    │   ├── hash.ts
    │   └── crc32.ts
    ├── model
    │   ├── index.ts
    │   ├── types.ts
    │   └── model.ts
    ├── predict
    │   ├── index.ts
    │   ├── types.ts
    │   ├── proba.ts
    │   └── predict.ts
    ├── feature
    │   ├── index.ts
    │   ├── types.ts
    │   ├── char.ts
    │   ├── regexp.ts
    │   └── feature.ts
    ├── index.ts
    ├── util.ts
    └── tokenize.ts
├── docs
    ├── image
    │   ├── github.png
    │   ├── npm.svg
    │   └── pypi.svg
    ├── style
    │   └── reset.css
    └── index.html
├── tsconfig.cjs.json
├── test
    ├── tokenize
    │   └── tokenize.ts
    ├── feature
    │   ├── char.ts
    │   └── features.ts
    ├── predict
    │   ├── proba.ts
    │   └── predict.ts
    ├── hash
    │   └── hash.ts
    └── util
    │   └── ngram.ts
├── .prettierrc.json
├── jest.config.js
├── tsconfig.json
├── LICENSE
├── badges
    ├── badge-lines.svg
    ├── badge-branches.svg
    ├── badge-functions.svg
    └── badge-statements.svg
├── package.json
└── README.md


/.prettierignore:
--------------------------------------------------------------------------------
1 | /lib
2 | /coverage
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | node_modules/
3 | /lib
4 | /coverage
5 | 


--------------------------------------------------------------------------------
/src/hash/index.ts:
--------------------------------------------------------------------------------
1 | export * from './crc32'
2 | export * from './hash'
3 | 


--------------------------------------------------------------------------------
/src/model/index.ts:
--------------------------------------------------------------------------------
1 | export * from './types'
2 | export * from './model'
3 | 


--------------------------------------------------------------------------------
/docs/image/github.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuhsak/wakachigaki/HEAD/docs/image/github.png


--------------------------------------------------------------------------------
/src/predict/index.ts:
--------------------------------------------------------------------------------
1 | export * from './types'
2 | export * from './proba'
3 | export * from './predict'
4 | 


--------------------------------------------------------------------------------
/src/feature/index.ts:
--------------------------------------------------------------------------------
1 | export * from './types'
2 | export * from './regexp'
3 | export * from './char'
4 | export * from './feature'
5 | 


--------------------------------------------------------------------------------
/src/predict/types.ts:
--------------------------------------------------------------------------------
1 | import type { NgramFeature } from '../feature'
2 | 
/**
 * An `NgramFeature` paired with `distance`, the running counter maintained by
 * `probaPredictor`: it starts at 0, is reset to 0 after a character whose
 * probability exceeds the threshold, and is incremented by one otherwise.
 */
export type NgramFeatureWithDistance = NgramFeature & { distance: number }
4 | 


--------------------------------------------------------------------------------
/src/hash/hash.ts:
--------------------------------------------------------------------------------
1 | import { crc32 } from './crc32'
2 | 
/**
 * Builds a hashing function that maps text onto one of `nBuckets` buckets.
 *
 * The text is UTF-8 encoded, passed through `crc32`, reduced modulo
 * `nBuckets` and rendered as a lowercase hexadecimal string.
 */
export const hash = (nBuckets: number) => {
  return (text: string) => {
    const bytes = new TextEncoder().encode(text)
    const bucket = crc32(bytes) % nBuckets
    return bucket.toString(16).toLowerCase()
  }
}
5 | 


--------------------------------------------------------------------------------
/src/feature/types.ts:
--------------------------------------------------------------------------------
/**
 * Features extracted for a single character of the input text.
 */
export type NgramFeature = {
  /** The character this feature set belongs to. */
  char: string
  /** N-gram features read from the window around the character. */
  features: {
    /** `'type'` for character-type n-grams, `'hash'` for hashed character n-grams. */
    kind: 'type' | 'hash'
    /** Length of the n-gram. */
    size: number
    /** Start position of the n-gram relative to the character. */
    offset: number
    /** The character-type string or the hexadecimal hash bucket, depending on `kind`. */
    value: string
  }[]
}
10 | 


--------------------------------------------------------------------------------
/tsconfig.cjs.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extends": "./tsconfig.json",
 3 |   "compilerOptions": {
 4 |     "module": "commonjs",
 5 |     "outDir": "./lib/cjs",
 6 |     "tsBuildInfoFile": "./lib/cjs/.tsbuildinfo",
 7 |     "declaration": true,
 8 |     "declarationMap": true,
 9 |     "declarationDir": "./lib/types"
10 |   }
11 | }
12 | 


--------------------------------------------------------------------------------
/src/model/types.ts:
--------------------------------------------------------------------------------
/**
 * Weight table. `proba` reads it as `[size][offset][value]`, i.e. n-gram size,
 * then n-gram offset, then the feature value string.
 */
type Weights = Record<number, Record<number, Record<string, number>>>

/** Parameters consumed by `proba` to score a character. */
export type Weight = {
  /** Weights for features of kind `'hash'`. */
  hash: Weights
  /** Weights for features of kind `'type'`. */
  type: Weights
  /** Multiplier applied to a feature's `distance` value. */
  distance: number
  /** Constant term added to every score. */
  bias: number
}

/** A model: feature-extraction settings plus the weights used for scoring. */
export type Model = {
  version: number
  config: {
    /** Number of hash buckets handed to the feature extractor. */
    nBuckets: number
    /** Largest n-gram size handed to the feature extractor. */
    size: number
    /** Window half-width handed to the feature extractor. */
    offset: number
    /** Divisor applied to the raw score before the sigmoid. */
    scale: number
  }
  weight: Weight
}
20 | 


--------------------------------------------------------------------------------
/test/tokenize/tokenize.ts:
--------------------------------------------------------------------------------
 1 | import { tokenize, segment } from '../../src/tokenize'
 2 | 
 3 | describe('tokenize functions', () => {
 4 |   const tokens = tokenize('This is a Test')
 5 |   test('tokenize', () => {
 6 |     expect(Array.isArray(tokens)).toBe(true)
 7 |     expect(Array.isArray(tokenize('あいうえお'))).toBe(true)
 8 |   })
 9 |   test('segment', () => {
10 |     expect(segment('This is a Test')).toStrictEqual(tokens)
11 |   })
12 | })
13 | 


--------------------------------------------------------------------------------
/src/feature/char.ts:
--------------------------------------------------------------------------------
 1 | import * as R from './regexp'
 2 | 
 3 | const rules = [
 4 |   {
 5 |     fn: R.isKanji,
 6 |     rep: 'C',
 7 |   },
 8 |   { fn: R.isNumeralKanji, rep: 'S' },
 9 |   { fn: R.isHiragana, rep: 'H' },
10 |   { fn: R.isKatakana, rep: 'K' },
11 |   { fn: R.isAlphabet, rep: 'A' },
12 |   { fn: R.isNumeral, rep: 'N' },
13 | ]
14 | 
15 | export const getCharType = (char: string) =>
16 |   rules.reduce((rep, rule) => (rule.fn(char) ? rule.rep : rep), 'O')
17 | 


--------------------------------------------------------------------------------
/test/feature/char.ts:
--------------------------------------------------------------------------------
 1 | import { getCharType } from '../../src/feature'
 2 | 
 3 | describe('char functions', () => {
 4 |   test('getCharType', () => {
 5 |     expect(getCharType('a')).toBe('A')
 6 |     expect(getCharType('0')).toBe('N')
 7 |     expect(getCharType('あ')).toBe('H')
 8 |     expect(getCharType('ア')).toBe('K')
 9 |     expect(getCharType('漢')).toBe('C')
10 |     expect(getCharType('百')).toBe('S')
11 |     expect(getCharType('!')).toBe('O')
12 |   })
13 | })
14 | 


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
 1 | export { tokenize, segment } from './tokenize'
 2 | export { predictProba, predict } from './predict'
 3 | export {
 4 |   regexp,
 5 |   isKanji,
 6 |   isNumeralKanji,
 7 |   isHiragana,
 8 |   isKatakana,
 9 |   isNumeral,
10 |   isAlphabet,
11 |   features,
12 | } from './feature'
13 | export { crc32, hash } from './hash'
14 | export { threshold, model } from './model'
15 | export { ngram, sigmoid } from './util'
16 | export type { NgramFeature } from './feature'
17 | 


--------------------------------------------------------------------------------
/docs/image/npm.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <svg version="1.1" id="npm" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
 3 | 	 viewBox="0 0 780 250" style="enable-background:new 0 0 780 250;" xml:space="preserve">
 4 | <style type="text/css">
 5 | 	.st0{fill:#C12127;}
 6 | </style>
 7 | <path class="st0" d="M240,250h100v-50h100V0H240V250z M340,50h50v100h-50V50z M480,0v200h100V50h50v150h50V50h50v150h50V0H480z
 8 | 	 M0,200h100V50h50v150h50V0H0V200z"/>
 9 | </svg>
10 | 


--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "arrowParens": "always",
 3 |   "bracketSpacing": true,
 4 |   "jsxSingleQuote": true,
 5 |   "printWidth": 80,
 6 |   "quoteProps": "as-needed",
 7 |   "semi": false,
 8 |   "singleQuote": true,
 9 |   "tabWidth": 2,
10 |   "trailingComma": "all",
11 |   "useTabs": false,
12 |   "overrides": [
13 |     {
14 |       "files": ["*.md", "README"],
15 |       "options": { "parser": "markdown-nocjsp" }
16 |     },
17 |     {
18 |       "files": ["*.mdx"],
19 |       "options": { "parser": "mdx-nocjsp" }
20 |     }
21 |   ]
22 | }
23 | 


--------------------------------------------------------------------------------
/jest.config.js:
--------------------------------------------------------------------------------
// Jest configuration for the project.
module.exports = {
  clearMocks: true,
  // Look for tests in both the test directory and next to the sources.
  roots: ['<rootDir>/test', '<rootDir>/src'],
  // Any script file under a test/ directory, plus *.spec.* / *.test.* files.
  testMatch: [
    '**/test/**/*.(ts|tsx|js|jsx)',
    '**/*.(spec|test).(ts|tsx|js|jsx)',
  ],
  // Transform TypeScript and JavaScript sources with @swc/jest.
  transform: {
    '^.+\\.(t|j)sx?$': [
      '@swc/jest',
      {
        sourceMaps: true,
      },
    ],
  },
  // Coverage is collected on every run and written to ./coverage.
  collectCoverage: true,
  coverageDirectory: 'coverage',
  coverageProvider: 'babel',
  coverageReporters: ['clover', 'json', 'lcov', 'text', 'json-summary'],
  verbose: true,
}
22 | 


--------------------------------------------------------------------------------
/src/util.ts:
--------------------------------------------------------------------------------
 1 | export const ngram = (chars: string[]) => (index: number) => {
 2 |   const get = (size: number, offset: number): string => {
 3 |     if (size === 1) {
 4 |       return chars[index + offset] || ''
 5 |     }
 6 |     return get(size - 1, offset) + get(1, offset + (size - 1))
 7 |   }
 8 |   return get
 9 | }
10 | 
/**
 * Lists the numbers from `start` (inclusive) up to `end` (exclusive),
 * advancing by one each step. Returns an empty array when `start >= end`.
 */
export const range = (start: number, end: number) => {
  const values: number[] = []
  let current = start
  while (current < end) {
    values.push(current)
    current += 1
  }
  return values
}
18 | 
/** Logistic function: maps any real number into the interval (0, 1). */
export const sigmoid = (n: number) => {
  const decay = Math.exp(-n)
  return 1 / (1 + decay)
}
20 | 


--------------------------------------------------------------------------------
/test/feature/features.ts:
--------------------------------------------------------------------------------
 1 | import { featurer } from '../../src/feature'
 2 | 
 3 | describe('feature functions', () => {
 4 |   test('features', () => {
 5 |     const text = 'aあ0漢カ百bhjかいオ'
 6 |     const feats = featurer(262144, 3, 3)(text)
 7 |     expect(feats).toHaveLength(text.length)
 8 |     feats.forEach((f) =>
 9 |       f.features.forEach((f) => {
10 |         expect(['type', 'hash'].includes(f.kind)).toEqual(true)
11 |         expect(typeof f.size).toEqual('number')
12 |         expect(typeof f.offset).toEqual('number')
13 |         expect(typeof f.value).toEqual('string')
14 |       }),
15 |     )
16 |   })
17 | })
18 | 


--------------------------------------------------------------------------------
/src/predict/proba.ts:
--------------------------------------------------------------------------------
 1 | import type { NgramFeatureWithDistance } from './types'
 2 | import { Weight } from '../model'
 3 | import { sigmoid } from '../util'
 4 | 
 5 | export const proba = (weight: Weight, scale: number) => {
 6 |   const { bias } = weight
 7 | 
 8 |   return (feature: NgramFeatureWithDistance) => {
 9 |     const features = feature.features.reduce<number>((score, f) => {
10 |       return score + (weight[f.kind][f.size]?.[f.offset]?.[f.value] || 0)
11 |     }, 0)
12 | 
13 |     const distance = feature.distance * weight.distance
14 | 
15 |     return sigmoid((bias + features + distance) / scale)
16 |   }
17 | }
18 | 


--------------------------------------------------------------------------------
/test/predict/proba.ts:
--------------------------------------------------------------------------------
 1 | import { proba } from '../../src/predict/proba'
 2 | 
 3 | describe('probability functions', () => {
 4 |   describe('proba', () => {
 5 |     test('works even if empty weights are given', () => {
 6 |       const p = proba({ type: {}, hash: {}, distance: 0, bias: 0 }, 1)
 7 |       const v = p({
 8 |         char: 'a',
 9 |         features: [
10 |           {
11 |             kind: 'type',
12 |             size: 1,
13 |             offset: 0,
14 |             value: 'A',
15 |           },
16 |         ],
17 |         distance: 0,
18 |       })
19 |       expect(v).toEqual(0.5)
20 |     })
21 |   })
22 | })
23 | 


--------------------------------------------------------------------------------
/test/hash/hash.ts:
--------------------------------------------------------------------------------
 1 | import { crc32, hash } from '../../src/hash'
 2 | 
 3 | describe('hash functions', () => {
 4 |   describe('crc32', () => {
 5 |     test('provides correct value', () => {
 6 |       expect(crc32(new TextEncoder().encode('abcdef'))).toEqual(1267612143)
 7 |       expect(crc32(new TextEncoder().encode('CRC32関数のテスト'))).toEqual(
 8 |         549135931,
 9 |       )
10 |     })
11 |   })
12 | 
13 |   describe('hash', () => {
14 |     test('provides correct hex', () => {
15 |       expect(hash(262144)('abcdef')).toEqual('239ef')
16 |       expect(hash(262144)('CRC32関数のテスト')).toEqual('3263b')
17 |     })
18 |   })
19 | })
20 | 


--------------------------------------------------------------------------------
/docs/style/reset.css:
--------------------------------------------------------------------------------
/* Minimal CSS reset for the documentation page. */

/* Use border-box sizing for every element and pseudo-element. */
*,
*::before,
*::after {
  box-sizing: border-box;
}
/* Remove default margins from common block elements. */
body,
h1,
h2,
h3,
h4,
p,
figure,
blockquote,
dl,
dd {
  margin: 0;
}
/* Drop list markers only on lists that explicitly carry role="list". */
ul[role='list'],
ol[role='list'] {
  list-style: none;
}
/* Smooth scrolling while focus is inside the document. */
html:focus-within {
  scroll-behavior: smooth;
}
/* Base text settings for the page body. */
body {
  /* min-height: 100vh; */
  text-rendering: optimizeSpeed;
  line-height: 1.5;
}
/* Links without a class keep the default skip-ink underline behaviour. */
a:not([class]) {
  text-decoration-skip-ink: auto;
}
/* Images never overflow their container and render as blocks. */
img,
picture {
  max-width: 100%;
  display: block;
}
/* Form controls inherit the surrounding font. */
input,
button,
textarea,
select {
  font: inherit;
}
44 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "include": ["./src"],
 3 |   "compilerOptions": {
 4 |     "module": "esnext",
 5 |     "moduleResolution": "node",
 6 |     "target": "es6",
 7 |     "outDir": "./lib/esm",
 8 |     "tsBuildInfoFile": "./lib/esm/.tsbuildinfo",
 9 |     "rootDir": "./src",
10 |     "sourceMap": true,
11 |     "incremental": true,
12 |     "isolatedModules": true,
13 |     "strict": true,
14 |     "noImplicitReturns": true,
15 |     "noFallthroughCasesInSwitch": true,
16 |     "noUncheckedIndexedAccess": true,
17 |     "noImplicitOverride": true,
18 |     "downlevelIteration": true,
19 |     "esModuleInterop": true,
20 |     "skipLibCheck": true,
21 |     "forceConsistentCasingInFileNames": true
22 |   }
23 | }
24 | 


--------------------------------------------------------------------------------
/test/predict/predict.ts:
--------------------------------------------------------------------------------
 1 | import { predictor, probaPredictor } from '../../src/predict/predict'
 2 | 
 3 | const weight = { type: {}, hash: {}, distance: 0, bias: 0 }
 4 | 
 5 | const predictProba = probaPredictor(weight, 1)
 6 | 
 7 | const predict = predictor(weight, 1)
 8 | 
 9 | const features = [{ char: 'a', features: [] }]
10 | 
11 | describe('prediction functions', () => {
12 |   describe('predictProba', () => {
13 |     test('works', () => {
14 |       const probas = predictProba(features)
15 |       expect(probas[0]).toEqual(0.5)
16 |     })
17 |   })
18 | 
19 |   describe('predict', () => {
20 |     test('works with threshold', () => {
21 |       expect(predict(features, 0.6)[0]).toEqual(false)
22 |       expect(predict(features, 0.4)[0]).toEqual(true)
23 |     })
24 |   })
25 | })
26 | 


--------------------------------------------------------------------------------
/src/feature/regexp.ts:
--------------------------------------------------------------------------------
 1 | export const regexp = {
 2 |   Kanji:
 3 |     /^[々〇〻\u2E80-\u2FDF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]|[\uD840-\uD87F][\uDC00-\uDFFF]+$/,
 4 |   NumeralKanji: /^[一二三四五六七八九十百千万億兆]+$/,
 5 |   Hiragana: /^[ぁ-ん]+$/,
 6 |   Katakana: /^[ァ-ヴーｧ-ﾝﾞﾟ]+$/,
 7 |   Alphabet: /^[a-zA-Zａ-ｚＡ-Ｚ]+$/,
 8 |   Numeral: /^[0-9０-９]+$/,
 9 | }
10 | 
11 | export const isKanji = (text: string) => regexp.Kanji.test(text)
12 | export const isNumeralKanji = (text: string) => regexp.NumeralKanji.test(text)
13 | export const isHiragana = (text: string) => regexp.Hiragana.test(text)
14 | export const isKatakana = (text: string) => regexp.Katakana.test(text)
15 | export const isAlphabet = (text: string) => regexp.Alphabet.test(text)
16 | export const isNumeral = (text: string) => regexp.Numeral.test(text)
17 | 


--------------------------------------------------------------------------------
/src/tokenize.ts:
--------------------------------------------------------------------------------
 1 | import { featurer } from './feature'
 2 | import { predict } from './predict'
 3 | import { model } from './model'
 4 | 
 5 | const { nBuckets, size, offset } = model.config
 6 | 
 7 | export const tokenizer = (nBuckets: number, size: number, offset: number) => {
 8 |   const f = featurer(nBuckets, size, offset)
 9 | 
10 |   return (text: string) => {
11 |     const chars = f(text)
12 |     return predict(chars)
13 |       .reduce(
14 |         (acc, willBreak, i) => {
15 |           acc[acc.length - 1] += chars[i]!.char
16 |           if (willBreak) acc.push('')
17 |           return acc
18 |         },
19 |         [''],
20 |       )
21 |       .filter((c) => !!c)
22 |   }
23 | }
24 | 
25 | export const tokenize = tokenizer(nBuckets, size, offset)
26 | 
27 | export { tokenize as segment }
28 | 


--------------------------------------------------------------------------------
/test/util/ngram.ts:
--------------------------------------------------------------------------------
 1 | import { ngram, range } from '../../src/util'
 2 | 
 3 | describe('utility Functions', () => {
 4 |   describe('ngram', () => {
 5 |     const chars = [...'abcdefghijklmn']
 6 |     const last = chars.length - 1
 7 |     test('works', () => {
 8 |       expect(ngram(chars)(4)(3, -2)).toBe('cde')
 9 |       expect(ngram(chars)(2)(3, 2)).toBe('efg')
10 |     })
11 |     test('works even if given index is out of range', () => {
12 |       expect(ngram(chars)(-2)(5, -2)).toBe('a')
13 |       expect(ngram(chars)(last + 1)(1, -1)).toBe('n')
14 |     })
15 |     test('works even if given position is out of range', () => {
16 |       expect(ngram(chars)(0)(4, -2)).toBe('ab')
17 |       expect(ngram(chars)(last)(1, 2)).toBe('')
18 |     })
19 |     test('works even if given size is out of range', () => {
20 |       expect(ngram(chars)(0)(999, -2)).toBe(chars.join(''))
21 |     })
22 |   })
23 | 
24 |   test('range', () => {
25 |     expect(range(0, 3)).toStrictEqual([0, 1, 2])
26 |     expect(range(-2, 1)).toStrictEqual([-2, -1, 0])
27 |   })
28 | })
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Yuhsak Inoue
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/badges/badge-lines.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="136" height="20" role="img" aria-label="Coverage:lines: 100%"><title>Coverage:lines: 100%</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="136" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="93" height="20" fill="#555"/><rect x="93" width="43" height="20" fill="#4c1"/><rect width="136" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="475" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="830">Coverage:lines</text><text x="475" y="140" transform="scale(.1)" fill="#fff" textLength="830">Coverage:lines</text><text aria-hidden="true" x="1135" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="330">100%</text><text x="1135" y="140" transform="scale(.1)" fill="#fff" textLength="330">100%</text></g></svg>


--------------------------------------------------------------------------------
/badges/badge-branches.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="160" height="20" role="img" aria-label="Coverage:branches: 100%"><title>Coverage:branches: 100%</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="160" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="117" height="20" fill="#555"/><rect x="117" width="43" height="20" fill="#4c1"/><rect width="160" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="595" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="1070">Coverage:branches</text><text x="595" y="140" transform="scale(.1)" fill="#fff" textLength="1070">Coverage:branches</text><text aria-hidden="true" x="1375" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="330">100%</text><text x="1375" y="140" transform="scale(.1)" fill="#fff" textLength="330">100%</text></g></svg>


--------------------------------------------------------------------------------
/badges/badge-functions.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="160" height="20" role="img" aria-label="Coverage:functions: 100%"><title>Coverage:functions: 100%</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="160" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="117" height="20" fill="#555"/><rect x="117" width="43" height="20" fill="#4c1"/><rect width="160" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="595" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="1070">Coverage:functions</text><text x="595" y="140" transform="scale(.1)" fill="#fff" textLength="1070">Coverage:functions</text><text aria-hidden="true" x="1375" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="330">100%</text><text x="1375" y="140" transform="scale(.1)" fill="#fff" textLength="330">100%</text></g></svg>


--------------------------------------------------------------------------------
/badges/badge-statements.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="172" height="20" role="img" aria-label="Coverage:statements: 100%"><title>Coverage:statements: 100%</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="172" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="129" height="20" fill="#555"/><rect x="129" width="43" height="20" fill="#4c1"/><rect width="172" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="655" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="1190">Coverage:statements</text><text x="655" y="140" transform="scale(.1)" fill="#fff" textLength="1190">Coverage:statements</text><text aria-hidden="true" x="1495" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="330">100%</text><text x="1495" y="140" transform="scale(.1)" fill="#fff" textLength="330">100%</text></g></svg>


--------------------------------------------------------------------------------
/src/predict/predict.ts:
--------------------------------------------------------------------------------
 1 | import type { NgramFeature } from '../feature'
 2 | import { threshold as t, model, Weight } from '../model'
 3 | import { proba } from './proba'
 4 | 
 5 | const {
 6 |   weight,
 7 |   config: { scale },
 8 | } = model
 9 | 
/**
 * Builds a function that scores every character of a feature sequence.
 *
 * Characters are processed in order. Each one is scored together with a
 * running `distance` counter, which is reset to 0 after a character whose
 * probability exceeds `threshold` and incremented by one otherwise.
 *
 * @param weight - weights handed to `proba`
 * @param scale - score divisor handed to `proba`
 * @returns a function mapping features (and an optional `threshold`,
 *   default 0.5) to one probability per input character
 */
export const probaPredictor = (weight: Weight, scale: number) => {
  const p = proba(weight, scale)

  return (features: NgramFeature[], threshold = 0.5) => {
    // Append in place: copying the accumulated array for every character
    // made this quadratic in the length of the text.
    const probabilities: number[] = []
    let distance = 0

    for (const feature of features) {
      const probability = p({ ...feature, distance })
      probabilities.push(probability)
      distance = probability > threshold ? 0 : distance + 1
    }

    return probabilities
  }
}
25 | 
26 | export const predictProba = probaPredictor(weight, scale)
27 | 
/**
 * Builds a function that decides, for every character of a feature
 * sequence, whether its probability exceeds `threshold`. The threshold
 * defaults to the `threshold` exported by '../model' and is also forwarded
 * to the probability pass.
 */
export const predictor = (weight: Weight, scale: number) => {
  const toProbabilities = probaPredictor(weight, scale)

  return (features: NgramFeature[], threshold = t) => {
    const probabilities = toProbabilities(features, threshold)
    return probabilities.map((probability) => probability > threshold)
  }
}
34 | 
35 | export const predict = predictor(weight, scale)
36 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "wakachigaki",
 3 |   "version": "1.3.2",
 4 |   "description": "Minimal Japanese sentence tokenizer written in 100% pure TypeScript.",
 5 |   "author": {
 6 |     "name": "Yuhsak Inoue",
 7 |     "email": "yuhsak.inoue@gmail.com",
 8 |     "url": "https://github.com/yuhsak"
 9 |   },
10 |   "license": "MIT",
11 |   "repository": {
12 |     "type": "git",
13 |     "url": "https://github.com/yuhsak/wakachigaki.git"
14 |   },
15 |   "main": "lib/cjs/index.js",
16 |   "types": "lib/types/index.d.ts",
17 |   "module": "lib/esm/index.js",
18 |   "files": [
19 |     "lib",
20 |     "src",
21 |     "!lib/**/*.tsbuildinfo",
22 |     "tsconfig.json",
23 |     "tsconfig.cjs.json"
24 |   ],
25 |   "scripts": {
26 |     "clean:dist": "rimraf lib",
27 |     "clean:test": "rimraf coverage",
28 |     "clean": "run-p clean:*",
29 |     "check:tsc": "tsc --noEmit",
30 |     "check:prettier": "prettier --check .",
31 |     "check": "run-s check:*",
32 |     "test": "jest",
33 |     "build:watch": "npm run build -- --watch",
34 |     "build": "tsc -b tsconfig.json tsconfig.cjs.json",
35 |     "prebuild": "run-p clean:dist",
36 |     "start": "npm run build:watch",
37 |     "prepublishOnly": "npm run build"
38 |   },
39 |   "devDependencies": {
40 |     "@swc/core": "^1.2.107",
41 |     "@swc/jest": "^0.2.5",
42 |     "@types/jest": "^27.0.2",
43 |     "jest": "27.3.1",
44 |     "jest-coverage-badges": "^1.1.2",
45 |     "npm-run-all": "^4.1.5",
46 |     "prettier": "^2.4.1",
47 |     "prettier-plugin-md-nocjsp": "^1.2.0",
48 |     "rimraf": "^3.0.2",
49 |     "typescript": "^4.4.4"
50 |   },
51 |   "sideEffects": false,
52 |   "unpkg": "lib/esm/index.js"
53 | }
54 | 


--------------------------------------------------------------------------------
/src/feature/feature.ts:
--------------------------------------------------------------------------------
 1 | import type { NgramFeature } from './types'
 2 | import { ngram, range } from '../util'
 3 | import { hash } from '../hash'
 4 | import { getCharType } from './char'
 5 | import { model } from '../model'
 6 | 
 7 | const {
 8 |   config: { nBuckets, size, offset },
 9 | } = model
10 | 
// Single-letter padding symbols placed before and after the text by
// `featurer`, one array entry per letter.
const markers = [...'BDEFGIJLMPQRTUVWXYZ']
32 | 
/**
 * Builds a feature extractor.
 *
 * The extractor normalizes the text, splits it into characters and pads both
 * ends with up to `offset` marker symbols. For every character it then emits,
 * for each n-gram size `s` from 1 to `size` and each offset `o` from
 * `-offset` to `offset + 1 - s`, a `'type'` feature (n-gram of character
 * types) followed by a `'hash'` feature (hash of the lower-cased character
 * n-gram).
 *
 * @param nBuckets - number of hash buckets
 * @param size - largest n-gram size
 * @param offset - window half-width around each character
 */
export const featurer = (nBuckets: number, size: number, offset: number) => {
  const h = hash(nBuckets)
  const prefix = markers.slice(0, offset)
  const suffix = [...markers].reverse().slice(0, offset)

  return (text: string) => {
    const source = text.normalize()
    const chars = [...source]
    const types = chars.map(getCharType)

    const ngramByChars = ngram([...prefix, ...source.toLowerCase(), ...suffix])
    const ngramByTypes = ngram([...prefix, ...types, ...suffix])

    return chars.map((char, i): NgramFeature => {
      // Shift past the prefix markers to address this character.
      const index = i + offset
      const charsAt = ngramByChars(index)
      const typesAt = ngramByTypes(index)

      const features: NgramFeature['features'] = []
      for (const s of range(1, size + 1)) {
        for (const o of range(-1 * offset, offset + 1 + 1 - s)) {
          const typeValue = typesAt(s, o)
          const hashValue = h(charsAt(s, o))
          features.push(
            { kind: 'type', size: s, offset: o, value: typeValue },
            { kind: 'hash', size: s, offset: o, value: hashValue },
          )
        }
      }

      return { char, features }
    })
  }
}
77 | 
78 | export const features = featurer(nBuckets, size, offset)
79 | 


--------------------------------------------------------------------------------
/docs/image/pypi.svg:
--------------------------------------------------------------------------------
1 | <svg clip-rule="evenodd" fill-rule="evenodd" height="2227" stroke-linecap="square" stroke-linejoin="round" stroke-miterlimit="1.5" viewBox=".006 0 673.409 600" width="2500" xmlns="http://www.w3.org/2000/svg"><path d="m.006 0h600v600h-600z" fill="#fff"/><g transform="translate(72.59 -63.209)"><path d="m673.41 236.016-64.271 8.252v281.357h64.276z" fill="#ffca1e" stroke="#d7c5b2" stroke-width="1.11" transform="matrix(1.02365 -.37275 .22115 .08093 -423.086 457.448)"/><path d="m609.139 244.268h64.276v281.357h-64.276z" fill="#ffd241" stroke="#d7c5b2" stroke-width=".35" transform="matrix(3.93359 -1.43837 0 .53084 -2266.43 1087.94)"/><path d="m609.139 244.268h64.276v281.357h-64.276z" fill="#2f6490" stroke="#d1e3f2" stroke-width=".62" transform="matrix(1.9604 -.71685 .2216 .07927 -1118.55 639.339)"/><path d="m609.139 244.268h64.276v281.357h-64.276z" fill="#3775a8" stroke="#d1e3f2" stroke-width="1.15" transform="matrix(1.02281 -.374 0 1.05695 -430.773 214.178)"/><path d="m609.139 244.268h64.276v281.357h-64.276z" fill="#2f6490" stroke="#fff" stroke-width="1.17" transform="matrix(-.97499 -.34924 0 1.05695 786.194 199.208)"/><path d="m609.139 244.268h64.276v281.357h-64.276z" fill="#efeeea" stroke="#d8d8d8" stroke-width="1.44" transform="matrix(-.97499 -.35652 0 .26885 786.194 618.355)"/><g stroke="#d1e3f2"><path d="m609.139 244.268h64.276v281.357h-64.276z" fill="#2f6490" stroke-width="1.44" transform="matrix(-.96834 -.35409 0 .53077 719.483 427.5)"/><path d="m609.139 244.268h64.276v281.357h-64.276z" fill="#3775a8" stroke-width="1.2" transform="matrix(.93554 -.3421 0 1.05695 -311.892 170.492)"/><path d="m609.139 244.268h64.276v281.357h-64.276z" fill="#3775a8" stroke-width="1.43" transform="matrix(.9742 -.35623 0 .53084 -463.744 428.761)"/><path d="m67.575 393.161 62.121 22.465 188.708-68.299m-125.165-29.141 124.732-45.602" fill="none"/></g><path d="m318.404 347.327 63.939-23.209" fill="none" stroke="#d7c5b2"/><path d="m609.139 244.268h64.276v281.357h-64.276z" fill="#2f6490" 
stroke="#d1e3f2" stroke-width="1.16" transform="matrix(.96788 -.35244 .22115 .08093 -576.168 513.583)"/><circle cx="637.517" cy="260.001" fill="#fff" r="15.71" transform="matrix(.7826 -.4024 .05494 .8614 -295.363 304.934)"/><path d="m195.786 198.125 61.696 22.126" fill="none" stroke="#d1e3f2"/><path d="m673.415 244.268h-64.276l.018 282.405 64.258-1.048z" fill="#ffd241" stroke="#d7c5b2" stroke-width="1.37" transform="matrix(1.02281 -.374 0 .52843 -430.773 491.983)"/><path d="m673.415 244.268h-64.276l.001 281.758 64.275-.401z" fill="#ffd241" stroke="#d7c5b2" stroke-width="1.48" transform="matrix(.93554 -.3421 0 .52743 -311.892 448.822)"/><circle cx="637.517" cy="260.001" fill="#fefdfd" r="15.71" transform="matrix(.77074 -.3963 .05156 .80832 -205.916 509.411)"/><path d="m192.412 468.059 126.028-45.977" fill="none" stroke="#d7c5b2"/></g></svg>


--------------------------------------------------------------------------------
/src/hash/crc32.ts:
--------------------------------------------------------------------------------
 1 | const xor = (a: number, b: number) => (a ^ b) >>> 0
 2 | 
 3 | const and = (a: number, b: number) => (a & b) >>> 0
 4 | 
 5 | const CRC32_TABLE = [
 6 |   0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
 7 |   0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
 8 |   0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
 9 |   0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
10 |   0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
11 |   0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
12 |   0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
13 |   0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
14 |   0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
15 |   0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
16 |   0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
17 |   0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
18 |   0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
19 |   0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
20 |   0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
21 |   0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
22 |   0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
23 |   0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
24 |   0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
25 |   0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
26 |   0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
27 |   0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
28 |   0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
29 |   0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
30 |   0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
31 |   0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
32 |   0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
33 |   0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
34 |   0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
35 |   0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
36 |   0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
37 |   0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
38 |   0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
39 |   0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
40 |   0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
41 |   0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
42 |   0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
43 |   0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
44 |   0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
45 |   0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
46 |   0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
47 |   0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
48 |   0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
49 | ]
50 | 
51 | export const crc32 = (buffer: ArrayBuffer | Uint8Array) => {
52 |   let crc = 0xffffffff
53 |   const view = new Uint8Array(buffer)
54 |   for (let i = 0; i < view.byteLength; i++) {
55 |     crc = xor(crc >>> 8, CRC32_TABLE[and(xor(crc, view[i]!), 0xff)]!)
56 |   }
57 |   return xor(crc, 0xffffffff)
58 | }
59 | 


--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="ja">
  3 |   <head>
  4 |     <meta charset="utf-8" />
  5 |     <meta
  6 |       name="viewport"
  7 |       content="width=device-width, initial-scale=1, maximum-scale=1"
  8 |     />
  9 |     <title>wakachigaki | わずか6.2Kbの日本語分かち書きライブラリ</title>
 10 |     <link rel="stylesheet" href="./style/reset.css" />
 11 |     <script type="module">
 12 |       import { tokenize } from 'https://unpkg.com/wakachigaki@1.3.2'
 13 | 
 14 |       window.addEventListener('DOMContentLoaded', () => {
 15 |         const source = document.querySelector('input.source')
 16 |         const dist = document.querySelector('textarea.dist')
 17 |         source.value = '非常に効果的な機械学習モデル'
 18 | 
 19 |         const update = (e) => {
 20 |           const { value } = e.target
 21 |           const tokens = tokenize(value)
 22 |           dist.value = tokens.join(' | ')
 23 |         }
 24 | 
 25 |         update({ target: source })
 26 | 
 27 |         source.addEventListener('keyup', update)
 28 |         source.addEventListener('change', update)
 29 |         source.addEventListener('blur', update)
 30 |       })
 31 |     </script>
 32 |     <style>
 33 |       h1 {
 34 |         font-size: 6rem;
 35 |         font-weight: bold;
 36 |       }
 37 | 
 38 |       p,
 39 |       span {
 40 |         font-size: 1.5rem;
 41 |       }
 42 | 
 43 |       input,
 44 |       textarea {
 45 |         font-size: 1rem;
 46 |         line-height: 2em;
 47 |       }
 48 | 
 49 |       @media all and (min-width: 1024px) and (max-width: 1280px) {
 50 |         h1 {
 51 |           font-size: 5rem;
 52 |         }
 53 | 
 54 |         p,
 55 |         span {
 56 |           font-size: 1.25rem;
 57 |         }
 58 |       }
 59 | 
 60 |       @media all and (min-width: 768px) and (max-width: 1024px) {
 61 |         h1 {
 62 |           font-size: 4rem;
 63 |         }
 64 | 
 65 |         p,
 66 |         span {
 67 |           font-size: 1rem;
 68 |         }
 69 |       }
 70 | 
 71 |       @media all and (min-width: 480px) and (max-width: 768px) {
 72 |         h1 {
 73 |           font-size: 3rem;
 74 |         }
 75 | 
 76 |         p,
 77 |         span,
 78 |         input,
 79 |         textarea {
 80 |           font-size: 0.9rem;
 81 |         }
 82 |       }
 83 | 
 84 |       @media all and (max-width: 480px) {
 85 |         h1 {
 86 |           font-size: 2rem;
 87 |         }
 88 | 
 89 |         p,
 90 |         span,
 91 |         input,
 92 |         textarea {
 93 |           font-size: 0.8rem;
 94 |         }
 95 |       }
 96 | 
 97 |       main {
 98 |         display: flex;
 99 |         flex-direction: column;
100 |         align-items: center;
101 |         padding: 2.5rem 1.5rem 2.5rem 1.5rem;
102 |       }
103 | 
104 |       main > * {
105 |         margin-bottom: 2.5rem;
106 |       }
107 | 
108 |       .title {
109 |         background-clip: text;
110 |         -webkit-background-clip: text;
111 |         -webkit-text-fill-color: transparent;
112 |         background-image: linear-gradient(135deg, #7928ca, #ff0080);
113 |       }
114 | 
115 |       .icons {
116 |         display: flex;
117 |         align-items: center;
118 |         margin-top: 0.5rem;
119 |       }
120 | 
121 |       .icons > * {
122 |         margin: 0 0.5rem 0 0.5rem;
123 |       }
124 | 
125 |       a.card {
126 |         border-radius: 2px;
127 |         padding: 1rem;
128 |         transition: all 0.5s;
129 |       }
130 | 
131 |       a.card:hover {
132 |         box-shadow: rgba(0, 0, 0, 0.1) 0px 10px 15px -3px,
133 |           rgba(0, 0, 0, 0.05) 0px 4px 6px -2px;
134 |       }
135 | 
136 |       input.source {
137 |         width: 100%;
138 |         max-width: 50rem;
139 |         padding: 0.5rem;
140 |         margin-bottom: 1.5rem;
141 |       }
142 | 
143 |       textarea {
144 |         width: 100%;
145 |         max-width: 50rem;
146 |         padding: 0.5rem;
147 |         min-height: 10rem;
148 |       }
149 | 
150 |       textarea:disabled {
151 |         color: inherit;
152 |         -webkit-text-fill-color: #000;
153 |         opacity: 0.75;
154 |         background-color: #fafafa;
155 |       }
156 | 
157 |       main > .no-margin {
158 |         margin-bottom: 0;
159 |       }
160 | 
161 |       .stretch {
162 |         align-self: stretch;
163 |       }
164 | 
165 |       .flex {
166 |         display: flex;
167 |         flex-direction: column;
168 |       }
169 | 
170 |       .center {
171 |         align-items: center;
172 |       }
173 |     </style>
174 |   </head>
175 |   <body>
176 |     <main>
177 |       <h1 class="title">
178 |         <a href="https://yuhsak.github.io/wakachigaki/">wakachigaki</a>
179 |       </h1>
180 |       <div class="flex center">
181 |         <p>わずか6.2Kbの日本語分かち書きライブラリ</p>
182 |         <div class="icons">
183 |           <a
184 |             class="card"
185 |             href="https://pypi.org/project/wakachigaki/1.3.2"
186 |             target="_blank"
187 |             ><img src="./image/pypi.svg" width="48"
188 |           /></a>
189 |           <a
190 |             class="card"
191 |             href="https://www.npmjs.com/package/wakachigaki"
192 |             target="_blank"
193 |             ><img src="./image/npm.svg" width="48"
194 |           /></a>
195 |           <a
196 |             class="card"
197 |             href="https://github.com/yuhsak/wakachigaki"
198 |             target="_blank"
199 |             ><img src="./image/github.png" width="24"
200 |           /></a>
201 |         </div>
202 |       </div>
203 |       <div class="stretch flex center no-margin">
204 |         <input
205 |           class="source"
206 |           type="text"
207 |           placeholder="分かち書きしたい文を入力"
208 |         />
209 |         <textarea class="dist" disabled></textarea>
210 |       </div>
211 |     </main>
212 |   </body>
213 | </html>
214 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # wakachigaki
  2 | 
  3 | ![](./badges/badge-statements.svg)
  4 | ![](./badges/badge-branches.svg)
  5 | ![](./badges/badge-functions.svg)
  6 | ![](./badges/badge-lines.svg)
  7 | 
  8 | 6.2Kbの軽量日本語分かち書きライブラリ
  9 | 
 10 | [動作確認用のデモサイト](https://yuhsak.github.io/wakachigaki/)
 11 | 
 12 | Python版は[こちら](https://github.com/yuhsak/wakachigaki-py)
 13 | 
 14 | ## 紹介
 15 | 
 16 | `wakachigaki` は辞書を使わない軽量の日本語分かち書き用ライブラリです。
 17 | 
 18 | ピュアなJavaScriptなのでNode.jsやDeno, ブラウザなど環境を問わず動作し、TypeScriptやES Module[^1]にも対応しています。
 19 | 
 20 | 予め分かち書きされた大量の日本語テキストから作成した機械学習モデルを内包することで辞書不要の分かち書きを実現しています。
 21 | 
 22 | 学習には[Wikipedia日本語版のダンプデータ](https://dumps.wikimedia.org/jawiki/)全量を用いました。[MeCab](https://taku910.github.io/mecab/) + [mecab-ipadic-NEologd](https://github.com/neologd/mecab-ipadic-neologd) で得られる分かち書き結果を約90%の精度で再現することが出来ています。
 23 | 
 24 | 単語境界の判定には文中に出現する文字の種類や並び順の情報のみを用いるようになっており、文字や単語単位で固有の情報を一切利用していないため未知語に非常に強いのが特徴です。
 25 | 
 26 | 辞書を用いる [kuromoji.js](https://github.com/takuyaa/kuromoji.js/) などと異なり品詞の推定機能はありませんが、その分インストールも実行も軽量で環境を問わず動作します。
 27 | 
 28 | [^1]: ブラウザ用にES Module形式のコードを配布していますが、パッケージ自体が厳密にNode.jsのNative ESMに対応しているわけではありません。Node.jsでは従来通りCommonJS形式のパッケージとして読み込まれます。TypeScriptが正式にNative ESMに対応した段階でDual Package化する予定です。
 29 | 
 30 | ## 使い方
 31 | 
 32 | ### インストール
 33 | 
 34 | ```sh
 35 | npm install wakachigaki
 36 | ```
 37 | 
 38 | ### 分かち書きの実行
 39 | 
 40 | ```ts
 41 | import { tokenize } from 'wakachigaki'
 42 | 
 43 | /** CommonJSの場合 */
 44 | // const { tokenize } = require('wakachigaki')
 45 | 
 46 | tokenize('非常に効果的な機械学習モデル')
 47 | // => [ '非常', 'に', '効果', '的', 'な', '機械学習', 'モデル' ]
 48 | ```
 49 | 
 50 | ### 境界確率の取得
 51 | 
 52 | 分かち書きされた結果を直接取得するだけでなく、テキスト内の各文字についてその文字の直後に単語境界がある確率を0~1の範囲の数値で取得することが出来ます。
 53 | 
 54 | ```ts
 55 | import { features, predictProba } from 'wakachigaki'
 56 | 
 57 | // 特徴量を取得
 58 | const feats = features('非常に効果的な機械学習モデル')
 59 | 
 60 | predictProba(feats)
 61 | /*
 62 | => [
 63 |   0.0327992634970502,     // 非
 64 |   0.9901071412275622,     // 常
 65 |   0.9742190417471894,     // に
 66 |   0.04298367736033199,    // 効
 67 |   0.7249201317135311,     // 果
 68 |   0.9920294555733393,     // 的
 69 |   0.904908994982585,      // な
 70 |   0.10174356598870479,    // 機
 71 |   0.3827245071932094,     // 械
 72 |   0.11608892206899486,    // 学
 73 |   0.6410674063348171,     // 習
 74 |   0.0045548383234342614,  // モ
 75 |   0.00006214363582036111, // デ
 76 |   0.9720230891240956      // ル
 77 | ]
 78 | */
 79 | ```
 80 | 
 81 | `tokenize()` 関数は内部でこの数値が予め定義された閾値を超えているかどうかを判定の基準にしていて、閾値もまた `threshold` という変数でexportされています。
 82 | 
 83 | ```ts
 84 | import { features, predictProba, predict, threshold } from 'wakachigaki'
 85 | 
 86 | const feats = features('非常に効果的な機械学習モデル')
 87 | 
 88 | const probas = predictProba(feats)
 89 | 
 90 | // probas.map(p => p >= threshold) と同じ結果
 91 | predict(feats)
 92 | /*
 93 | => [
 94 |   false, // 非
 95 |   true,  // 常
 96 |   true,  // に
 97 |   false, // 効
 98 |   true,  // 果
 99 |   true,  // 的
100 |   true,  // な
101 |   false, // 機
102 |   false, // 械
103 |   false, // 学
104 |   true,  // 習
105 |   false, // モ
106 |   false, // デ
107 |   true   // ル
108 | ]
109 | */
110 | ```
111 | 
112 | ### ユーティリティ
113 | 
114 | 内部で文字種の判定に利用している正規表現と判定用の関数も利用可能です。
115 | 
116 | 正規表現の内容は [src/feature/regexp.ts](./src/feature/regexp.ts) を参照して下さい。
117 | 
118 | ```ts
119 | import {
120 |   regexp,
121 |   isHiragana,
122 |   isKatakana,
123 |   isKanji,
124 |   isNumeralKanji,
125 |   isAlphabet,
126 |   isNumeral,
127 | } from 'wakachigaki'
128 | 
129 | /**
130 |  * 与えられた文字列が文字種判定用の正規表現を満たすかどうかチェックする関数。入力は複数文字でもOK
131 |  * 以下は全てtrueになる例
132 |  **/
133 | 
134 | // ひらがな
135 | isHiragana('あ')
136 | 
137 | // カタカナ
138 | isKatakana('カ')
139 | 
140 | // 漢字
141 | isKanji('漢字')
142 | 
143 | // 漢数字
144 | isNumeralKanji('一二三四五六七八九十百千万億兆')
145 | 
146 | // アルファベット (半角, 全角を無視)
147 | isAlphabet('aａ')
148 | 
149 | // 数字 (半角, 全角を無視)
150 | isNumeral('9９')
151 | ```
152 | 
153 | ### ブラウザ対応
154 | 
155 | `wakachigaki` 自体では特にブラウザ用にビルドしたコードを配布していませんが、元々他パッケージへの依存がなくES Module形式に対応しているため [unpkg.com](https://unpkg.com) などのCDNを経由すればすぐに動作させることが出来ます。
156 | 
157 | 下記のコードをhtmlに貼り付ければ多くのブラウザでそのまま動くはずです。
158 | 
159 | ```html
160 | <script type="module">
161 |   import { tokenize } from 'https://unpkg.com/wakachigaki@1.3.2'
162 |   console.log(tokenize('ブラウザで分かち書きのテスト'))
163 |   // => [ 'ブラウザ', 'で', '分かち', '書き', 'の', 'テスト']
164 | </script>
165 | ```
166 | 
167 | ### Deno対応
168 | 
169 | `wakachigaki` はNode.js固有のAPIを使用していないためDeno環境でも動作します。
170 | 
171 | ブラウザ同様にCDNとしてunpkgを利用することも出来ますが、TypeScriptの型定義の配布に対応した [Skypack](https://www.skypack.dev) を利用するのがおすすめです。
172 | 
173 | ```ts
174 | import { tokenize } from 'https://cdn.skypack.dev/wakachigaki@1.3.2?dts'
175 | 
176 | console.log(tokenize('Denoで分かち書きのテスト'))
177 | // => ['Deno', 'で', '分かち', '書き', 'の', 'テスト']
178 | ```
179 | 
180 | ## 精度の比較
181 | 
182 | 下記の表はJS環境で利用可能な類似のライブラリと併せて精度の比較を行ったものです。
183 | 
184 | 比較用のコーパスにはNHN Japan株式会社提供の[livedoor ニュースコーパス](https://www.rondhuit.com/download.html#ldcc)を利用しました。
185 | 
186 | 各記事を行単位に分解し適宜URLのみのものを取り除くなど前処理をして得られた約10万の文章について、[MeCab](https://taku910.github.io/mecab/) + [mecab-ipadic-NEologd](https://github.com/neologd/mecab-ipadic-neologd)で行った分かち書き結果を正解として各数値を算出しました。
187 | 
188 | 従来の手法と比べても遜色ない結果が得られていることがわかります。
189 | 
190 | ただし`wakachigaki`がそもそも学習に`mecab-ipadic-NEologd`を用いているために新語や複合語に強くなっており、同列の条件での比較にはなっていない点に注意して下さい。あくまで参考程度の結果です。
191 | 
192 | 特に高く見える適合率については、他ライブラリと比べて`判定した単語境界の数`が少なく`実際の単語境界の数`に近い数字になっていることからも`mecab-ipadic-NEologd`から学習した複合語の判定傾向の影響が強く出ていると考えられます。
193 | 
194 | | ライブラリ                   | 単語境界の数 |  判定数 |  正答数 | 一致率 | 適合率 | 再現率 | F1スコア |
195 | | :--------------------------- | -----------: | ------: | ------: | -----: | -----: | -----: | -------: |
196 | | wakachigaki                  |      4611683 | 4589286 | 4234587 |  0.919 |  0.923 |  0.918 |    0.920 |
197 | | TinySegmenter                |      4611683 | 5055596 | 4170853 |  0.824 |  0.825 |  0.904 |    0.863 |
198 | | kuromoji.js (デフォルト辞書) |      4611683 | 5015672 | 4312946 |  0.872 |  0.860 |  0.935 |    0.896 |
199 | 
200 | **項目の意味**
201 | 
202 | | 項目名       | 説明                                                  |
203 | | :----------- | :---------------------------------------------------- |
204 | | 単語境界の数 | コーパス全体に対してMeCabが出力した単語境界の総数     |
205 | | 判定数       | ライブラリが出力した単語境界の総数                    |
206 | | 正答数       | ライブラリが出力した単語境界のうち正解だったものの数  |
207 | | 一致率       | 非単語境界と判定したものも含む全体の一致率 (Accuracy) |
208 | | 適合率       | 正答数 ÷ 判定数 (Precision)                           |
209 | | 再現率       | 正答数 ÷ 単語境界の数 (Recall)                        |
210 | | F1スコア     | 適合率と再現率の調和平均                              |
211 | 
212 | ## 開発の動機
213 | 
214 | JSでアプリケーションを開発していると検索やレコメンド機能の実装などで日本語の分かち書きを行いたい時があります。
215 | 
216 | そんな時に `npm install` したらサクっと使えてブラウザやDenoでも動いてESM, TypeScriptにも対応しているものがあれば嬉しいと思ったのが直接の動機です。
217 | 
218 | 特に最近はサーバとブラウザどちらでも動く処理を書く機会が多かったりサーバレス系のインフラの出番が増えて取り回しの良い軽量なライブラリが求められます。
219 | 
220 | そこで `wakachigaki` は **動作に環境依存がないこと**、**バンドルサイズがごく軽量なこと**、**ES Module, TypeScriptに対応していること**を目指して開発されました。
221 | 
222 | ## モデルの学習方法
223 | 
224 | 機械学習モデルを利用した分かち書きの学習方法を検討している際 [TinySegmenter](http://chasen.org/~taku/software/TinySegmenter/) がほぼ同様の構成になっているのを発見し大いに参考にさせて頂きました。
225 | 
226 | データの加工と学習にはPythonを使い、[Wikipedia日本語版のダンプデータ](https://dumps.wikimedia.org/jawiki/)を全量使ってモデルの学習を行っています。
227 | 
228 | 教師データの分かち書きには [MeCab](https://taku910.github.io/mecab/) + [mecab-ipadic-NEologd](https://github.com/neologd/mecab-ipadic-neologd) を利用しました。
229 | 
230 | コーパス内の各文字に対して複数のパラメータでNgramを取得し、漢字、ひらがな、カタカナなど文字の種類と文字のハッシュ値から算出した文字グループを特徴量として抽出、その文字の直後が単語境界かどうかの二値分類を学習しています。
231 | 
232 | TinySegmenterでも解説されていますが、クライアントに配布することも考えると学習結果として取り出すモデルのパラメータは出来る限り軽量にしたかったのでモデルはL1ノルム正則化ロジスティック回帰を採用し、確率的勾配降下法によるミニバッチ学習を行いました。
233 | 
234 | 結果、モデルを表現するパラメータのJSONファイルはgzip後でわずか3Kbという軽量サイズになっています。
235 | 
236 | (学習用のコードもGitHubで公開する予定です)
237 | 
238 | ## クレジット
239 | 
240 | 機械学習モデルの構築にあたり非常に優秀な機械学習エンジニアの友人たちに多大な助力を頂きました。感謝します🙌
241 | 
242 | [@mski-iksm (GitHub)](https://github.com/mski-iksm)
243 | 
244 | [@Gashin_Learning (Twitter)](https://twitter.com/Gashin_Learning)
245 | 


--------------------------------------------------------------------------------
/src/model/model.ts:
--------------------------------------------------------------------------------
  1 | import type { Model } from './types'
  2 | 
/**
 * Cut-off applied to a predicted word-boundary probability. Per the README,
 * `predict(feats)` gives the same result as
 * `predictProba(feats).map(p => p >= threshold)`.
 */
export const threshold = 0.5
  4 | 
  5 | export const model: Model = {
  6 |   version: 2,
  7 |   config: { nBuckets: 262144, size: 3, offset: 3, scale: 1000 },
  8 |   weight: {
  9 |     type: {
 10 |       '1': {
 11 |         '-3': { C: 86, K: 39, A: 125, N: 103, B: -73, D: -1, E: -90 },
 12 |         '-2': { N: 358, O: -75, D: -73, E: -1 },
 13 |         '-1': { S: 651, C: 40, H: -508, O: -400, E: -73 },
 14 |         '0': { C: 237, H: 2347, O: 1581 },
 15 |         '1': { S: -30, C: -704, O: 2188, Z: 224 },
 16 |         '2': { S: 0, C: 204, A: -63, O: -1, Y: 224 },
 17 |         '3': { H: 44, N: 147, O: -10, Z: -72, X: 224 },
 18 |       },
 19 |       '2': {
 20 |         '-3': { CH: 10, HC: 78, KK: 165, BD: -73, DE: -1 },
 21 |         '-2': { HC: 153, HO: -23, KC: 208, NC: -159, NN: 148, OH: 0, DE: -73 },
 22 |         '-1': {
 23 |           SC: 26,
 24 |           CC: 853,
 25 |           CN: -311,
 26 |           CO: 750,
 27 |           HC: -17,
 28 |           HO: 1542,
 29 |           KK: 204,
 30 |           AA: 478,
 31 |           AO: -208,
 32 |           NC: 858,
 33 |           NN: -357,
 34 |           NO: -31,
 35 |           OA: -35,
 36 |           OO: 894,
 37 |         },
 38 |         '0': {
 39 |           SC: -401,
 40 |           CS: -552,
 41 |           CC: -1815,
 42 |           CN: 171,
 43 |           CO: 720,
 44 |           HH: -3399,
 45 |           HN: 8421,
 46 |           KH: 9425,
 47 |           KK: -10891,
 48 |           AC: 805,
 49 |           AH: 327,
 50 |           AA: -3803,
 51 |           NC: -336,
 52 |           NN: -3235,
 53 |           OC: 1211,
 54 |           OH: 501,
 55 |           OO: -4040,
 56 |         },
 57 |         '1': { CC: 321, CO: -160, KH: -13044, OC: 4, ZY: 224 },
 58 |         '2': { HK: -1, KK: 8, YX: 224 },
 59 |       },
 60 |       '3': {
 61 |         '-3': {
 62 |           CCC: 316,
 63 |           CCH: -35,
 64 |           HHH: 292,
 65 |           HOC: 145,
 66 |           HOH: -175,
 67 |           KKC: 2,
 68 |           KKK: 255,
 69 |           NCN: -203,
 70 |           OCC: -62,
 71 |           BDE: -73,
 72 |         },
 73 |         '-2': {
 74 |           SCC: -4,
 75 |           CCC: -345,
 76 |           CCH: 459,
 77 |           CCO: 155,
 78 |           CHH: 732,
 79 |           HCC: 358,
 80 |           HHH: 534,
 81 |           HHO: 420,
 82 |           HOH: -591,
 83 |           KHC: -22,
 84 |           KKK: 705,
 85 |           AAA: 72,
 86 |           NCC: -83,
 87 |           NCN: -107,
 88 |           OCC: 131,
 89 |           OCH: -163,
 90 |           OHH: -5,
 91 |         },
 92 |         '-1': {
 93 |           SCC: 283,
 94 |           CCS: -345,
 95 |           CCH: 870,
 96 |           CCK: -1126,
 97 |           CCO: 247,
 98 |           CHC: -927,
 99 |           CHH: 669,
100 |           CHN: 8835,
101 |           HCC: -698,
102 |           HCH: -1216,
103 |           HCN: 725,
104 |           HHC: 150,
105 |           HHH: -66,
106 |           HNN: -2984,
107 |           KCC: -472,
108 |           KKH: 11749,
109 |           KKK: 908,
110 |           KOK: -2409,
111 |           AAA: -1543,
112 |           AOA: -322,
113 |           NCC: 746,
114 |           NCH: 788,
115 |           NCN: -486,
116 |           NNN: -2,
117 |           OCC: -217,
118 |           OCH: -655,
119 |           OCN: 388,
120 |           OHC: 0,
121 |           OHH: -412,
122 |           OAA: -6,
123 |           OOO: -61,
124 |         },
125 |         '0': {
126 |           SCC: -176,
127 |           CCC: 285,
128 |           CCH: -52,
129 |           CCK: -87,
130 |           CHH: -704,
131 |           HCC: 1613,
132 |           HHC: -55,
133 |           KCC: -90,
134 |           KCH: 84,
135 |           KHH: 6955,
136 |           KKH: -14071,
137 |           KKK: 5059,
138 |           KOK: -2315,
139 |           AAA: -66,
140 |           AOA: -1259,
141 |           NCN: -43,
142 |           NNC: -7229,
143 |           NNN: -208,
144 |           OCC: 67,
145 |         },
146 |         '1': {
147 |           CCC: -359,
148 |           CCH: 235,
149 |           CCO: 69,
150 |           CHC: 34,
151 |           CKK: -162,
152 |           HHC: -211,
153 |           HHH: 364,
154 |           HHK: -2,
155 |           HHO: 167,
156 |           KHH: -6718,
157 |           KKK: 784,
158 |           NNC: -301,
159 |           OCC: 11,
160 |           ZYX: 224,
161 |         },
162 |       },
163 |     },
164 |     hash: {
165 |       '1': {
166 |         '-3': {
167 |           '5a92': -90,
168 |           a098: -60,
169 |           cf31: -73,
170 |           '16a1d': 0,
171 |           '1bf4a': 250,
172 |           '2755a': 0,
173 |           '28187': -192,
174 |           '36a04': -1,
175 |           '3900e': -11,
176 |           '3a93d': -415,
177 |         },
178 |         '-2': {
179 |           '353b': -1,
180 |           '5a92': -1,
181 |           a098: -185,
182 |           '16481': -239,
183 |           '19034': -13,
184 |           '25417': 41,
185 |           '2585d': 0,
186 |           '28187': -301,
187 |           '36a04': -73,
188 |           '3dc1f': 204,
189 |         },
190 |         '-1': {
191 |           '353b': -740,
192 |           '3695': -65,
193 |           '5a92': -73,
194 |           '74df': 0,
195 |           '83c4': 145,
196 |           a098: -192,
197 |           a429: 0,
198 |           cdac: -423,
199 |           '157b9': -109,
200 |           '16481': -680,
201 |           '19034': 21,
202 |           '19c16': -400,
203 |           '1b3f8': 2006,
204 |           '1bf4a': -901,
205 |           '206fe': 174,
206 |           '2152a': -113,
207 |           '2178a': -222,
208 |           '23eba': -350,
209 |           '25417': 396,
210 |           '28229': 180,
211 |           '2aef9': 114,
212 |           '2af2e': -423,
213 |           '2ecb3': 10,
214 |           '2fb50': -19,
215 |           '3024d': 0,
216 |           '319cd': -299,
217 |           '34449': 339,
218 |           '34509': 4,
219 |           '34630': 257,
220 |           '3900e': 12,
221 |           '394bf': 171,
222 |           '3d03d': 487,
223 |           '3d1ea': -576,
224 |           '3d393': 1,
225 |           '3dc1f': 619,
226 |           '3f118': -736,
227 |         },
228 |         '0': {
229 |           '1667': -765,
230 |           '183c': 4001,
231 |           '353b': 438,
232 |           '371d': -533,
233 |           '542d': 32,
234 |           '59d8': -49,
235 |           '74df': -1159,
236 |           '7771': 14,
237 |           '7bf3': 68,
238 |           '806a': -1226,
239 |           '8764': 448,
240 |           '8d9f': -103,
241 |           a098: 1831,
242 |           a429: 189,
243 |           cdac: 4162,
244 |           cf45: 1445,
245 |           d0e4: 1,
246 |           e0ab: 474,
247 |           e17c: -414,
248 |           e587: 0,
249 |           '1271c': -116,
250 |           '16481': 671,
251 |           '184ff': -217,
252 |           '19034': 161,
253 |           '19c16': 11555,
254 |           '1b3f8': -485,
255 |           '1b7ee': 399,
256 |           '1b8d3': 124,
257 |           '1bb04': -1036,
258 |           '1bf4a': 8956,
259 |           '1d26f': 22,
260 |           '1ecca': -277,
261 |           '206fe': -150,
262 |           '215f3': 86,
263 |           '2178a': 1867,
264 |           '2244d': -261,
265 |           '238f4': 394,
266 |           '25417': -1292,
267 |           '26fbc': 23,
268 |           '274e5': -679,
269 |           '28187': 115,
270 |           '28229': 1545,
271 |           '2aef9': -3861,
272 |           '2b3af': -82,
273 |           '2ecb3': 368,
274 |           '30603': 28,
275 |           '328aa': 907,
276 |           '33a9e': 947,
277 |           '34449': -27,
278 |           '345f6': 0,
279 |           '34630': -168,
280 |           '3817a': -244,
281 |           '38e48': 36,
282 |           '3900e': 773,
283 |           '394bf': 468,
284 |           '3a93d': 809,
285 |           '3d1ea': 1438,
286 |           '3dc1f': -2113,
287 |           '3dd5f': 225,
288 |           '3f118': -2566,
289 |           '3fced': -1246,
290 |           '3fd0d': 0,
291 |         },
292 |         '1': {
293 |           f20: -554,
294 |           '1667': -1376,
295 |           '183c': 557,
296 |           '244f': -504,
297 |           '353b': 1777,
298 |           '3a80': -184,
299 |           '5767': 224,
300 |           '580f': -3866,
301 |           '59d8': -1220,
302 |           '76a6': -925,
303 |           '7895': 0,
304 |           '792a': 499,
305 |           '83c4': -2879,
306 |           '8764': 354,
307 |           '8d9f': -1529,
308 |           a098: 2740,
309 |           a429: 956,
310 |           bdce: -390,
311 |           bede: 357,
312 |           cdac: 360,
313 |           cf45: 1708,
314 |           e17c: -2210,
315 |           e242: -6,
316 |           '10597': -264,
317 |           '10a1b': 850,
318 |           '1249b': -384,
319 |           '1271c': -18580,
320 |           '157b9': -39,
321 |           '15a8b': 741,
322 |           '16481': 2401,
323 |           '19034': 2069,
324 |           '19c16': 1530,
325 |           '19d61': 0,
326 |           '1bb04': -2232,
327 |           '1bd33': -2423,
328 |           '1bf4a': 13687,
329 |           '1e617': 112,
330 |           '1fcd7': -4962,
331 |           '20311': 1025,
332 |           '20de5': -224,
333 |           '2152a': -380,
334 |           '215f3': -1190,
335 |           '2178a': -43779,
336 |           '2244d': -113,
337 |           '22a6f': 5,
338 |           '238f4': -653,
339 |           '23bca': -806,
340 |           '23eba': -695,
341 |           '24a51': -804,
342 |           '25417': 1793,
343 |           '274e5': 118,
344 |           '27910': -1378,
345 |           '28050': -2071,
346 |           '28fdc': -1055,
347 |           '29ab9': -142,
348 |           '2af2e': 973,
349 |           '2ecb3': -8687,
350 |           '305ad': -614,
351 |           '3255f': 830,
352 |           '328aa': 2761,
353 |           '33a9e': 427,
354 |           '34449': -213,
355 |           '34d65': -121,
356 |           '364bb': 292,
357 |           '3817a': 1244,
358 |           '3833b': 23,
359 |           '38d21': 0,
360 |           '3900e': 2337,
361 |           '39e55': -131,
362 |           '3a65f': 2,
363 |           '3a93d': -2185,
364 |           '3b0fc': -1239,
365 |           '3b22a': -9,
366 |           '3bd09': 1256,
367 |           '3d03d': 22,
368 |           '3d1ea': 2051,
369 |           '3d393': -711,
370 |           '3d511': 0,
371 |           '3dc1f': 398,
372 |           '3f118': 832,
373 |           '3fc75': 2,
374 |           '3fced': 2525,
375 |           '3fe94': -2285,
376 |         },
377 |         '2': {
378 |           f20: -145,
379 |           '353b': 119,
380 |           '59d8': 143,
381 |           '74df': 244,
382 |           '792a': -23,
383 |           cdac: -177,
384 |           e17c: 283,
385 |           '106dd': 224,
386 |           '16481': -348,
387 |           '184ff': -8,
388 |           '19034': 267,
389 |           '1b7ee': -203,
390 |           '1bb04': 501,
391 |           '1bf4a': -145,
392 |           '1e2f9': 163,
393 |           '20de5': -211,
394 |           '2178a': 197,
395 |           '21806': -325,
396 |           '219b9': 0,
397 |           '23501': -398,
398 |           '23eba': -911,
399 |           '26458': 89,
400 |           '2ecb3': -80,
401 |           '328aa': -71,
402 |           '33a9e': -205,
403 |           '34449': 28,
404 |           '3d1ea': -29,
405 |           '3fced': 282,
406 |         },
407 |         '3': {
408 |           '183c': -564,
409 |           '5767': -72,
410 |           '792a': 6,
411 |           cdac: 412,
412 |           '1271c': 151,
413 |           '16481': -31,
414 |           '1b111': -4,
415 |           '21806': 78,
416 |           '2364b': 224,
417 |           '23eba': -833,
418 |           '2ecb3': 65,
419 |           '3900e': -14,
420 |           '3a93d': -495,
421 |           '3dda0': 0,
422 |         },
423 |       },
424 |       '2': {
425 |         '-3': {
426 |           '10ce0': 0,
427 |           '17db9': 0,
428 |           '1c4ea': 0,
429 |           '1e501': -1322,
430 |           '1fb90': 0,
431 |           '2190f': 0,
432 |           '22de1': -1,
433 |           '33238': 0,
434 |           '34c04': 1387,
435 |           '356c6': 0,
436 |           '3baf1': -73,
437 |         },
438 |         '-2': {
439 |           '21c8': 203,
440 |           ad00: 0,
441 |           f3ce: 57,
442 |           '159c6': 0,
443 |           '16fab': -91,
444 |           '1d279': 0,
445 |           '22de1': -81,
446 |           '25424': 0,
447 |           '28c6e': -339,
448 |           '2c552': 0,
449 |           '2d7c9': 0,
450 |           '36264': -12,
451 |           '369e1': 0,
452 |           '39545': 0,
453 |         },
454 |         '-1': {
455 |           '21c8': -505,
456 |           '7473': 873,
457 |           a50f: 0,
458 |           dd0c: 0,
459 |           f3ce: 472,
460 |           '13b7f': 0,
461 |           '18738': -4,
462 |           '197be': 627,
463 |           '1c06d': 0,
464 |           '1cf23': 493,
465 |           '1e54a': 435,
466 |           '21bac': 0,
467 |           '24ca7': -456,
468 |           '28008': -363,
469 |           '2ae60': 475,
470 |           '2fd84': 0,
471 |           '30239': 0,
472 |           '32806': 275,
473 |           '36ef7': 0,
474 |           '39334': 0,
475 |           '3b5e9': 0,
476 |           '3c196': 390,
477 |           '3ca75': 0,
478 |           '3cba2': 0,
479 |           '3cdcc': -122,
480 |           '3ef8f': -403,
481 |         },
482 |         '0': {
483 |           '167b': -58,
484 |           '21c8': -4261,
485 |           '34ae': 647,
486 |           '508b': -2429,
487 |           '7473': -480,
488 |           bc2a: 0,
489 |           c3af: -223,
490 |           d793: 0,
491 |           e98f: 0,
492 |           ec66: -3884,
493 |           ed42: 236,
494 |           f3ce: -2708,
495 |           '11b00': 3580,
496 |           '11bd5': 0,
497 |           '12122': -977,
498 |           '1246c': 0,
499 |           '129b8': 0,
500 |           '13455': 0,
501 |           '1440c': 455,
502 |           '1482a': -1121,
503 |           '1487f': -124,
504 |           '161ed': -546,
505 |           '164d9': 0,
506 |           '16f5f': -29,
507 |           '19215': -395,
508 |           '197be': 650,
509 |           '1b57b': -4388,
510 |           '1c00f': 39,
511 |           '1cf0a': 0,
512 |           '1cf23': -4628,
513 |           '1d62b': 1428,
514 |           '1e34f': -2630,
515 |           '1e4ca': 0,
516 |           '21cd1': -3514,
517 |           '2499e': 37,
518 |           '289f7': -801,
519 |           '28c6e': -110,
520 |           '2a6a2': 0,
521 |           '2ae60': 245,
522 |           '2b1ef': -27,
523 |           '2c570': 0,
524 |           '2cbde': -896,
525 |           '2d9d5': -1,
526 |           '2ea4c': 0,
527 |           '2fbe3': 0,
528 |           '2ff65': 0,
529 |           '30e87': -465,
530 |           '32b7a': 0,
531 |           '32ec3': 0,
532 |           '3311b': -7386,
533 |           '339e7': 151,
534 |           '344d5': 0,
535 |           '356fb': -593,
536 |           '36264': -2124,
537 |           '399bc': -3824,
538 |           '3b99a': 0,
539 |           '3c196': 440,
540 |           '3cdcc': 0,
541 |           '3ef0a': 0,
542 |         },
543 |         '1': {
544 |           '167b': -572,
545 |           '3826': -3877,
546 |           '4cad': 0,
547 |           '7a9f': -539,
548 |           a153: 665,
549 |           be59: 452,
550 |           f3ce: 1553,
551 |           faa4: 0,
552 |           '10e08': 0,
553 |           '11569': 381,
554 |           '11941': 1574,
555 |           '11b00': 616,
556 |           '16efe': -1,
557 |           '17e39': 0,
558 |           '197be': 2983,
559 |           '19d6d': 0,
560 |           '1bab9': 60,
561 |           '1cf23': 2859,
562 |           '1f9cf': 0,
563 |           '2068a': 0,
564 |           '21cd1': 425,
565 |           '24e71': 199,
566 |           '2544f': 581,
567 |           '2549e': 0,
568 |           '289f7': 2461,
569 |           '2ae60': -2323,
570 |           '2b4e1': 3494,
571 |           '2e033': 0,
572 |           '31d2a': 2423,
573 |           '3311b': 1154,
574 |           '35463': 393,
575 |           '388ed': 0,
576 |           '399bc': 1085,
577 |           '3b623': 0,
578 |           '3c196': 481,
579 |           '3c89c': 0,
580 |           '3fdbd': 0,
581 |         },
582 |         '2': {
583 |           '2d24': 222,
584 |           '1149d': 985,
585 |           '17420': 0,
586 |           '197be': 336,
587 |           '19bd4': 0,
588 |           '25627': 0,
589 |           '2b1ef': -475,
590 |           '2d94a': 0,
591 |           '3e185': 401,
592 |         },
593 |       },
594 |       '3': {
595 |         '-3': { bfa2: -72 },
596 |         '-2': { '8dcb': -7, '38afb': -411 },
597 |         '-1': {
598 |           '879': -493,
599 |           bba3: -222,
600 |           '1604f': -1,
601 |           '18934': -1,
602 |           '359f0': -1415,
603 |           '3fb76': -1500,
604 |         },
605 |         '0': { '10615': -418, '359f0': -2722, '3c742': 2648 },
606 |         '1': { '34daf': 221, '3526b': 427, '359f0': 0, '3ef72': 213 },
607 |       },
608 |     },
609 |     distance: -140,
610 |     bias: 147,
611 |   },
612 | }
613 | 


--------------------------------------------------------------------------------