├── .gitignore
├── .mocharc.yml
├── .npmignore
├── .nvmrc
├── .travis.yml
├── CHANGELOG.md
├── MIT-License
├── README.md
├── __root.d.ts
├── __root.js
├── __root.ts
├── demo
    ├── README.md
    └── sample
    │   └── 果体.txt
├── dicts
    └── .gitkeep
├── index.d.ts
├── index.js
├── index.ts
├── jest.config.js
├── jetbrains.svg
├── lib
    ├── POSTAG.d.ts
    ├── POSTAG.js
    ├── POSTAG.ts
    ├── Segment.d.ts
    ├── Segment.js
    ├── Segment.ts
    ├── const.d.ts
    ├── const.js
    ├── const.ts
    ├── defaults
    │   ├── dict.d.ts
    │   ├── dict.js
    │   ├── dict.ts
    │   ├── index.d.ts
    │   ├── index.js
    │   ├── index.ts
    │   ├── mods.d.ts
    │   ├── mods.js
    │   └── mods.ts
    ├── fs
    │   ├── get.d.ts
    │   ├── get.js
    │   └── get.ts
    ├── index.d.ts
    ├── index.js
    ├── index.ts
    ├── loader.d.ts
    ├── loader.js
    ├── loader.ts
    ├── mod
    │   ├── CHS_NAMES.d.ts
    │   ├── CHS_NAMES.js
    │   ├── CHS_NAMES.ts
    │   ├── COLORS.d.ts
    │   ├── COLORS.js
    │   ├── COLORS.ts
    │   ├── Optimizer.d.ts
    │   ├── Optimizer.js
    │   ├── Optimizer.ts
    │   ├── Tokenizer.d.ts
    │   ├── Tokenizer.js
    │   ├── Tokenizer.ts
    │   ├── const.d.ts
    │   ├── const.js
    │   ├── const.ts
    │   ├── data
    │   │   ├── STOPWORD.d.ts
    │   │   ├── STOPWORD.js
    │   │   └── STOPWORD.ts
    │   ├── index.d.ts
    │   ├── index.js
    │   ├── index.ts
    │   ├── mod.d.ts
    │   ├── mod.js
    │   └── mod.ts
    ├── segment
    │   ├── core.d.ts
    │   ├── core.js
    │   ├── core.ts
    │   ├── defaults.d.ts
    │   ├── defaults.js
    │   ├── defaults.ts
    │   ├── index.d.ts
    │   ├── index.js
    │   ├── index.ts
    │   ├── method.d.ts
    │   ├── method.js
    │   ├── method.ts
    │   ├── methods
    │   │   ├── _get_text.d.ts
    │   │   ├── _get_text.js
    │   │   ├── _get_text.ts
    │   │   ├── convertSynonym.d.ts
    │   │   ├── convertSynonym.js
    │   │   ├── convertSynonym.ts
    │   │   ├── doSegment.d.ts
    │   │   ├── doSegment.js
    │   │   ├── doSegment.ts
    │   │   ├── getOptionsDoSegment.d.ts
    │   │   ├── getOptionsDoSegment.js
    │   │   ├── getOptionsDoSegment.ts
    │   │   ├── indexOf.d.ts
    │   │   ├── indexOf.js
    │   │   ├── indexOf.ts
    │   │   ├── listModules.d.ts
    │   │   ├── listModules.js
    │   │   ├── listModules.ts
    │   │   ├── split.d.ts
    │   │   ├── split.js
    │   │   ├── split.ts
    │   │   ├── stringify.d.ts
    │   │   ├── stringify.js
    │   │   ├── stringify.ts
    │   │   ├── useModules.d.ts
    │   │   ├── useModules.js
    │   │   ├── useModules.ts
    │   │   ├── useModules2.d.ts
    │   │   ├── useModules2.js
    │   │   └── useModules2.ts
    │   ├── types.d.ts
    │   ├── types.js
    │   └── types.ts
    ├── submod.d.ts
    ├── submod.js
    ├── submod.ts
    ├── submod
    │   ├── AdjectiveOptimizer.d.ts
    │   ├── AdjectiveOptimizer.js
    │   ├── AdjectiveOptimizer.ts
    │   ├── ChsNameOptimizer.d.ts
    │   ├── ChsNameOptimizer.js
    │   ├── ChsNameOptimizer.ts
    │   ├── ChsNameTokenizer.d.ts
    │   ├── ChsNameTokenizer.js
    │   ├── ChsNameTokenizer.ts
    │   ├── DatetimeOptimizer.d.ts
    │   ├── DatetimeOptimizer.js
    │   ├── DatetimeOptimizer.ts
    │   ├── DictOptimizer.d.ts
    │   ├── DictOptimizer.js
    │   ├── DictOptimizer.ts
    │   ├── DictTokenizer.d.ts
    │   ├── DictTokenizer.js
    │   ├── DictTokenizer.ts
    │   ├── EmailOptimizer.d.ts
    │   ├── EmailOptimizer.js
    │   ├── EmailOptimizer.ts
    │   ├── ForeignOptimizer.d.ts
    │   ├── ForeignOptimizer.js
    │   ├── ForeignOptimizer.ts
    │   ├── ForeignTokenizer.d.ts
    │   ├── ForeignTokenizer.js
    │   ├── ForeignTokenizer.ts
    │   ├── JpSimpleTokenizer.d.ts
    │   ├── JpSimpleTokenizer.js
    │   ├── JpSimpleTokenizer.ts
    │   ├── PunctuationTokenizer.d.ts
    │   ├── PunctuationTokenizer.js
    │   ├── PunctuationTokenizer.ts
    │   ├── SingleTokenizer.d.ts
    │   ├── SingleTokenizer.js
    │   ├── SingleTokenizer.ts
    │   ├── URLTokenizer.d.ts
    │   ├── URLTokenizer.js
    │   ├── URLTokenizer.ts
    │   ├── WildcardTokenizer.d.ts
    │   ├── WildcardTokenizer.js
    │   ├── WildcardTokenizer.ts
    │   ├── ZhRadicalTokenizer.d.ts
    │   ├── ZhRadicalTokenizer.js
    │   ├── ZhRadicalTokenizer.ts
    │   ├── ZhtSynonymOptimizer.d.ts
    │   ├── ZhtSynonymOptimizer.js
    │   ├── ZhtSynonymOptimizer.ts
    │   ├── ZhuyinTokenizer.d.ts
    │   ├── ZhuyinTokenizer.js
    │   └── ZhuyinTokenizer.ts
    └── util
    │   ├── debug.d.ts
    │   ├── debug.js
    │   ├── debug.ts
    │   ├── index.d.ts
    │   ├── index.js
    │   ├── index.ts
    │   ├── isUnset.d.ts
    │   ├── isUnset.js
    │   └── isUnset.ts
├── package.json
├── project.config.d.ts
├── project.config.js
├── project.config.ts
├── repl
├── script
    ├── publish-after.d.ts
    ├── publish-after.js
    ├── publish-after.ts
    ├── publish-after2.d.ts
    ├── publish-after2.js
    ├── publish-after2.ts
    ├── sort-stringify-cache.d.ts
    ├── sort-stringify-cache.js
    └── sort-stringify-cache.ts
├── test
    ├── __snapshots__
    │   └── bug.spec.ts.snap
    ├── _local-dev.ts
    ├── bug.spec.ts
    ├── chk-fixme.ts
    ├── demo.cache.ts
    ├── demo.glob.ts
    ├── demo.ts
    ├── lazy.fixme.ts
    ├── lib
    │   ├── delete-cache.ts
    │   ├── index.ts
    │   └── util.ts
    ├── res
    │   ├── default.ts
    │   ├── fixme.data.ts
    │   ├── gc.data.ts
    │   ├── gc.not
    │   │   └── 666962621.txt
    │   ├── lazy.index.ts
    │   ├── lazy.index
    │   │   ├── tests_lazy_array.ts
    │   │   ├── tests_lazy_base.ts
    │   │   ├── tests_lazy_base_not.ts
    │   │   ├── tests_lazy_indexof.ts
    │   │   └── tests_lazy_indexof_not.ts
    │   ├── lazy.novel.ts
    │   └── ウォルテニア戦記
    │   │   ├── 第11話【西へ】其2.txt
    │   │   ├── 第11話【西へ】其2_cjk2zht.txt
    │   │   ├── 第11話【西へ】其2_cn2tw.txt
    │   │   ├── 第11話【西へ】其2_opencc.txt
    │   │   ├── 第11話【西へ】其2_out.txt
    │   │   └── 第11話【西へ】其2_zh2jp.txt
    ├── script
    │   └── build-submod.ts
    ├── sleep.ts
    ├── submod.spec.ts
    ├── temp
    │   ├── .gitignore
    │   ├── cache
    │   │   ├── 0
    │   │   │   ├── char.txt
    │   │   │   ├── eng.txt
    │   │   │   └── other.txt
    │   │   ├── a.txt
    │   │   ├── b.txt
    │   │   ├── c.txt
    │   │   ├── d.txt
    │   │   ├── e.txt
    │   │   ├── f.txt
    │   │   ├── g.txt
    │   │   ├── h.txt
    │   │   ├── i.txt
    │   │   ├── j.txt
    │   │   ├── k.txt
    │   │   ├── l.txt
    │   │   ├── m.txt
    │   │   ├── n.txt
    │   │   ├── o.txt
    │   │   ├── p.txt
    │   │   ├── q.txt
    │   │   ├── r.txt
    │   │   ├── s.txt
    │   │   ├── t.txt
    │   │   ├── u.txt
    │   │   ├── v.txt
    │   │   ├── w.txt
    │   │   ├── x.txt
    │   │   ├── y.txt
    │   │   └── z.txt
    │   └── stringify.sorted.txt
    ├── test.ts
    ├── test_segment.test.ts
    ├── version.spec.ts
    ├── versions.spec.ts
    ├── word.novel.test.ts
    ├── word.test.ts
    ├── z.0010.test.ts
    ├── z.gc.not.test.ts
    └── z.gc.test.ts
├── tsconfig.json
├── typedoc.config.js
├── version.d.ts
├── version.js
└── version.ts


/.mocharc.yml:
--------------------------------------------------------------------------------
1 | require:
2 | #  - esm
3 |   - ts-node/register
4 | timeout: 0
5 | color: true
6 | extension:
7 |   - ts
8 | 


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
  1 | /.pnp
  2 | .pnp.js
  3 | .idea
  4 | ~ci.list.txt
  5 | ~ci.log.txt
  6 | ~ci.errors.txt
  7 | *.stackdump
  8 | *.bak
  9 | *.old
 10 | *.log
 11 | tsconfig.json
 12 | package-lock.json
 13 | test
 14 | .github
 15 | .gitkeep
 16 | /.*/
 17 | /.*
 18 | tests
 19 | /~*
 20 | __test__
 21 | __tests__
 22 | node_modules
 23 | /node_modules/
 24 | **/node_modules/
 25 | *.ts
 26 | !*.d.ts
 27 | /bin/**/*.d.ts
 28 | /bin/*.d.ts
 29 | 
 30 | !*.d.mts
 31 | /bin/**/*.d.mts
 32 | /bin/*.d.mts
 33 | 
 34 | !*.d.cts
 35 | /bin/**/*.d.cts
 36 | /bin/*.d.cts
 37 | 
 38 | !/src/**/*.ts
 39 | !/src/**/*.cts
 40 | !/src/**/*.mts
 41 | !/src/**/*.tsx
 42 | 
 43 | /src/**/*.d.ts
 44 | /src/**/*.js
 45 | /src/**/*.d.cts
 46 | /src/**/*.d.mts
 47 | /src/**/*.cjs
 48 | /src/**/*.mjs
 49 | /src/**/*.jsx
 50 | 
 51 | *.tgz
 52 | /tsconfig.json.tpl
 53 | yarn-error.log
 54 | .git
 55 | yarn.lock
 56 | .env.local
 57 | .env.*.local
 58 | npm-debug.log*
 59 | yarn-debug.log*
 60 | yarn-error.log*
 61 | .vscode
 62 | *.suo
 63 | *.ntvs*
 64 | *.njsproj
 65 | *.sln
 66 | *.sw?
 67 | *.vue.js
 68 | *.vue.d.ts
 69 | *.vue.js.map
 70 | .nyc_output
 71 | coverage
 72 | /*.tpl
 73 | webpack.config.js
 74 | vue.config.js
 75 | /jestconfig.json
 76 | /tslint.json
 77 | .git
 78 | webpack.*.config.js
 79 | webpack.*.config.d.ts
 80 | webpack.*.config.js.map
 81 | webpack.*.config.ts
 82 | karma.conf.js
 83 | /_config.yml
 84 | intellij-style-guide.xml
 85 | jest.config.js
 86 | *.tsbuildinfo
 87 | tsconfig.*.json
 88 | tsconfig.esm.json.tpl
 89 | /package.d.ts
 90 | .mocharc.yml
 91 | jest.config.js
 92 | jest.config.*
 93 | /jest-preset.*
 94 | /report.*.json
 95 | now.json
 96 | /Makefile
 97 | *.spec.d.ts
 98 | *.spec.js
 99 | *.spec.ts
100 | 
101 | *.spec.d.cts
102 | *.spec.cjs
103 | *.spec.cts
104 | 
105 | *.spec.d.mts
106 | *.spec.mjs
107 | *.spec.mts
108 | 
109 | *.spec.d.tsx
110 | *.spec.tsx
111 | 
112 | __mocks__
113 | __tests__
114 | __snapshots__
115 | *.snap
116 | npm-shrinkwrap.json
117 | /example/
118 | *.stat
119 | .vercel
120 | tsdx.config.js
121 | /report.json
122 | 
123 | /_*/
124 | _snowpack
125 | 
126 | /snowpack.config.js
127 | web_modules
128 | cz-adapter
129 | 
130 | tsc-multi.json.tpl
131 | tsc-multi.json
132 | 
133 | changelog-option.js
134 | 
135 | bin/tsconfig.json
136 | bin/tsconfig.*.json
137 | 
138 | .yarnrc.yml
139 | .turbo
140 | __file_snapshots__
141 | __fixtures__
142 | /fixture/
143 | 


--------------------------------------------------------------------------------
/.nvmrc:
--------------------------------------------------------------------------------
1 | node
2 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: node_js
 2 | node_js:
 3 |     - "node"
 4 | 
 5 | #deploy:
 6 | #  provider: pages
 7 | #  skip-cleanup: true
 8 | #  github-token: $GITHUB_TOKEN
 9 | #  keep-history: true
10 | #  on:
11 | #    branch: master
12 | #  local-dir: docs
13 | 
14 | 
15 | cache:
16 |   yarn: true
17 |   directories:
18 |     - "node_modules"
19 | 
20 | before_install:
21 |   #- npm install -g typedoc typedoc-themes-color typedoc-plugin-nojekyll
22 |   - npm install -g typescript@next ts-node mocha chai
23 | 
24 | install:
25 | #  - yarn install
26 |   - yarn add fs-extra chai-string @types/mocha   typescript@next ts-node mocha chai chai-asserttype-extra
27 | 
28 | 
29 | before_script:
30 |   - yarn list segment-dict
31 |   - echo before_script
32 | script:
33 | #  npm run travis
34 |   yarn run test
35 | 
36 | env:
37 |   global:
38 |     - TS_NODE_TRANSPILE_ONLY=true
39 |     - color=1
40 |     - FORCE_COLOR=1
41 | 


--------------------------------------------------------------------------------
/MIT-License:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2012-2015 Zongmin Lei (雷宗民) <leizongmin@gmail.com>
 2 | http://ucdok.com
 3 | 
 4 | The MIT License
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining
 7 | a copy of this software and associated documentation files (the
 8 | "Software"), to deal in the Software without restriction, including
 9 | without limitation the rights to use, copy, modify, merge, publish,
10 | distribute, sublicense, and/or sell copies of the Software, and to
11 | permit persons to whom the Software is furnished to do so, subject to
12 | the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be
15 | included in all copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/__root.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Created by user on 2020/7/1.
3 |  */
4 | export default __dirname;
5 | 


--------------------------------------------------------------------------------
/__root.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 | /**
3 |  * Created by user on 2020/7/1.
4 |  */
5 | Object.defineProperty(exports, "__esModule", { value: true });
6 | exports.default = __dirname;
7 | //# sourceMappingURL=__root.js.map


--------------------------------------------------------------------------------
/__root.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Created by user on 2020/7/1.
3 |  */
4 | 
5 | import { join, normalize } from "path";
6 | 
7 | export default __dirname;
8 | 


--------------------------------------------------------------------------------
/demo/README.md:
--------------------------------------------------------------------------------
 1 | # title
 2 | 
 3 | ```ts
 4 | const segment = new Segment();
 5 | ```
 6 | 
 7 | ## 如何將目前已加入的字典匯出
 8 | 
 9 | ```ts
10 | // 用來確保字典的確已載入
11 | segment.autoInit()
12 | 
13 | // 字典類型
14 | let type = 'TABLE';
15 | 
16 | let db_dict = segment.getDictDatabase(type)
17 | fs.writeFileSync('./exported.table.dict.txt', db_dict.stringify())
18 | ```
19 | 
20 | ## 段落切分
21 | 
22 | > 由於 segment 是利用對內容的前後文分析來進行分詞  
23 | > 所以如何切割段落對於結果就會產生不同影響
24 | 
25 | |       | |
26 | |:------|:--|
27 | | `SPLIT` | `RegExp` or 具有 `.[Symbol.split](input: string, limit?: number) => string[]` 的物件 |
28 | | `SPLIT_FILTER` | `RegExp` or 具有 `.test(input: string) => boolean` 的物件 |
29 | 
30 | ```ts
31 | 	/**
32 | 	 * 分段
33 | 	 * `RegExp` or 具有 `.[Symbol.split](input: string, limit?: number) => string[]` 的物件
34 | 	 *
35 | 	 * @type {Segment.ISPLIT}
36 | 	 */
37 | 	segment.SPLIT: ISPLIT = /([\r\n]+|^[　\s+]+|[　\s]+$|[　\s]{2,})/gm as ISPLIT;
38 | 
39 | 	/**
40 | 	 * 分段之後 如果符合以下條件 則直接忽略分析
41 | 	 * `RegExp` or 具有 `.test(input: string) => boolean` 的物件
42 | 	 *
43 | 	 * @type {Segment.ISPLIT_FILTER}
44 | 	 */
45 | 	segment.SPLIT_FILTER: ISPLIT_FILTER = /^([\r\n]+)$/g as ISPLIT_FILTER;
46 | ```
47 | 
48 | ## dictionary
49 | 
50 | > 以下方法會載入字典 `name`
51 | 
52 | `name` 可以為
53 | 
54 | * 字典檔案絕對/相對路徑
55 | * 字典檔名(可以忽略副檔名)
56 | 
57 | 當只輸入檔名時  
58 | 會呼叫 `_resolveDictFilename(name: string, pathPlus?: string[], extPlus?: string[]): string;`  
59 | 依照以下順序搜尋第一個符合的檔案
60 | 
61 | 1. 目前 `cwd` 的相對路徑
62 | 2. novel-segment 模組底下的 [`novel-segment/dicts`](https://github.com/bluelovers/node-segment/tree/master/dicts)
63 | 3. 如果是呼叫 `loadSynonymDict` 時 會額外搜尋 [`segment-dict/dict/synonym`](https://github.com/bluelovers/node-segment-dict/tree/master/dict/synonym)
64 | 4. 如果是呼叫 `loadStopwordDict` 時 會額外搜尋 [`segment-dict/dict/stopword`](https://github.com/bluelovers/node-segment-dict/tree/master/dict/stopword)
65 | 5. `segment-dict` 模組底下的 [`segment-dict/dict/segment`](https://github.com/bluelovers/node-segment-dict/tree/master/dict/segment)
66 | 
67 | 副檔名為以下順序
68 | 
69 | 1. `''` => 無 也就是與 `name` 同名的檔案
70 | 2. `.utf8`
71 | 3. `.txt`
72 | 
73 | ```ts
74 |     /**
75 |      * 载入字典文件
76 |      *
77 |      * @param {String} name 字典文件名
78 |      * @param {String} type 类型
79 |      * @param {Boolean} convert_to_lower 是否全部转换为小写
80 |      * @return {Segment}
81 |      */
82 |     loadDict(name: string, type?: string, convert_to_lower?: boolean, skipExists?: boolean): this;
83 |     /**
84 |      * 载入同义词词典
85 |      *
86 |      * @param {String} name 字典文件名
87 |      */
88 |     loadSynonymDict(name: string, skipExists?: boolean): this;
89 |     /**
90 |      * 载入停止符词典
91 |      *
92 |      * @param {String} name 字典文件名
93 |      */
94 |     loadStopwordDict(name: string): this;
95 | ```
96 | 
97 | 


--------------------------------------------------------------------------------
/demo/sample/果体.txt:
--------------------------------------------------------------------------------
 1 | 但如果體內的營養成分消失
 2 | 就接近了看了刊登異性的果體照片的週刊雜誌的感覺。
 3 | 就接近了看了刊登異性的果體照片的週刊雜志的感覺。
 4 | 雨果體力的冒険者之魂在沸騰
 5 | 穿着像是果體一樣的Ｈ服裝
 6 | 應該是和萊娜訓練的成果體現出來了
 7 | “果體圍裙”
 8 | 果體後綁起來
 9 | 我處於果體狀態
10 | 
11 | 
12 | 但如果体内的营养成分消失
13 | 就接近了看了刊登异性的果体照片的周刊杂志的感觉。
14 | 就接近了看了刊登异性的果体照片的周刊杂志的感觉。
15 | 雨果体力的冒険者之魂在沸腾
16 | 穿着像是果体一样的Ｈ服装
17 | 应该是和莱娜训练的成果体现出来了
18 | “果体围裙”
19 | 果体后绑起来
20 | 我处于果体状态
21 | 


--------------------------------------------------------------------------------
/dicts/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bluelovers/node-segment/f5688c9dd2c31a16d7eefd2275aa9b206a5f7134/dicts/.gitkeep


--------------------------------------------------------------------------------
/index.d.ts:
--------------------------------------------------------------------------------
 1 | import { Segment } from './lib/Segment';
 2 | import { POSTAG } from '@novel-segment/postag/lib/postag/ids';
 3 | declare const _Segment: typeof Segment & {
 4 |     version: string;
 5 |     version_dict: string;
 6 |     versions: {
 7 |         "novel-segment": string;
 8 |         "segment-dict": string;
 9 |         "regexp-cjk": string;
10 |         "cjk-conv": string;
11 |     };
12 |     /**
13 |      * Word segmentation interface (the Segment class itself)
14 |      */
15 |     Segment: typeof Segment;
16 |     /**
17 |      * Part-of-speech tag interface
18 |      */
19 |     POSTAG: typeof POSTAG;
20 | };
21 | declare const __Segment: typeof _Segment & {
22 |     default: typeof _Segment;
23 | };
24 | export = __Segment;
25 | export * from './version';
26 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Chinese word segmenter
 3 |  *
 4 |  * @author 老雷<leizongmin@gmail.com>
 5 |  */
 6 | 'use strict';
 7 | const tslib_1 = require("tslib");
 8 | const Segment_1 = require("./lib/Segment");
 9 | const ids_1 = require("@novel-segment/postag/lib/postag/ids");
10 | const _Segment = Segment_1.Segment;
11 | const __Segment = _Segment;
12 | Object.defineProperty(__Segment, "version", {
13 |     get() {
14 |         return require('./version').version;
15 |     }
16 | });
17 | Object.defineProperty(__Segment, "version_dict", {
18 |     get() {
19 |         return require('./version').version_dict;
20 |     }
21 | });
22 | Object.defineProperty(__Segment, "versions", {
23 |     get() {
24 |         return require('./version').versions;
25 |     }
26 | });
27 | // @ts-ignore
28 | tslib_1.__exportStar(require("./version"), exports);
29 | __Segment.POSTAG = ids_1.POSTAG;
30 | __Segment.Segment = Segment_1.Segment;
31 | __Segment.default = __Segment;
32 | module.exports = __Segment; // NOTE(review): this replaces the `exports` object that __exportStar populated above; the version values stay reachable only through the getters defined on __Segment - confirm that is intended
33 | //# sourceMappingURL=index.js.map


--------------------------------------------------------------------------------
/index.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Chinese word segmenter
 3 |  *
 4 |  * @author 老雷<leizongmin@gmail.com>
 5 |  */
 6 | 'use strict';
 7 | 
 8 | import { Segment, IWord, IDICT, IOptionsSegment, IDICT2, IDICT_STOPWORD, IDICT_SYNONYM, IOptionsDoSegment } from './lib/Segment';
 9 | import { POSTAG } from '@novel-segment/postag/lib/postag/ids';
10 | // The Segment class is re-typed here to include the static members attached further down in this file.
11 | const _Segment = Segment as typeof Segment & {
12 | 	version: string,
13 | 	version_dict: string,
14 | 
15 | 	versions: {
16 | 		'novel-segment': string,
17 | 		'segment-dict': string,
18 | 		'regexp-cjk': string,
19 | 		'cjk-conv': string,
20 | 	},
21 | 
22 | 	/**
23 | 	 * Word segmentation interface (the Segment class itself)
24 | 	 */
25 | 	Segment: typeof Segment,
26 | 	/**
27 | 	 * Part-of-speech tag interface
28 | 	 */
29 | 	POSTAG: typeof POSTAG,
30 | };
31 | 
32 | const __Segment = _Segment as typeof _Segment & {
33 | 	default: typeof _Segment,
34 | };
35 | // version / version_dict / versions are getters that read the value from './version' each time they are accessed.
36 | Object.defineProperty(__Segment, "version", {
37 | 	get()
38 | 	{
39 | 		return require('./version').version
40 | 	}
41 | });
42 | 
43 | Object.defineProperty(__Segment, "version_dict", {
44 | 	get()
45 | 	{
46 | 		return require('./version').version_dict
47 | 	}
48 | });
49 | 
50 | Object.defineProperty(__Segment, "versions", {
51 | 	get()
52 | 	{
53 | 		return require('./version').versions
54 | 	}
55 | });
56 | 
57 | // @ts-ignore
58 | export = __Segment;
59 | 
60 | // @ts-ignore
61 | export * from './version';
62 | // Static aliases: the exported class also exposes itself as .Segment and .default, plus the POSTAG table.
63 | __Segment.POSTAG = POSTAG;
64 | __Segment.Segment = Segment;
65 | __Segment.default = __Segment;
66 | 
67 | /*
68 | Usage example:
69 | 
70 | var segment = new Segment();
71 | // use the default recognition modules and dictionaries
72 | segment.useDefault();
73 | // start segmenting
74 | console.log(segment.doSegment('这是一个基于Node.js的中文分词模块。'));
75 | */
76 | 


--------------------------------------------------------------------------------
/jest.config.js:
--------------------------------------------------------------------------------
  1 | // @ts-check
  2 | 
  3 | const { basename, extname, dirname } = require('path');
  4 | 
  5 | /**
  6 |  * // @type { import('@jest/types').Config.InitialOptions }
  7 |  * @type { import('ts-jest').InitialOptionsTsJest }
  8 |  */
  9 | let jestConfig = {
 10 | 
 11 | }
 12 | /**
 13 |  * Resolve `name`, trying `@yarn-tool/require-resolve` first and falling back to `require.resolve`; logs the result.
 14 |  * @param {string} name
 15 |  * @returns {string}
 16 |  * @private
 17 |  */
 18 | function _requireResolve(name)
 19 | {
 20 | 	let result;
 21 | 
 22 | 	try
 23 | 	{
 24 | 		// @ts-ignore
 25 | 		const { requireResolveExtra, requireResolveCore } = require('@yarn-tool/require-resolve');
 26 | 		// Extra lookup roots taken from where the tsdx packages resolve; falsy entries are filtered out.
 27 | 		const paths = [
 28 | 			requireResolveExtra('@bluelovers/tsdx').result,
 29 | 			requireResolveExtra('tsdx').result,
 30 | 		].filter(Boolean);
 31 | 
 32 | 		result = requireResolveCore(name, {
 33 | 			includeGlobal: true,
 34 | 			includeCurrentDirectory: true,
 35 | 			paths,
 36 | 		})
 37 | 	}
 38 | 	catch (e)
 39 | 	{
 40 | 		// Best-effort: any failure here is ignored because the require.resolve fallback below still runs.
 41 | 	}
 42 | 	// Not wrapped in try/catch: require.resolve throws if the module cannot be found.
 43 | 	result = result || require.resolve(name);
 44 | 
 45 | 	console.info('[require.resolve]', name, '=>', result)
 46 | 
 47 | 	return result
 48 | }
 49 | 
 50 | let _ok = true;
 51 | 
 52 | try
 53 | {
 54 | 	if (!jestConfig.preset)
 55 | 	{
 56 | 
 57 | 		let result = require('@yarn-tool/ws-find-up-paths').findUpPathsWorkspaces([
 58 | 			'jest-preset.js',
 59 | 			'jest.config.js',
 60 | 		], {
 61 | 			ignoreCurrentPackage: true,
 62 | 			onlyFiles: true,
 63 | 		}).result;
 64 | 
 65 | 		if (result)
 66 | 		{
 67 | 			let name = basename(result, extname(result))
 68 | 
 69 | 			switch (name)
 70 | 			{
 71 | 				case 'jest-preset':
 72 | 					jestConfig.preset = dirname(result);
 73 | 					break;
 74 | 				default:
 75 | 					jestConfig = {
 76 | 						...require(result),
 77 | 						jestConfig,
 78 | 					};
 79 | 					break;
 80 | 			}
 81 | 
 82 | 			_ok = false;
 83 | 		}
 84 | 	}
 85 | }
 86 | catch (e)
 87 | {
 88 | 
 89 | }
 90 | // Fallback 1: use the directory containing @bluelovers/jest-config/package.json as the preset.
 91 | try
 92 | {
 93 | 	if (_ok && !jestConfig.preset)
 94 | 	{
 95 | 		let result = _requireResolve('@bluelovers/jest-config/package.json');
 96 | 		if (result)
 97 | 		{
 98 | 			jestConfig.preset = dirname(result);
 99 | 			_ok = false;
100 | 		}
101 | 	}
102 | }
103 | catch (e)
104 | {
105 | 	// Resolution failures are ignored; the hard-coded preset name below is the last resort.
106 | }
107 | // Fallback 2: nothing was found above, so use the bare package name as the preset.
108 | if (_ok && !jestConfig.preset)
109 | {
110 | 	jestConfig.preset = '@bluelovers/jest-config';
111 | 	_ok = false;
112 | }
113 | 
114 | console.info(`jest.config.preset: ${jestConfig.preset}`);
115 | 
116 | module.exports = jestConfig
117 | 


--------------------------------------------------------------------------------
/lib/POSTAG.d.ts:
--------------------------------------------------------------------------------
1 | import { POSTAG } from '@novel-segment/postag/lib/postag/ids';
2 | export { POSTAG };
3 | export default POSTAG;
4 | 


--------------------------------------------------------------------------------
/lib/POSTAG.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 | Object.defineProperty(exports, "__esModule", { value: true });
3 | exports.POSTAG = void 0;
4 | const ids_1 = require("@novel-segment/postag/lib/postag/ids");
5 | Object.defineProperty(exports, "POSTAG", { enumerable: true, get: function () { return ids_1.POSTAG; } });
6 | exports.default = ids_1.POSTAG;
7 | //# sourceMappingURL=POSTAG.js.map


--------------------------------------------------------------------------------
/lib/POSTAG.ts:
--------------------------------------------------------------------------------
1 | import { POSTAG } from '@novel-segment/postag/lib/postag/ids';
2 | 
3 | export { POSTAG }
4 | 
5 | export default POSTAG
6 | 


--------------------------------------------------------------------------------
/lib/const.d.ts:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bluelovers/node-segment/f5688c9dd2c31a16d7eefd2275aa9b206a5f7134/lib/const.d.ts


--------------------------------------------------------------------------------
/lib/const.js:
--------------------------------------------------------------------------------
1 | //# sourceMappingURL=const.js.map


--------------------------------------------------------------------------------
/lib/const.ts:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lib/defaults/dict.d.ts:
--------------------------------------------------------------------------------
1 | import { Segment } from '../Segment';
2 | import { IUseDefaultOptionsDicts } from './index';
3 | export declare function useDefaultDicts(segment: Segment, options?: IUseDefaultOptionsDicts): Segment;
4 | export declare function useDefaultSynonymDict(segment: Segment, options?: IUseDefaultOptionsDicts): Segment;
5 | export declare function useDefaultBlacklistDict(segment: Segment, options?: IUseDefaultOptionsDicts): Segment;
6 | 


--------------------------------------------------------------------------------
/lib/defaults/dict.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.useDefaultDicts = useDefaultDicts;
 4 | exports.useDefaultSynonymDict = useDefaultSynonymDict;
 5 | exports.useDefaultBlacklistDict = useDefaultBlacklistDict;
 6 | function useDefaultDicts(segment, options = {}) { // loads the default dictionaries unless options.nodict is set; returns segment
 7 |     if (!options.nodict) {
 8 |         // dictionary files
 9 |         segment
10 |             //.loadDict('jieba') <=== bad file
11 |             .loadDict('char')
12 |             // Pangu dictionaries
13 |             .loadDict('pangu/phrases')
14 |             .loadDict('pangu/phrases2')
15 |             .loadDict('phrases/*')
16 |             .loadDict('dict')
17 |             .loadDict('dict2')
18 |             .loadDict('dict3')
19 |             .loadDict('dict4')
20 |             .loadDict('pangu/dict005')
21 |             .loadDict('pangu/dict006')
22 |             //.loadDict('synonym/后')
23 |             //.loadDict('synonym/參')
24 |             //.loadDict('synonym/发')
25 |             .loadDict('dict_synonym/*')
26 |             //.loadDict('pangu/wildcard', 'WILDCARD', true)   // wildcards
27 |             .loadStopwordDict('stopword') // stop words
28 |             .loadDict('lazy/dict_synonym')
29 |             /*
30 |             .loadDict('names/area')
31 |             .loadDict('names/job')
32 |             .loadDict('names/food')
33 | 
34 |             .loadDict('names/other')
35 |             .loadDict('names/jp')
36 |             .loadDict('names/zh')
37 |             .loadDict('names/en')
38 |             .loadDict('names/name')
39 |              */
40 |             .loadDict('names/*')
41 |             .loadDict('lazy/*')
42 |             .loadDict('pangu/num')
43 |             .loadDict('lazy/badword')
44 |             .loadDict('pangu/wildcard', 'WILDCARD', true);
45 |         useDefaultSynonymDict(segment, options);
46 |         useDefaultBlacklistDict(segment, options);
47 |         segment.doBlacklist();
48 |     }
49 |     return segment;
50 | }
51 | function useDefaultSynonymDict(segment, options = {}) { // loads the default synonym dictionaries unless options.nodict is set; returns segment
52 |     if (!options.nodict) {
53 |         segment
54 |             .loadSynonymDict('synonym') // synonyms
55 |             .loadSynonymDict('zht.synonym', false);
56 |         if (options.nodeNovelMode) { // extra synonym dictionaries, loaded only in node-novel mode
57 |             segment
58 |                 .loadSynonymDict('badword.synonym', false)
59 |                 .loadSynonymDict('zht.common.synonym', false);
60 |         }
61 |     }
62 |     return segment;
63 | }
64 | function useDefaultBlacklistDict(segment, options = {}) { // loads the three default blacklist dictionaries unless options.nodict is set; returns segment
65 |     if (!options.nodict) {
66 |         segment
67 |             .loadBlacklistDict('blacklist')
68 |             .loadBlacklistOptimizerDict('blacklist.name')
69 |             .loadBlacklistSynonymDict('blacklist.synonym');
70 |     }
71 |     return segment;
72 | }
73 | //# sourceMappingURL=dict.js.map


--------------------------------------------------------------------------------
/lib/defaults/dict.ts:
--------------------------------------------------------------------------------
  1 | import { Segment } from '../Segment';
  2 | import { IUseDefaultOptionsDicts } from './index';
  3 | /** Loads the default dictionary set into `segment` unless `options.nodict` is set; always returns `segment`. */
  4 | export function useDefaultDicts(segment: Segment, options: IUseDefaultOptionsDicts = {})
  5 | {
  6 | 	if (!options.nodict)
  7 | 	{
  8 | 		// dictionary files
  9 | 		segment
 10 | 		//.loadDict('jieba') <=== bad file
 11 | 
 12 | 			.loadDict('char')
 13 | 
 14 | 			// Pangu dictionaries
 15 | 			.loadDict('pangu/phrases')
 16 | 			.loadDict('pangu/phrases2')
 17 | 			.loadDict('phrases/*')
 18 | 
 19 | 			.loadDict('dict')
 20 | 			.loadDict('dict2')
 21 | 			.loadDict('dict3')
 22 | 			.loadDict('dict4')
 23 | 			.loadDict('pangu/dict005')
 24 | 			.loadDict('pangu/dict006')
 25 | 
 26 | 			//.loadDict('synonym/后')
 27 | 			//.loadDict('synonym/參')
 28 | 			//.loadDict('synonym/发')
 29 | 			.loadDict('dict_synonym/*')
 30 | 
 31 | 			//.loadDict('pangu/wildcard', 'WILDCARD', true)   // wildcards
 32 | 
 33 | 			.loadStopwordDict('stopword') // stop words
 34 | 
 35 | 			.loadDict('lazy/dict_synonym')
 36 | 
 37 | 			/*
 38 | 			.loadDict('names/area')
 39 | 			.loadDict('names/job')
 40 | 			.loadDict('names/food')
 41 | 
 42 | 			.loadDict('names/other')
 43 | 			.loadDict('names/jp')
 44 | 			.loadDict('names/zh')
 45 | 			.loadDict('names/en')
 46 | 			.loadDict('names/name')
 47 | 			 */
 48 | 
 49 | 			.loadDict('names/*')
 50 | 
 51 | 			.loadDict('lazy/*')
 52 | 
 53 | 			.loadDict('pangu/num')
 54 | 
 55 | 			.loadDict('lazy/badword')
 56 | 
 57 | 			.loadDict('pangu/wildcard', 'WILDCARD', true)
 58 | 		;
 59 | 		// The synonym and blacklist dictionaries are loaded before doBlacklist() is invoked.
 60 | 		useDefaultSynonymDict(segment, options);
 61 | 		useDefaultBlacklistDict(segment, options);
 62 | 
 63 | 		segment.doBlacklist();
 64 | 	}
 65 | 
 66 | 	return segment
 67 | }
 68 | /** Loads the default synonym dictionaries into `segment` unless `options.nodict` is set; always returns `segment`. */
 69 | export function useDefaultSynonymDict(segment: Segment, options: IUseDefaultOptionsDicts = {})
 70 | {
 71 | 	if (!options.nodict)
 72 | 	{
 73 | 		segment
 74 | 		.loadSynonymDict('synonym')   // synonyms
 75 | 		.loadSynonymDict('zht.synonym', false)
 76 | 		;
 77 | 		// Extra synonym dictionaries, loaded only when node-novel mode is requested.
 78 | 		if (options.nodeNovelMode)
 79 | 		{
 80 | 			segment
 81 | 				.loadSynonymDict('badword.synonym', false)
 82 | 				.loadSynonymDict('zht.common.synonym', false)
 83 | 		}
 84 | 
 85 | 	}
 86 | 
 87 | 	return segment
 88 | }
 89 | /** Loads the three default blacklist dictionaries into `segment` unless `options.nodict` is set; always returns `segment`. */
 90 | export function useDefaultBlacklistDict(segment: Segment, options: IUseDefaultOptionsDicts = {})
 91 | {
 92 | 	if (!options.nodict)
 93 | 	{
 94 | 		segment
 95 | 			.loadBlacklistDict('blacklist')
 96 | 			.loadBlacklistOptimizerDict('blacklist.name')
 97 | 			.loadBlacklistSynonymDict('blacklist.synonym')
 98 | 		;
 99 | 	}
100 | 
101 | 	return segment
102 | }
103 | 


--------------------------------------------------------------------------------
/lib/defaults/index.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2019/6/26.
 3 |  */
 4 | import { Segment } from '../Segment';
 5 | /**
 6 |  * @private
 7 |  */
 8 | export interface IUseDefaultOptionsDicts {
 9 |     /**
10 |      * 不載入 字典
11 |      */
12 |     nodict?: boolean;
13 |     /**
14 |      * 載入 node-novel 相關字典
15 |      */
16 |     nodeNovelMode?: boolean;
17 | }
18 | /**
19 |  * @private
20 |  */
21 | export interface IUseDefaultOptionsMods {
22 |     all_mod?: boolean;
23 |     nomod?: boolean;
24 | }
25 | export interface IUseDefaultOptions extends IUseDefaultOptionsDicts, IUseDefaultOptionsMods {
26 | }
27 | export declare function useDefault(segment: Segment, options?: IUseDefaultOptions): Segment;
28 | 


--------------------------------------------------------------------------------
/lib/defaults/index.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.useDefault = useDefault;
 4 | const mods_1 = require("./mods");
 5 | const dict_1 = require("./dict");
 6 | function useDefault(segment, options = {}) {
 7 |     // 识别模块
 8 |     !options.nomod && (0, mods_1.useDefaultMods)(segment, options);
 9 |     // 字典文件
10 |     !options.nodict && (0, dict_1.useDefaultDicts)(segment, options);
11 |     return segment;
12 | }
13 | //# sourceMappingURL=index.js.map


--------------------------------------------------------------------------------
/lib/defaults/index.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2019/6/26.
 3 |  */
 4 | import { Segment } from '../Segment';
 5 | import { useDefaultMods } from './mods';
 6 | import { useDefaultDicts } from './dict';
 7 | 
/**
 * Dictionary-related switches accepted by `useDefault`.
 *
 * @private
 */
export interface IUseDefaultOptionsDicts
{
	/**
	 * When truthy, no dictionaries are loaded.
	 */
	nodict?: boolean,
	/**
	 * When truthy, the node-novel related dictionaries are loaded as well.
	 */
	nodeNovelMode?: boolean,
}
22 | 
/**
 * Module-related switches accepted by `useDefault`.
 *
 * @private
 */
export interface IUseDefaultOptionsMods
{
	/**
	 * Forwarded as the `all` argument of the default module list getter.
	 * NOTE(review): presumably also enables modules that are left out of the
	 * default list — confirm against `getDefault` in `../mod/index`.
	 */
	all_mod?: boolean,
	/**
	 * When truthy, no recognition modules are registered.
	 */
	nomod?: boolean,
}
31 | 
/**
 * Full option bag for `useDefault`: the union of the dictionary switches and
 * the module switches. Declares no members of its own.
 */
export interface IUseDefaultOptions extends IUseDefaultOptionsDicts, IUseDefaultOptionsMods
{

}
36 | 
/**
 * Applies the default setup to `segment`: first the recognition modules,
 * then the dictionary files. Either step is skipped when the matching
 * `nomod` / `nodict` option is truthy.
 *
 * @param segment instance to configure
 * @param options switches controlling which defaults are applied
 * @returns the same `segment` that was passed in
 */
export function useDefault(segment: Segment, options: IUseDefaultOptions = {})
{
	// recognition modules
	if (!options.nomod)
	{
		useDefaultMods(segment, options);
	}

	// dictionary files
	if (!options.nodict)
	{
		useDefaultDicts(segment, options);
	}

	return segment;
}
48 | 


--------------------------------------------------------------------------------
/lib/defaults/mods.d.ts:
--------------------------------------------------------------------------------
1 | import { IUseDefaultOptionsMods } from './index';
2 | export declare function useDefaultMods(segment: any, options?: IUseDefaultOptionsMods): any;
3 | 


--------------------------------------------------------------------------------
/lib/defaults/mods.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.useDefaultMods = useDefaultMods;
 4 | const tslib_1 = require("tslib");
 5 | const index_1 = tslib_1.__importDefault(require("../mod/index"));
 6 | function useDefaultMods(segment, options = {}) {
 7 |     !options.nomod && segment.use((0, index_1.default)(options.all_mod));
 8 |     return segment;
 9 | }
10 | //# sourceMappingURL=mods.js.map


--------------------------------------------------------------------------------
/lib/defaults/mods.ts:
--------------------------------------------------------------------------------
 1 | import { IUseDefaultOptionsMods } from './index';
 2 | import getDefaultModList from '../mod/index';
 3 | 
 4 | export function useDefaultMods(segment, options: IUseDefaultOptionsMods = {})
 5 | {
 6 | 	!options.nomod && segment.use(getDefaultModList(options.all_mod));
 7 | 
 8 | 	return segment
 9 | }
10 | 
11 | 


--------------------------------------------------------------------------------
/lib/fs/get.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/13/013.
 3 |  */
 4 | export type IOptions = {
 5 |     extensions?: string[];
 6 |     paths: string[];
 7 |     onlyDir?: boolean;
 8 |     onlyFile?: boolean;
 9 | };
10 | export declare function searchGlobSync(file: string, options: IOptions): string[];
11 | export declare function searchGlobSync(file: string, paths?: string[]): string[];
12 | export declare function _searchGlobSync(file: any, options: IOptions, cwd?: string): string[];
13 | export declare function searchFirstSync(file: string, options: IOptions): string;
14 | export declare function searchFirstSync(file: string, paths?: string[]): string;
15 | export declare function existsSync(path: string, options?: {
16 |     onlyDir?: boolean;
17 |     onlyFile?: boolean;
18 | }): boolean;
19 | export declare function getOptions<T extends IOptions>(options: T & IOptions): T & IOptions;
20 | export declare function getOptions(paths: string[]): IOptions;
21 | export declare function getOptions(options: IOptions | string[]): options is IOptions;
22 | export default searchFirstSync;
23 | 


--------------------------------------------------------------------------------
/lib/index.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/16/016.
 3 |  */
 4 | import getDefaultModList from './mod';
 5 | import { Segment } from './Segment';
 6 | import { useDefault } from './defaults';
 7 | export { getDefaultModList };
 8 | export { Segment };
 9 | export { useDefault };
10 | export default Segment;
11 | 


--------------------------------------------------------------------------------
/lib/index.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | /**
 3 |  * Created by user on 2018/4/16/016.
 4 |  */
 5 | Object.defineProperty(exports, "__esModule", { value: true });
 6 | exports.useDefault = exports.Segment = exports.getDefaultModList = void 0;
 7 | const tslib_1 = require("tslib");
 8 | const mod_1 = tslib_1.__importDefault(require("./mod"));
 9 | exports.getDefaultModList = mod_1.default;
10 | const Segment_1 = require("./Segment");
11 | Object.defineProperty(exports, "Segment", { enumerable: true, get: function () { return Segment_1.Segment; } });
12 | const defaults_1 = require("./defaults");
13 | Object.defineProperty(exports, "useDefault", { enumerable: true, get: function () { return defaults_1.useDefault; } });
14 | exports.default = Segment_1.Segment;
15 | //# sourceMappingURL=index.js.map


--------------------------------------------------------------------------------
/lib/index.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/16/016.
 3 |  */
 4 | 
 5 | import getDefaultModList from './mod';
 6 | import { Segment } from './Segment';
 7 | import { useDefault } from './defaults';
 8 | 
 9 | export { getDefaultModList }
10 | 
11 | export { Segment }
12 | 
13 | export { useDefault }
14 | 
15 | export default Segment;
16 | 


--------------------------------------------------------------------------------
/lib/loader.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/2/24/024.
 3 |  */
 4 | import SegmentDict from 'segment-dict';
 5 | import * as SegmentDictLoader from 'segment-dict/lib/loader/segment';
 6 | import * as SegmentSynonymLoader from '@novel-segment/loaders/segment/synonym';
 7 | export { SegmentDict };
 8 | export { SegmentDictLoader, SegmentSynonymLoader };
 9 | declare const _default: {
10 |     SegmentDict: typeof import("segment-dict");
11 |     SegmentDictLoader: typeof SegmentDictLoader;
12 |     SegmentSynonymLoader: typeof SegmentSynonymLoader;
13 | };
14 | export default _default;
15 | 


--------------------------------------------------------------------------------
/lib/loader.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | /**
 3 |  * Created by user on 2018/2/24/024.
 4 |  */
 5 | Object.defineProperty(exports, "__esModule", { value: true });
 6 | exports.SegmentSynonymLoader = exports.SegmentDictLoader = exports.SegmentDict = void 0;
 7 | const tslib_1 = require("tslib");
 8 | const segment_dict_1 = tslib_1.__importDefault(require("segment-dict"));
 9 | exports.SegmentDict = segment_dict_1.default;
10 | const SegmentDictLoader = tslib_1.__importStar(require("segment-dict/lib/loader/segment"));
11 | exports.SegmentDictLoader = SegmentDictLoader;
12 | const SegmentSynonymLoader = tslib_1.__importStar(require("@novel-segment/loaders/segment/synonym"));
13 | exports.SegmentSynonymLoader = SegmentSynonymLoader;
14 | exports.default = {
15 |     SegmentDict: segment_dict_1.default,
16 |     SegmentDictLoader,
17 |     SegmentSynonymLoader,
18 | };
19 | //# sourceMappingURL=loader.js.map


--------------------------------------------------------------------------------
/lib/loader.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/2/24/024.
 3 |  */
 4 | 
 5 | // @ts-ignore
 6 | import * as fs from 'fs';
 7 | import SegmentDict from 'segment-dict';
 8 | import * as SegmentDictLoader from 'segment-dict/lib/loader/segment';
 9 | import * as SegmentSynonymLoader from '@novel-segment/loaders/segment/synonym';
10 | 
11 | export { SegmentDict }
12 | export { SegmentDictLoader, SegmentSynonymLoader }
13 | 
/**
 * Default export: the same three loader entry points that are also available
 * as named exports, bundled into one object.
 */
export default {
	SegmentDict,
	SegmentDictLoader,
	SegmentSynonymLoader,
};
19 | 


--------------------------------------------------------------------------------
/lib/mod/CHS_NAMES.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 中文姓
 3 |  */
 4 | import { IDICT } from '../Segment';
 5 | export declare namespace _CHS_NAMES {
 6 |     const FAMILY_NAME_1: string[];
 7 |     const FAMILY_NAME_2: string[];
 8 |     const DOUBLE_NAME_1: string[];
 9 |     const DOUBLE_NAME_2: string[];
10 |     const SINGLE_NAME: string[];
11 |     const SINGLE_NAME_NO_REPEAT: string[];
12 |     const SHARE_NAME: string[];
13 |     function p(a: string[], n: number): IDICT<number>;
14 | }
15 | /**
16 |  * 单姓
17 |  */
18 | export declare const FAMILY_NAME_1: IDICT<number>;
19 | /**
20 |  * 复姓
21 |  */
22 | export declare const FAMILY_NAME_2: IDICT<number>;
23 | /**
24 |  * 双字姓名第一个字
25 |  */
26 | export declare const DOUBLE_NAME_1: IDICT<number>;
27 | /**
28 |  * 双字姓名第二个字
29 |  */
30 | export declare const DOUBLE_NAME_2: IDICT<number>;
31 | /**
32 |  * 单字姓名
33 |  */
34 | export declare const SINGLE_NAME: IDICT<number>;
35 | /**
36 |  * 单字姓名 不重覆
37 |  */
38 | export declare const SINGLE_NAME_NO_REPEAT: IDICT<number>;
39 | declare const _default: typeof import("./CHS_NAMES");
40 | export default _default;
41 | 


--------------------------------------------------------------------------------
/lib/mod/COLORS.d.ts:
--------------------------------------------------------------------------------
 1 | import { IDICT } from '../Segment';
 2 | export declare namespace _COLORS {
 3 |     const ZH = "\u8272";
 4 |     const COLOR_HAIR: string[];
 5 |     const COLOR_WITH_RGB: string[][];
 6 |     const COLOR_ALL: string[];
 7 |     function p(a: string[]): IDICT<number>;
 8 | }
 9 | export declare const COLOR_HAIR: IDICT<number>;
10 | export declare const COLOR_ALL: IDICT<number>;
11 | declare const _default: typeof import("./COLORS");
12 | export default _default;
13 | 


--------------------------------------------------------------------------------
/lib/mod/Optimizer.d.ts:
--------------------------------------------------------------------------------
 1 | import { IWord, Segment } from '../Segment';
 2 | import { ISubSModule, SModule, SubSModule } from './mod';
 3 | export type ISubOptimizer = ISubSModule & {
 4 |     type: 'optimizer';
 5 |     doOptimize(words: IWord[], ...argv: any[]): IWord[];
 6 | };
 7 | export type ISubOptimizerCreate<T extends SubSModuleOptimizer, R extends SubSModuleOptimizer = SubSModuleOptimizer> = {
 8 |     (segment: Segment, ...argv: any[]): T & R;
 9 | };
10 | export declare class SubSModuleOptimizer extends SubSModule implements ISubOptimizer {
11 |     static readonly type = "optimizer";
12 |     readonly type = "optimizer";
13 |     doOptimize(words: IWord[], ...argv: any[]): IWord[];
14 |     init(segment: Segment, ...argv: any[]): this;
15 |     static init<T extends SubSModuleOptimizer = SubSModuleOptimizer>(segment: Segment, ...argv: any[]): T;
16 | }
17 | /**
18 |  * 分词模块管理器
19 |  */
20 | export declare class Optimizer extends SModule {
21 |     type: string;
22 |     /**
23 |      * 对一段文本进行分词
24 |      *
25 |      * @param {array} words 单词数组
26 |      * @param {array} modules 分词模块数组
27 |      * @return {array}
28 |      */
29 |     doOptimize(words: IWord[], mods: ISubOptimizer[], ...argv: any[]): IWord[];
30 | }
31 | export default Optimizer;
32 | 


--------------------------------------------------------------------------------
/lib/mod/Optimizer.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 优化模块管理器
 3 |  *
 4 |  * @author 老雷<leizongmin@gmail.com>
 5 |  */
 6 | 'use strict';
 7 | Object.defineProperty(exports, "__esModule", { value: true });
 8 | exports.Optimizer = exports.SubSModuleOptimizer = void 0;
 9 | const tslib_1 = require("tslib");
10 | const core_decorators_1 = require("core-decorators");
11 | const mod_1 = require("./mod");
12 | let SubSModuleOptimizer = class SubSModuleOptimizer extends mod_1.SubSModule {
13 |     constructor() {
14 |         super(...arguments);
15 |         this.type = 'optimizer';
16 |     }
17 |     doOptimize(words, ...argv) {
18 |         throw new Error();
19 |     }
20 |     init(segment, ...argv) {
21 |         super.init(segment, ...argv);
22 |         return this;
23 |     }
24 |     static init(segment, ...argv) {
25 |         // @ts-ignore
26 |         return super.init(segment, ...argv);
27 |     }
28 | };
29 | exports.SubSModuleOptimizer = SubSModuleOptimizer;
30 | SubSModuleOptimizer.type = 'optimizer';
31 | exports.SubSModuleOptimizer = SubSModuleOptimizer = tslib_1.__decorate([
32 |     core_decorators_1.autobind
33 |     // @ts-ignore
34 | ], SubSModuleOptimizer);
35 | /**
36 |  * 分词模块管理器
37 |  */
38 | class Optimizer extends mod_1.SModule {
39 |     constructor() {
40 |         super(...arguments);
41 |         this.type = 'optimizer';
42 |     }
43 |     /**
44 |      * 对一段文本进行分词
45 |      *
46 |      * @param {array} words 单词数组
47 |      * @param {array} modules 分词模块数组
48 |      * @return {array}
49 |      */
50 |     doOptimize(words, mods, ...argv) {
51 |         return this._doMethod('doOptimize', words, mods, ...argv);
52 |     }
53 | }
54 | exports.Optimizer = Optimizer;
55 | exports.default = Optimizer;
56 | //# sourceMappingURL=Optimizer.js.map


--------------------------------------------------------------------------------
/lib/mod/Optimizer.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 优化模块管理器
 3 |  *
 4 |  * @author 老雷<leizongmin@gmail.com>
 5 |  */
 6 | 'use strict';
 7 | 
 8 | import { autobind } from 'core-decorators';
 9 | import { IWord, Segment } from '../Segment';
10 | import { ISubSModule, SModule, SubSModule } from './mod';
11 | 
/**
 * Shape of an optimizer sub-module: an `ISubSModule` whose `type` is the
 * literal `'optimizer'` and which exposes `doOptimize`, taking a word list
 * (plus optional extra arguments) and returning a word list.
 */
export type ISubOptimizer = ISubSModule & {
	type: 'optimizer',
	doOptimize(words: IWord[], ...argv): IWord[],
}
16 | 
/**
 * Call signature of a factory that receives a `Segment` (plus optional extra
 * arguments) and returns an optimizer sub-module typed as `T & R`.
 */
export type ISubOptimizerCreate<T extends SubSModuleOptimizer, R extends SubSModuleOptimizer = SubSModuleOptimizer> = {
	(segment: Segment, ...argv): T & R,
};
20 | 
/**
 * Base class for optimizer sub-modules.
 *
 * Both the static and the instance `type` are fixed to `'optimizer'`.
 * The class is decorated with `autobind` from `core-decorators`.
 */
@autobind
// @ts-ignore
export class SubSModuleOptimizer extends SubSModule implements ISubOptimizer
{
	public static override readonly type = 'optimizer';
	public override readonly type = 'optimizer';

	/**
	 * Placeholder implementation: always throws an `Error` with no message.
	 * Presumably concrete optimizers are expected to override this — confirm
	 * against the sub-modules that extend this class.
	 */
	public doOptimize(words: IWord[], ...argv): IWord[]
	{
		throw new Error();
	}

	/**
	 * Runs the parent `init` with the same arguments and returns this
	 * instance so calls can be chained.
	 */
	public override init(segment: Segment, ...argv)
	{
		super.init(segment, ...argv);

		return this;
	}

	/**
	 * Static counterpart of `init`: delegates to the parent class' static
	 * `init` and returns its result typed as `T`.
	 */
	public static override init<T extends SubSModuleOptimizer = SubSModuleOptimizer>(segment: Segment, ...argv): T
	{
		// @ts-ignore
		return super.init<T>(segment, ...argv);
	}
}
46 | 
/**
 * Optimizer module manager (`type` is `'optimizer'`).
 */
export class Optimizer extends SModule
{
	override type = 'optimizer';

	/**
	 * Optimizes a list of words using the given optimizer modules.
	 *
	 * Forwards to `this._doMethod('doOptimize', words, mods, ...argv)`.
	 * NOTE(review): presumably this runs each module's `doOptimize` in turn —
	 * confirm against `SModule._doMethod`.
	 *
	 * @param words word list to optimize
	 * @param mods optimizer modules to apply
	 * @param argv extra arguments passed through unchanged
	 * @returns the word list produced by `_doMethod`
	 */
	doOptimize(words: IWord[], mods: ISubOptimizer[], ...argv): IWord[]
	{
		return this._doMethod('doOptimize', words, mods, ...argv);
	}
}
66 | 
67 | export default Optimizer;
68 | 


--------------------------------------------------------------------------------
/lib/mod/Tokenizer.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 分词模块管理器
 3 |  *
 4 |  * @author 老雷<leizongmin@gmail.com>
 5 |  */
 6 | import { IWord, Segment } from '../Segment';
 7 | import { ISubSModule, SModule, SubSModule } from './mod';
 8 | export type ISubTokenizer = ISubSModule & {
 9 |     type: 'tokenizer';
10 |     split(words: IWord[], ...argv: any[]): IWord[];
11 | };
12 | export type ISubTokenizerCreate<T extends SubSModuleTokenizer, R extends SubSModuleTokenizer = SubSModuleTokenizer> = {
13 |     (...argv: Parameters<T["init"]>): T & R;
14 |     (segment: Segment, ...argv: any[]): T & R;
15 | };
16 | export declare abstract class SubSModuleTokenizer extends SubSModule implements ISubTokenizer {
17 |     static readonly type = "tokenizer";
18 |     readonly type = "tokenizer";
19 |     abstract split(words: IWord[], ...argv: any[]): IWord[];
20 |     init(segment: Segment, ...argv: any[]): this;
21 |     static init<T extends SubSModuleTokenizer = SubSModuleTokenizer>(segment: Segment, ...argv: any[]): T;
22 |     /**
23 |      * 仅对未识别的词进行匹配
24 |      * 不包含 p 為 0
25 |      */
26 |     protected _splitUnset<T extends IWord, U extends IWord = T>(words: T[], fn: (text: string, ...argv: any[]) => U[]): U[];
27 |     /**
28 |      * 仅对未识别的词进行匹配
29 |      * 包含已存在 但 p 為 0
30 |      */
31 |     protected _splitUnknow<T extends IWord, U extends IWord = T>(words: T[], fn: (text: string, ...argv: any[]) => U[]): U[];
32 | }
33 | /**
34 |  * 分词模块管理器
35 |  */
36 | export declare class Tokenizer extends SModule {
37 |     type: string;
38 |     /**
39 |      * 对一段文本进行分词
40 |      *
41 |      * @param {string} text 文本
42 |      * @param {array} modules 分词模块数组
43 |      * @return {array}
44 |      */
45 |     split(text: string, mods: ISubTokenizer[], ...argv: any[]): IWord[];
46 | }
47 | export default Tokenizer;
48 | 


--------------------------------------------------------------------------------
/lib/mod/const.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/19/019.
 3 |  */
 4 | import { IDICT } from '../Segment';
 5 | /**
 6 |  * 日期时间常见组合
 7 |  */
 8 | export declare let _DATETIME: string[];
 9 | export declare const DATETIME: IDICT<number>;
10 | declare const _default: typeof import("./const");
11 | export default _default;
12 | 


--------------------------------------------------------------------------------
/lib/mod/const.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | /**
 3 |  * Created by user on 2018/4/19/019.
 4 |  */
 5 | Object.defineProperty(exports, "__esModule", { value: true });
 6 | exports.DATETIME = exports._DATETIME = void 0;
 7 | const list_1 = require("@lazy-cjk/zh-table-list/list");
 8 | /**
 9 |  * 日期时间常见组合
10 |  */
11 | exports._DATETIME = [
12 |     '世纪', '年', '年份', '年度', '月', '月份', '月度', '日', '号',
13 |     '时', '点', '点钟', '分', '分钟', '秒', '毫秒'
14 | ];
15 | exports.DATETIME = (0, list_1.arrCjk)(exports._DATETIME)
16 |     .reduce(function (data, v) {
17 |     data[v] = v.length;
18 |     return data;
19 | }, {});
20 | exports.default = exports;
21 | //# sourceMappingURL=const.js.map


--------------------------------------------------------------------------------
/lib/mod/const.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/19/019.
 3 |  */
 4 | 
 5 | import { IDICT } from '../Segment';
 6 | import { arrCjk as arr_cjk } from '@lazy-cjk/zh-table-list/list';
 7 | 
/**
 * Common date/time unit words (century, year, month, day, hour, minute,
 * second, millisecond and their frequent variants).
 * This is the raw list from which `DATETIME` is built.
 */
export let _DATETIME = [
	'世纪', '年', '年份', '年度', '月', '月份', '月度', '日', '号',
	'时', '点', '点钟', '分', '分钟', '秒', '毫秒'
];
15 | 
/**
 * Lookup table of date/time unit words: every entry produced by
 * `arr_cjk(_DATETIME)` is mapped to its own string length.
 */
export const DATETIME: IDICT<number> = {};

for (const unitWord of arr_cjk(_DATETIME))
{
	DATETIME[unitWord] = unitWord.length;
}
24 | 
25 | export default exports as typeof import('./const');
26 | 


--------------------------------------------------------------------------------
/lib/mod/data/STOPWORD.d.ts:
--------------------------------------------------------------------------------
 1 | export declare namespace NS_STOPWORD {
 2 |     const _TABLE: string;
 3 |     const _STOPWORD: string[], STOPWORD: {
 4 |         [key: string]: number;
 5 |     }, STOPWORD2: {
 6 |         [key: number]: {
 7 |             [key: string]: number;
 8 |         };
 9 |     };
10 |     function parseStopWord(_STOPWORD: string | string[]): {
11 |         _STOPWORD: string[];
12 |         STOPWORD: {
13 |             [key: string]: number;
14 |         };
15 |         STOPWORD2: {
16 |             [key: number]: {
17 |                 [key: string]: number;
18 |             };
19 |         };
20 |     };
21 | }
22 | export declare const _STOPWORD: string[], STOPWORD: {
23 |     [key: string]: number;
24 | }, STOPWORD2: {
25 |     [key: number]: {
26 |         [key: string]: number;
27 |     };
28 | };
29 | declare const _default: typeof import("./STOPWORD");
30 | export default _default;
31 | 


--------------------------------------------------------------------------------
/lib/mod/data/STOPWORD.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.STOPWORD2 = exports.STOPWORD = exports._STOPWORD = exports.NS_STOPWORD = void 0;
 4 | const array_hyper_unique_1 = require("array-hyper-unique");
 5 | var NS_STOPWORD;
 6 | (function (NS_STOPWORD) {
 7 |     var _a;
 8 |     NS_STOPWORD._TABLE = [
 9 |         ' ,.;+-|/\\\'":?<>[]{}=!@#$%^&*()~`' +
10 |             '。，、＇：∶；?‘’“”〝〞ˆˇ﹕︰﹔﹖﹑·¨….¸;！´？！～—ˉ｜‖＂〃｀@﹫¡¿﹏﹋﹌︴々﹟#﹩$﹠&﹪%*﹡﹢﹦' +
11 |             '﹤‐￣¯―﹨ˆ˜﹍﹎+=<­＿_-\ˇ~﹉﹊（）〈〉‹›﹛﹜『』〖〗［］《》〔〕{}「」【】︵︷︿︹︽_﹁﹃︻︶︸' +
12 |             '﹀︺︾ˉ﹂﹄︼＋－×÷﹢﹣±／＝≈≡≠∧∨∑∏∪∩∈⊙⌒⊥∥∠∽≌＜＞≤≥≮≯∧∨√﹙﹚[]﹛﹜∫∮∝∞⊙∏' +
13 |             '┌┬┐┏┳┓╒╤╕─│├┼┤┣╋┫╞╪╡━┃└┴┘┗┻┛╘╧╛┄┆┅┇╭─╮┏━┓╔╦╗┈┊│╳│┃┃╠╬╣┉┋╰─╯┗━┛' +
14 |             '╚╩╝╲╱┞┟┠┡┢┦┧┨┩┪╉╊┭┮┯┰┱┲┵┶┷┸╇╈┹┺┽┾┿╀╁╂╃╄╅╆' +
15 |             '○◇□△▽☆●◆■▲▼★♠♥♦♣☼☺◘♀√☻◙♂×▁▂▃▄▅▆▇█⊙◎۞卍卐╱╲▁▏↖↗↑←↔◤◥╲╱▔▕↙↘↓→↕◣◢∷▒░℡™',
16 |         '．・　※',
17 |         '⋯',
18 |         /**
19 |          * 丶並非標點符號 而為部首 但有的人會用這個作為 標點符號使用
20 |          */
21 |         '丶',
22 |     ].join('');
23 |     _a = parseStopWord(NS_STOPWORD._TABLE), NS_STOPWORD._STOPWORD = _a._STOPWORD, NS_STOPWORD.STOPWORD = _a.STOPWORD, NS_STOPWORD.STOPWORD2 = _a.STOPWORD2;
24 |     function parseStopWord(_STOPWORD) {
25 |         var _a;
26 |         if (typeof _STOPWORD === 'string') {
27 |             _STOPWORD = _STOPWORD.split('');
28 |             //_STOPWORD = UString.split(_STOPWORD, '');
29 |         }
30 |         else if (!Array.isArray(_STOPWORD)) {
31 |             throw new TypeError(`table must is string or string[]`);
32 |         }
33 |         _STOPWORD = (0, array_hyper_unique_1.array_unique)(_STOPWORD);
34 |         let STOPWORD = {};
35 |         let STOPWORD2 = {};
36 |         for (const _STOPWORDItem of _STOPWORD) {
37 |             if (_STOPWORDItem === '')
38 |                 continue;
39 |             let len = _STOPWORDItem.length;
40 |             STOPWORD[_STOPWORDItem] = len;
41 |             STOPWORD2[len] = (_a = STOPWORD2[len]) !== null && _a !== void 0 ? _a : {};
42 |             STOPWORD2[len][_STOPWORDItem] = len;
43 |         }
44 |         return {
45 |             _STOPWORD,
46 |             STOPWORD,
47 |             STOPWORD2,
48 |         };
49 |     }
50 |     NS_STOPWORD.parseStopWord = parseStopWord;
51 | })(NS_STOPWORD || (exports.NS_STOPWORD = NS_STOPWORD = {}));
52 | exports._STOPWORD = NS_STOPWORD._STOPWORD, exports.STOPWORD = NS_STOPWORD.STOPWORD, exports.STOPWORD2 = NS_STOPWORD.STOPWORD2;
53 | exports.default = exports;
54 | //# sourceMappingURL=STOPWORD.js.map


--------------------------------------------------------------------------------
/lib/mod/data/STOPWORD.ts:
--------------------------------------------------------------------------------
 1 | import { array_unique } from 'array-hyper-unique';
 2 | 
 3 | export namespace NS_STOPWORD
 4 | {
 5 | 	export const _TABLE = [
 6 | 		' ,.;+-|/\\\'":?<>[]{}=!@#$%^&*()~`' +
 7 | 		'。，、＇：∶；?‘’“”〝〞ˆˇ﹕︰﹔﹖﹑·¨….¸;！´？！～—ˉ｜‖＂〃｀@﹫¡¿﹏﹋﹌︴々﹟#﹩$﹠&﹪%*﹡﹢﹦' +
 8 | 		'﹤‐￣¯―﹨ˆ˜﹍﹎+=<­＿_-\ˇ~﹉﹊（）〈〉‹›﹛﹜『』〖〗［］《》〔〕{}「」【】︵︷︿︹︽_﹁﹃︻︶︸' +
 9 | 		'﹀︺︾ˉ﹂﹄︼＋－×÷﹢﹣±／＝≈≡≠∧∨∑∏∪∩∈⊙⌒⊥∥∠∽≌＜＞≤≥≮≯∧∨√﹙﹚[]﹛﹜∫∮∝∞⊙∏' +
10 | 		'┌┬┐┏┳┓╒╤╕─│├┼┤┣╋┫╞╪╡━┃└┴┘┗┻┛╘╧╛┄┆┅┇╭─╮┏━┓╔╦╗┈┊│╳│┃┃╠╬╣┉┋╰─╯┗━┛' +
11 | 		'╚╩╝╲╱┞┟┠┡┢┦┧┨┩┪╉╊┭┮┯┰┱┲┵┶┷┸╇╈┹┺┽┾┿╀╁╂╃╄╅╆' +
12 | 		'○◇□△▽☆●◆■▲▼★♠♥♦♣☼☺◘♀√☻◙♂×▁▂▃▄▅▆▇█⊙◎۞卍卐╱╲▁▏↖↗↑←↔◤◥╲╱▔▕↙↘↓→↕◣◢∷▒░℡™',
13 | 		'．・　※',
14 | 		'⋯',
15 | 		/**
16 | 		 * 丶並非標點符號 而為部首 但有的人會用這個作為 標點符號使用
17 | 		 */
18 | 		'丶',
19 | 	].join('');
20 | 
21 | 	export const { _STOPWORD, STOPWORD, STOPWORD2 } = parseStopWord(_TABLE);
22 | 
23 | 	export function parseStopWord(_STOPWORD: string | string[])
24 | 	{
25 | 		if (typeof _STOPWORD === 'string')
26 | 		{
27 | 			_STOPWORD = _STOPWORD.split('');
28 | 			//_STOPWORD = UString.split(_STOPWORD, '');
29 | 		}
30 | 		else if (!Array.isArray(_STOPWORD))
31 | 		{
32 | 			throw new TypeError(`table must is string or string[]`)
33 | 		}
34 | 
35 | 		_STOPWORD = array_unique(_STOPWORD);
36 | 
37 | 		let STOPWORD = {} as {
38 | 			[key: string]: number,
39 | 		};
40 | 		let STOPWORD2 = {} as {
41 | 			[key: number]: typeof STOPWORD,
42 | 		};
43 | 
44 | 		for (const _STOPWORDItem of _STOPWORD)
45 | 		{
46 | 			if (_STOPWORDItem === '') continue;
47 | 			let len = _STOPWORDItem.length;
48 | 			STOPWORD[_STOPWORDItem] = len;
49 | 			STOPWORD2[len] = STOPWORD2[len] ?? {};
50 | 			STOPWORD2[len][_STOPWORDItem] = len;
51 | 		}
52 | 
53 | 		return {
54 | 			_STOPWORD,
55 | 			STOPWORD,
56 | 			STOPWORD2,
57 | 		}
58 | 	}
59 | }
60 | 
61 | export const { _STOPWORD, STOPWORD, STOPWORD2 } = NS_STOPWORD;
62 | 
63 | export default exports as typeof import('./STOPWORD');
64 | 


--------------------------------------------------------------------------------
/lib/mod/index.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/16/016.
 3 |  */
 4 | import { ISubOptimizer, ISubOptimizerCreate, Optimizer, SubSModuleOptimizer } from './Optimizer';
 5 | import { ISubTokenizer, ISubTokenizerCreate, SubSModuleTokenizer, Tokenizer } from './Tokenizer';
 6 | import { ISubSModule, ISubSModuleCreate, ISubSModuleMethod, SubSModule } from './mod';
 7 | export { Optimizer, SubSModuleOptimizer, ISubOptimizer, ISubOptimizerCreate };
 8 | export { Tokenizer, SubSModuleTokenizer, ISubTokenizer, ISubTokenizerCreate };
 9 | export { SubSModule, ISubSModule, ISubSModuleCreate, ISubSModuleMethod };
10 | /**
11 |  * 识别模块
12 |  * 强制分割类单词识别
13 |  */
14 | export declare enum ENUM_SUBMODS {
15 |     /**
16 |      * URL识别
17 |      */
18 |     URLTokenizer = "URLTokenizer",
19 |     /**
20 |      * 通配符，必须在标点符号识别之前
21 |      */
22 |     WildcardTokenizer = "WildcardTokenizer",
23 |     /**
24 |      * 标点符号识别
25 |      */
26 |     PunctuationTokenizer = "PunctuationTokenizer",
27 |     /**
28 |      * 外文字符、数字识别，必须在标点符号识别之后
29 |      */
30 |     ForeignTokenizer = "ForeignTokenizer",
31 |     /**
32 |      * 词典识别
33 |      */
34 |     DictTokenizer = "DictTokenizer",
35 |     /**
36 |      * 人名识别，建议在词典识别之后
37 |      */
38 |     ChsNameTokenizer = "ChsNameTokenizer",
39 |     JpSimpleTokenizer = "JpSimpleTokenizer",
40 |     /**
41 |      * 注音
42 |      */
43 |     ZhuyinTokenizer = "ZhuyinTokenizer",
44 |     /**
45 |      * 部首
46 |      */
47 |     /**
48 |      * 邮箱地址识别
49 |      */
50 |     EmailOptimizer = "EmailOptimizer",
51 |     /**
52 |      * 人名识别优化
53 |      */
54 |     ChsNameOptimizer = "ChsNameOptimizer",
55 |     /**
56 |      * 词典识别优化
57 |      */
58 |     DictOptimizer = "DictOptimizer",
59 |     /**
60 |      * 日期时间识别优化
61 |      */
62 |     DatetimeOptimizer = "DatetimeOptimizer",
63 |     /**
64 |      * 合併外文與中文的詞
65 |      * 例如 Ｔ恤
66 |      */
67 |     ForeignOptimizer = "ForeignOptimizer",
68 |     /**
69 |      * 自動處理 `里|裏|后`
70 |      */
71 |     ZhtSynonymOptimizer = "ZhtSynonymOptimizer",
72 |     AdjectiveOptimizer = "AdjectiveOptimizer"
73 | }
74 | /**
75 |  * 不包含在預設模組列表內 需要手動指定
76 |  */
77 | export declare enum ENUM_SUBMODS_OTHER {
78 |     /**
79 |      * 单字切分模块
80 |      */
81 |     SingleTokenizer = "SingleTokenizer"
82 | }
83 | export type ENUM_SUBMODS_NAME = ENUM_SUBMODS | ENUM_SUBMODS_OTHER;
84 | export declare const LIST_SUBMODS_NOT_DEF: ENUM_SUBMODS[];
85 | export declare const SUBMODS_LIST: import("ts-enum-util").EnumWrapper<string, typeof ENUM_SUBMODS>;
86 | export declare const SUBMODS_OTHER_LIST: import("ts-enum-util").EnumWrapper<string, typeof ENUM_SUBMODS_OTHER>;
87 | /**
88 |  * 取得列表並且保持 ENUM 順序
89 |  * @param {boolean} all
90 |  * @returns {ENUM_SUBMODS[]}
91 |  */
92 | export declare function getDefault(all?: boolean): ENUM_SUBMODS[];
93 | export default getDefault;
94 | 


--------------------------------------------------------------------------------
/lib/mod/index.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Created by user on 2018/4/16/016.
  3 |  */
  4 | 
  5 | import { $enum } from "ts-enum-util";
  6 | import { ISubOptimizer, ISubOptimizerCreate, Optimizer, SubSModuleOptimizer } from './Optimizer';
  7 | import { ISubTokenizer, ISubTokenizerCreate, SubSModuleTokenizer, Tokenizer } from './Tokenizer';
  8 | import { ISubSModule, ISubSModuleCreate, ISubSModuleMethod, SubSModule } from './mod';
  9 | 
 10 | export { Optimizer, SubSModuleOptimizer, ISubOptimizer, ISubOptimizerCreate }
 11 | export { Tokenizer, SubSModuleTokenizer, ISubTokenizer, ISubTokenizerCreate }
 12 | export { SubSModule, ISubSModule, ISubSModuleCreate, ISubSModuleMethod }
 13 | 
/**
 * Names of the built-in sub-modules: tokenizers first, then optimizers.
 * Several members document an ordering constraint relative to other members,
 * so the declaration order should be preserved.
 */
export enum ENUM_SUBMODS
{
	/**
	 * URL recognition
	 */
	URLTokenizer = 'URLTokenizer',
	/**
	 * Wildcards; must come before punctuation recognition
	 */
	WildcardTokenizer = 'WildcardTokenizer',
	/**
	 * Punctuation recognition
	 */
	PunctuationTokenizer = 'PunctuationTokenizer',
	/**
	 * Foreign characters and digits; must come after punctuation recognition
	 */
	ForeignTokenizer = 'ForeignTokenizer',

	// Chinese word recognition

	/**
	 * Dictionary-based recognition
	 */
	DictTokenizer = 'DictTokenizer',
	/**
	 * Person-name recognition; recommended after dictionary recognition
	 */
	ChsNameTokenizer = 'ChsNameTokenizer',

	JpSimpleTokenizer = 'JpSimpleTokenizer',

	/**
	 * Zhuyin (Bopomofo)
	 */
	ZhuyinTokenizer = 'ZhuyinTokenizer',

	// Radicals (member currently disabled)
	//ZhRadicalTokenizer = 'ZhRadicalTokenizer',

	// @todo optimizer modules

	/**
	 * Email address recognition
	 */
	EmailOptimizer = 'EmailOptimizer',
	/**
	 * Person-name recognition optimization
	 */
	ChsNameOptimizer = 'ChsNameOptimizer',
	/**
	 * Dictionary recognition optimization
	 */
	DictOptimizer = 'DictOptimizer',
	/**
	 * Date/time recognition optimization
	 */
	DatetimeOptimizer = 'DatetimeOptimizer',

	/**
	 * Merges adjacent foreign-script and Chinese parts into one word,
	 * e.g. `Ｔ恤`
	 */
	ForeignOptimizer = 'ForeignOptimizer',

	/**
	 * Automatically handles `里|裏|后`
	 */
	ZhtSynonymOptimizer = 'ZhtSynonymOptimizer',

	AdjectiveOptimizer = 'AdjectiveOptimizer',
}
 92 | 
 93 | /**
 94 |  * Modules that are not part of the default module list; they must be specified manually.
 95 |  */
 96 | export enum ENUM_SUBMODS_OTHER
 97 | {
 98 | 	/**
 99 | 	 * Single-character segmentation module
100 | 	 */
101 | 	SingleTokenizer = 'SingleTokenizer',
102 | }
103 | 
104 | export type ENUM_SUBMODS_NAME = ENUM_SUBMODS | ENUM_SUBMODS_OTHER;
105 | 
106 | export const LIST_SUBMODS_NOT_DEF = [
107 | 	ENUM_SUBMODS.ZhtSynonymOptimizer,
108 | ];
109 | 
110 | export const SUBMODS_LIST = $enum(ENUM_SUBMODS);
111 | export const SUBMODS_OTHER_LIST = $enum(ENUM_SUBMODS_OTHER);
112 | 
/**
 * Build the sub-module list, keeping the declaration order of ENUM_SUBMODS.
 *
 * @param all - when truthy, entries listed in LIST_SUBMODS_NOT_DEF are kept as well
 * @returns the selected sub-module names, in enum order
 */
export function getDefault(all?: boolean): ENUM_SUBMODS[]
{
	const knownKeys = SUBMODS_LIST.getKeys();
	const picked: any[] = [];

	for (const key of Object.keys(ENUM_SUBMODS))
	{
		// skip anything already collected or unknown to the enum wrapper
		if (picked.includes(key) || !knownKeys.includes(key as any))
		{
			continue;
		}

		// entries excluded from the defaults are only kept on request
		if (!all && LIST_SUBMODS_NOT_DEF.includes(key as any))
		{
			continue;
		}

		picked.push(key);
	}

	return picked;
}
137 | 
138 | //console.log(getDefault(true));
139 | 
140 | export default getDefault;
141 | 


--------------------------------------------------------------------------------
/lib/mod/mod.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/2/21/021.
 3 |  */
 4 | import { IDICT_BLACKLIST, IWord, Segment } from '../Segment';
 5 | import { IWordDebug, IWordDebugInfo } from '../util/index';
 6 | import { ENUM_SUBMODS_NAME } from './index';
 7 | import { POSTAG } from '@novel-segment/postag/lib/postag/ids';
 8 | export type ISModuleType = 'optimizer' | 'tokenizer' | string;
 9 | export declare class SModule implements ISModule {
10 |     type?: ISModuleType;
11 |     segment: Segment;
12 |     /**
13 |      * @param {Segment} segment word-segmentation interface
14 |      */
15 |     constructor(segment: Segment);
16 |     protected _doMethod<S extends IWord, T extends ISubSModule>(fn: string, target: S[], mods: T[], ...argv: any[]): S[];
17 | }
18 | export declare class SubSModule implements ISubSModule {
19 |     static type: ISModuleType;
20 |     type: ISModuleType;
21 |     segment: Segment;
22 |     priority?: number;
23 |     inited?: boolean;
24 |     static NAME: string;
25 |     name: string;
26 |     protected _TABLE?: any;
27 |     protected _POSTAG?: typeof POSTAG;
28 |     protected _BLACKLIST?: IDICT_BLACKLIST;
29 |     constructor(type?: ISModuleType, segment?: Segment, ...argv: any[]);
30 |     static init<T extends SubSModule = SubSModule>(segment: Segment, ...argv: any[]): T;
31 |     protected static _init<T extends SubSModule>(libThis: IModuleStatic<T>, segment: Segment, ...argv: any[]): T;
32 |     init(segment: Segment, ...argv: any[]): this;
33 |     protected _cache(...argv: any[]): void;
34 |     /**
35 |      * Return the minimal form of IWord { w, p, f, s }
36 |      */
37 |     protected createRawToken<T extends IWord, U extends IWordDebugInfo = IWordDebugInfo>(data: T, ow?: Partial<T & IWord>, attr?: U & IWordDebugInfo): T;
38 |     protected createToken<T extends IWord, U extends IWordDebugInfo = IWordDebugInfo>(data: T, skipCheck?: boolean, attr?: U & IWordDebugInfo): T;
39 |     protected sliceToken<T extends IWord, U extends IWordDebugInfo>(words: T[], pos: number, len: number, data: T, skipCheck?: boolean, attr?: U & IWordDebugInfo): T[];
40 |     protected debugToken<T extends IWordDebug, U extends IWordDebugInfo>(data: T, attr?: U & IWordDebugInfo, returnToken?: true, ...argv: any[]): T;
41 | }
42 | export interface ISubSModuleMethod<T extends IWord, U extends IWord = T> {
43 |     (words: T[], ...argv: any[]): U[];
44 | }
45 | export interface ISubSModuleCreate<T extends SubSModule, R extends SubSModule = SubSModule> {
46 |     (segment: Segment, ...argv: any[]): T & R;
47 | }
48 | export interface ISModule {
49 |     type?: ISModuleType;
50 |     segment: Segment;
51 | }
52 | export interface IModuleStatic<T extends ISModule | SubSModule> {
53 |     type: ISModuleType;
54 |     new (type?: ISModuleType, segment?: Segment, ...argv: any[]): T;
55 |     init(segment: Segment, ...argv: any[]): T;
56 | }
57 | export interface ISubSModule {
58 |     type: ISModuleType;
59 |     segment: Segment;
60 |     name?: ENUM_SUBMODS_NAME | string;
61 |     priority?: number;
62 |     init(segment: Segment, ...argv: any[]): ISubSModule;
63 | }
64 | declare const _default: typeof import("./mod");
65 | export default _default;
66 | 


--------------------------------------------------------------------------------
/lib/mod/mod.js:
--------------------------------------------------------------------------------
  1 | "use strict";
  2 | /**
  3 |  * Created by user on 2018/2/21/021.
  4 |  */
  5 | Object.defineProperty(exports, "__esModule", { value: true });
  6 | exports.SubSModule = exports.SModule = void 0;
  7 | const debug_1 = require("../util/debug");
  8 | class SModule {
  9 |     /**
 10 |      * @param {Segment} segment 分词接口
 11 |      */
 12 |     constructor(segment) {
 13 |         this.segment = segment;
 14 |     }
 15 |     _doMethod(fn, target, mods, ...argv) {
 16 |         mods.forEach(function (mod) {
 17 |             // @ts-ignore
 18 |             if (typeof mod._cache === 'function') {
 19 |                 // @ts-ignore
 20 |                 mod._cache();
 21 |             }
 22 |             target = mod[fn](target, ...argv);
 23 |         });
 24 |         return target;
 25 |     }
 26 | }
 27 | exports.SModule = SModule;
 28 | class SubSModule {
 29 |     constructor(type, segment, ...argv) {
 30 |         if (type) {
 31 |             this.type = type;
 32 |         }
 33 |         if (!this.type) {
 34 |             throw new Error();
 35 |         }
 36 |         if (segment) {
 37 |             this.init(segment, ...argv);
 38 |             this.inited = true;
 39 |         }
 40 |     }
 41 |     static init(segment, ...argv) {
 42 |         // @ts-ignore
 43 |         return this._init(this, segment, ...argv);
 44 |     }
 45 |     static _init(libThis, segment, ...argv) {
 46 |         if (!libThis.type) {
 47 |             throw new Error();
 48 |         }
 49 |         let mod = new libThis(libThis.type, segment, ...argv);
 50 |         if (!mod.inited) {
 51 |             mod.init(segment, ...argv);
 52 |             mod.inited = true;
 53 |         }
 54 |         // @ts-ignore
 55 |         return mod;
 56 |     }
 57 |     init(segment, ...argv) {
 58 |         this.segment = segment;
 59 |         this.inited = true;
 60 |         //this._cache();
 61 |         return this;
 62 |     }
 63 |     _cache(...argv) {
 64 |         this._POSTAG = this.segment.POSTAG;
 65 |     }
 66 |     /**
 67 |      * 回傳最簡版的 IWord { w, p, f, s }
 68 |      */
 69 |     createRawToken(data, ow, attr) {
 70 |         var _a, _b, _c, _d;
 71 |         // @ts-ignore
 72 |         ow = ow || {};
 73 |         let nw = {
 74 |             w: (_a = data.w) !== null && _a !== void 0 ? _a : ow.w,
 75 |             p: (_b = data.p) !== null && _b !== void 0 ? _b : ow.p,
 76 |             f: (_c = data.f) !== null && _c !== void 0 ? _c : ow.f,
 77 |             s: (_d = data.s) !== null && _d !== void 0 ? _d : ow.s,
 78 |         };
 79 |         if (attr) {
 80 |             this.debugToken(nw, attr);
 81 |         }
 82 |         return nw;
 83 |     }
 84 |     createToken(data, skipCheck, attr) {
 85 |         let TABLE = this._TABLE;
 86 |         if (!skipCheck && TABLE && !(data.w in TABLE)) {
 87 |             this.debugToken(data, {
 88 |                 autoCreate: true,
 89 |             });
 90 |         }
 91 |         // 自動將模組名稱血入 debug 資訊
 92 |         if (this.name) {
 93 |             attr = Object.assign(attr || {});
 94 |             if (!(this.name in attr)) {
 95 |                 // @ts-ignore
 96 |                 attr[this.name] = true;
 97 |             }
 98 |         }
 99 |         if (attr) {
100 |             this.debugToken(data, attr);
101 |         }
102 |         return data;
103 |     }
104 |     sliceToken(words, pos, len, data, skipCheck, attr) {
105 |         words.splice(pos, len, this.createToken(data, skipCheck, attr));
106 |         return words;
107 |     }
108 |     debugToken(data, attr, returnToken, ...argv) {
109 |         return (0, debug_1.debugToken)(data, attr, returnToken, ...argv);
110 |     }
111 | }
112 | exports.SubSModule = SubSModule;
113 | exports.default = exports;
114 | //# sourceMappingURL=mod.js.map


--------------------------------------------------------------------------------
/lib/segment/defaults.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Created by user on 2019/6/26.
3 |  */
4 | import { IOptionsDoSegment } from './types';
5 | export declare const defaultOptionsDoSegment: IOptionsDoSegment;
6 | 


--------------------------------------------------------------------------------
/lib/segment/defaults.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 | Object.defineProperty(exports, "__esModule", { value: true });
3 | exports.defaultOptionsDoSegment = void 0;
4 | exports.defaultOptionsDoSegment = {};
5 | //# sourceMappingURL=defaults.js.map


--------------------------------------------------------------------------------
/lib/segment/defaults.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Created by user on 2019/6/26.
3 |  */
4 | import { IOptionsDoSegment } from './types';
5 | 
6 | export const defaultOptionsDoSegment: IOptionsDoSegment = {};
7 | 


--------------------------------------------------------------------------------
/lib/segment/index.d.ts:
--------------------------------------------------------------------------------
1 | import SegmentCore from './core';
2 | export declare class SegmentBase extends SegmentCore {
3 | }
4 | export default SegmentBase;
5 | 


--------------------------------------------------------------------------------
/lib/segment/index.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.SegmentBase = void 0;
 4 | const tslib_1 = require("tslib");
 5 | const core_1 = tslib_1.__importDefault(require("./core"));
 6 | class SegmentBase extends core_1.default {
 7 | }
 8 | exports.SegmentBase = SegmentBase;
 9 | exports.default = SegmentBase;
10 | //# sourceMappingURL=index.js.map


--------------------------------------------------------------------------------
/lib/segment/index.ts:
--------------------------------------------------------------------------------
1 | import SegmentCore from './core';
2 | 
3 | export class SegmentBase extends SegmentCore
4 | {
5 | 
6 | }
7 | 
8 | export default SegmentBase
9 | 


--------------------------------------------------------------------------------
/lib/segment/method.d.ts:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bluelovers/node-segment/f5688c9dd2c31a16d7eefd2275aa9b206a5f7134/lib/segment/method.d.ts


--------------------------------------------------------------------------------
/lib/segment/method.js:
--------------------------------------------------------------------------------
1 | //# sourceMappingURL=method.js.map


--------------------------------------------------------------------------------
/lib/segment/method.ts:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bluelovers/node-segment/f5688c9dd2c31a16d7eefd2275aa9b206a5f7134/lib/segment/method.ts


--------------------------------------------------------------------------------
/lib/segment/methods/_get_text.d.ts:
--------------------------------------------------------------------------------
1 | export declare function _get_text(text: string | Buffer): string;
2 | 


--------------------------------------------------------------------------------
/lib/segment/methods/_get_text.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports._get_text = _get_text;
 4 | const crlf_normalize_1 = require("crlf-normalize");
 5 | function _get_text(text) {
 6 |     try {
 7 |         if (Buffer.isBuffer(text)) {
 8 |             text = text.toString();
 9 |         }
10 |     }
11 |     catch (e) { }
12 |     finally {
13 |         if (typeof text !== 'string') {
14 |             throw new TypeError(`text must is string or Buffer`);
15 |         }
16 |         text = (0, crlf_normalize_1.crlf)(text);
17 |     }
18 |     return text;
19 | }
20 | //# sourceMappingURL=_get_text.js.map


--------------------------------------------------------------------------------
/lib/segment/methods/_get_text.ts:
--------------------------------------------------------------------------------
 1 | import { crlf } from 'crlf-normalize';
 2 | 
 3 | export function _get_text(text: string | Buffer): string
 4 | {
 5 | 	try
 6 | 	{
 7 | 		if (Buffer.isBuffer(text))
 8 | 		{
 9 | 			text = text.toString();
10 | 		}
11 | 	}
12 | 	catch (e)
13 | 	{}
14 | 	finally
15 | 	{
16 | 		if (typeof text !== 'string')
17 | 		{
18 | 			throw new TypeError(`text must is string or Buffer`)
19 | 		}
20 | 
21 | 		text = crlf(text);
22 | 	}
23 | 
24 | 	return text;
25 | }
26 | 


--------------------------------------------------------------------------------
/lib/segment/methods/convertSynonym.d.ts:
--------------------------------------------------------------------------------
 1 | import { IWordDebug } from '../../util/debug';
 2 | import { IDICT, IDICT_SYNONYM } from '../types';
 3 | import { ITSOverwrite } from 'ts-type';
 4 | import { POSTAG } from '@novel-segment/postag/lib/postag/ids';
 5 | import { IWord } from '@novel-segment/types';
 6 | interface IOptions {
 7 |     /**
 8 |      * for debug
 9 |      */
10 |     showcount?: boolean;
11 |     DICT_SYNONYM: IDICT_SYNONYM;
12 |     DICT_TABLE: IDICT<IWord>;
13 |     POSTAG: typeof POSTAG;
14 | }
15 | export interface IConvertSynonymWithShowcount {
16 |     count: number;
17 |     list: IWordDebug[];
18 | }
19 | /**
20 |  * Convert synonyms; with `showcount: true` the result carries the replacement count and the list
21 |  */
22 | export declare function convertSynonym(ret: IWordDebug[], options: ITSOverwrite<IOptions, {
23 |     showcount: true;
24 | }>): {
25 |     count: number;
26 |     list: IWordDebug[];
27 | };
28 | /**
29 |  * Convert synonyms; returns the converted word list
30 |  */
31 | export declare function convertSynonym(ret: IWordDebug[], options?: IOptions): IWordDebug[];
32 | export {};
33 | 


--------------------------------------------------------------------------------
/lib/segment/methods/convertSynonym.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.convertSynonym = convertSynonym;
 4 | const tslib_1 = require("tslib");
 5 | const core_1 = tslib_1.__importDefault(require("deepmerge-plus/core"));
 6 | const debug_1 = require("../../util/debug");
 7 | function convertSynonym(ret, options) { // replaces words via DICT_SYNONYM, repeating passes until one makes no replacement
 8 |     const { showcount, POSTAG, DICT_SYNONYM, DICT_TABLE } = options;
 9 |     let total_count = 0; // replacements accumulated over all passes
10 |     //const RAW = Symbol.for('RAW');
11 |     // one synonym-conversion pass over the list
12 |     function _convertSynonym(list) {
13 |         let count = 0; // replacements made during this pass
14 |         list = list.reduce(function (a, item) {
15 |             let bool;
16 |             let w = item.w;
17 |             let nw;
18 |             let debug = (0, debug_1.debugToken)(item);
19 |             if (w in DICT_SYNONYM) { // NOTE(review): `in` also matches inherited keys (e.g. "constructor") if the table is a plain object - confirm
20 |                 bool = true;
21 |                 nw = DICT_SYNONYM[w];
22 |             }
23 |             else if (debug.autoCreate && !debug.convertSynonym && !item.ow && item.m && item.m.length) {
24 |                 nw = item.m.reduce(function (a, b) { // rebuild the word from its parts in item.m, swapping in synonyms part by part
25 |                     if (typeof b === 'string') {
26 |                         a.push(b);
27 |                     }
28 |                     else if (b.w in DICT_SYNONYM) {
29 |                         a.push(DICT_SYNONYM[b.w]);
30 |                         bool = true;
31 |                     }
32 |                     else {
33 |                         a.push(b.w);
34 |                     }
35 |                     return a;
36 |                 }, []).join('');
37 |             }
38 |             if (bool) {
39 |                 count++;
40 |                 total_count++;
41 |                 //return { w: DICT_SYNONYM[item.w], p: item.p };
42 |                 let p = item.p;
43 |                 if (w in DICT_TABLE) {
44 |                     p = DICT_TABLE[w].p || p; // prefer the dictionary entry's p for the original word when it is truthy
45 |                 }
46 |                 if (p & POSTAG.BAD) {
47 |                     p = p ^ POSTAG.BAD; // toggle the BAD bits off (they are known to be set here if BAD is a single bit)
48 |                 }
49 |                 let item_new = (0, debug_1.debugToken)({
50 |                     ...item,
51 |                     w: nw,
52 |                     ow: w, // original word
53 |                     p,
54 |                     op: item.p, // original p
55 |                     //[RAW]: item,
56 |                     //source: item,
57 |                 }, {
58 |                     convertSynonym: true,
59 |                     //_source: item,
60 |                     /**
61 |                      * JSON.stringify
62 |                      * avoid TypeError: Converting circular structure to JSON
63 |                      */
64 |                     _source: (0, core_1.default)({}, item),
65 |                 }, true);
66 |                 a.push(item_new);
67 |             }
68 |             else {
69 |                 a.push(item);
70 |             }
71 |             debug = undefined;
72 |             return a;
73 |         }, []);
74 |         return { count: count, list: list };
75 |     }
76 |     let result;
77 |     do {
78 |         result = _convertSynonym(ret);
79 |         ret = result.list;
80 |         result.list = undefined;
81 |     } while (result.count > 0); // a replaced word may itself have a synonym, so run again until nothing changes
82 |     result = undefined;
83 |     if (showcount) {
84 |         return { count: total_count, list: ret };
85 |     }
86 |     return ret;
87 | }
88 | //# sourceMappingURL=convertSynonym.js.map


--------------------------------------------------------------------------------
/lib/segment/methods/convertSynonym.ts:
--------------------------------------------------------------------------------
  1 | import deepmerge from 'deepmerge-plus/core';
  2 | import { debugToken, IWordDebug } from '../../util/debug';
  3 | import { IDICT, IDICT_SYNONYM } from '../types';
  4 | import { ITSOverwrite } from 'ts-type';
  5 | import { POSTAG } from '@novel-segment/postag/lib/postag/ids';
  6 | import { IWord } from '@novel-segment/types';
  7 | 
  8 | interface IOptions
  9 | {
 10 | 	/**
 11 | 	 * for debug
 12 | 	 */
 13 | 	showcount?: boolean,
 14 | 	DICT_SYNONYM: IDICT_SYNONYM,
 15 | 	DICT_TABLE: IDICT<IWord>,
 16 | 	POSTAG: typeof POSTAG,
 17 | }
 18 | 
 19 | export interface IConvertSynonymWithShowcount
 20 | {
 21 | 	count: number,
 22 | 	list: IWordDebug[],
 23 | }
 24 | 
 25 | /**
 26 |  * Convert synonyms; with `showcount: true` the result is `{ count, list }`
 27 |  */
 28 | export function convertSynonym(ret: IWordDebug[], options: ITSOverwrite<IOptions, {
 29 | 	showcount: true,
 30 | }>): {
 31 | 	count: number,
 32 | 	list: IWordDebug[],
 33 | }
 34 | /**
 35 |  * Convert synonyms; returns the converted word list
 36 |  */
 37 | export function convertSynonym(ret: IWordDebug[], options?: IOptions): IWordDebug[]
 38 | export function convertSynonym(ret: IWordDebug[], options: IOptions)
 39 | {
 40 | 	const { showcount, POSTAG, DICT_SYNONYM, DICT_TABLE } = options; // NOTE(review): the overload above marks options optional, but this destructuring throws when it is undefined
 41 | 
 42 | 	let total_count = 0; // replacements accumulated over all passes
 43 | 
 44 | 	//const RAW = Symbol.for('RAW');
 45 | 
 46 | 	// one synonym-conversion pass over the list
 47 | 	function _convertSynonym(list: IWordDebug[])
 48 | 	{
 49 | 		let count = 0; // replacements made during this pass
 50 | 		list = list.reduce(function (a, item: IWordDebug)
 51 | 		{
 52 | 			let bool: boolean;
 53 | 			let w = item.w;
 54 | 			let nw: string;
 55 | 
 56 | 			let debug = debugToken(item);
 57 | 
 58 | 			if (w in DICT_SYNONYM) // NOTE(review): `in` also matches inherited keys (e.g. "constructor") if the table is a plain object - confirm
 59 | 			{
 60 | 				bool = true;
 61 | 				nw = DICT_SYNONYM[w];
 62 | 			}
 63 | 			else if (debug.autoCreate && !debug.convertSynonym && !item.ow && item.m && item.m.length)
 64 | 			{
 65 | 				nw = item.m.reduce(function (a: string[], b) // rebuild the word from its parts in item.m, swapping in synonyms part by part
 66 | 				{
 67 | 					if (typeof b === 'string')
 68 | 					{
 69 | 						a.push(b);
 70 | 					}
 71 | 					else if (b.w in DICT_SYNONYM)
 72 | 					{
 73 | 						a.push(DICT_SYNONYM[b.w]);
 74 | 						bool = true;
 75 | 					}
 76 | 					else
 77 | 					{
 78 | 						a.push(b.w);
 79 | 					}
 80 | 
 81 | 					return a;
 82 | 				}, []).join('');
 83 | 			}
 84 | 
 85 | 			if (bool)
 86 | 			{
 87 | 				count++;
 88 | 				total_count++;
 89 | 				//return { w: DICT_SYNONYM[item.w], p: item.p };
 90 | 
 91 | 				let p = item.p;
 92 | 
 93 | 				if (w in DICT_TABLE)
 94 | 				{
 95 | 					p = DICT_TABLE[w].p || p; // prefer the dictionary entry's p for the original word when it is truthy
 96 | 				}
 97 | 
 98 | 				if (p & POSTAG.BAD)
 99 | 				{
100 | 					p = p ^ POSTAG.BAD; // toggle the BAD bits off (they are known to be set here if BAD is a single bit)
101 | 				}
102 | 
103 | 				let item_new = debugToken({
104 | 					...item,
105 | 
106 | 					w: nw,
107 | 					ow: w, // original word
108 | 					p,
109 | 					op: item.p, // original p
110 | 
111 | 					//[RAW]: item,
112 | 
113 | 					//source: item,
114 | 				}, {
115 | 					convertSynonym: true,
116 | 					//_source: item,
117 | 
118 | 					/**
119 | 					 * JSON.stringify
120 | 					 * avoid TypeError: Converting circular structure to JSON
121 | 					 */
122 | 					_source: deepmerge({}, item) as IWordDebug,
123 | 
124 | 				}, true);
125 | 
126 | 				a.push(item_new);
127 | 			}
128 | 			else
129 | 			{
130 | 				a.push(item);
131 | 			}
132 | 
133 | 			debug = undefined;
134 | 
135 | 			return a;
136 | 		}, [] as IWordDebug[]);
137 | 		return { count: count, list: list } as IConvertSynonymWithShowcount;
138 | 	}
139 | 
140 | 	let result: IConvertSynonymWithShowcount;
141 | 	do
142 | 	{
143 | 		result = _convertSynonym(ret);
144 | 		ret = result.list;
145 | 
146 | 		result.list = undefined;
147 | 	}
148 | 	while (result.count > 0); // a replaced word may itself have a synonym, so run again until nothing changes
149 | 
150 | 	result = undefined;
151 | 
152 | 	if (showcount)
153 | 	{
154 | 		return { count: total_count, list: ret };
155 | 	}
156 | 
157 | 	return ret;
158 | }
159 | 


--------------------------------------------------------------------------------
/lib/segment/methods/doSegment.d.ts:
--------------------------------------------------------------------------------
 1 | import { IWordDebug } from '../../util/debug';
 2 | import { POSTAG } from '@novel-segment/postag/lib/postag/ids';
 3 | export declare function _doSegmentStripPOSTAG(ret: IWordDebug[], postag: POSTAG): IWordDebug[];
 4 | /**
 5 |  * Remove stop words
 6 |  */
 7 | export declare function _doSegmentStripStopword(ret: IWordDebug[], STOPWORD: any): IWordDebug[];
 8 | export declare function _doSegmentStripSpace(ret: IWordDebug[]): IWordDebug[];
 9 | /**
10 |  * Return only the word text
11 |  */
12 | export declare function _doSegmentSimple(ret: IWordDebug[]): string[];
13 | 


--------------------------------------------------------------------------------
/lib/segment/methods/doSegment.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports._doSegmentStripPOSTAG = _doSegmentStripPOSTAG;
 4 | exports._doSegmentStripStopword = _doSegmentStripStopword;
 5 | exports._doSegmentStripSpace = _doSegmentStripSpace;
 6 | exports._doSegmentSimple = _doSegmentSimple;
 7 | function _doSegmentStripPOSTAG(ret, postag) {
 8 |     return ret.filter(function (item) {
 9 |         return item.p !== postag;
10 |     });
11 | }
12 | /**
13 |  * 去除停止符
14 |  */
15 | function _doSegmentStripStopword(ret, STOPWORD) {
16 |     return ret.filter(function (item) {
17 |         return !(item.w in STOPWORD);
18 |     });
19 | }
20 | function _doSegmentStripSpace(ret) {
21 |     return ret.filter(function (item) {
22 |         return !/^\s+$/g.test(item.w);
23 |     });
24 | }
25 | /**
26 |  * 仅返回单词内容
27 |  */
28 | function _doSegmentSimple(ret) {
29 |     return ret.map(function (item) {
30 |         return item.w;
31 |     });
32 | }
33 | //# sourceMappingURL=doSegment.js.map


--------------------------------------------------------------------------------
/lib/segment/methods/doSegment.ts:
--------------------------------------------------------------------------------
 1 | import { IWordDebug } from '../../util/debug';
 2 | import { POSTAG } from '@novel-segment/postag/lib/postag/ids';
 3 | 
 4 | export function _doSegmentStripPOSTAG(ret: IWordDebug[], postag: POSTAG)
 5 | {
 6 | 	return ret.filter(function (item)
 7 | 	{
 8 | 		return item.p !== postag;
 9 | 	});
10 | }
11 | 
/**
 * Remove stop words.
 *
 * A word is dropped when its text `w` is an own key of `STOPWORD`.
 * Returns a new array; `ret` is not modified.
 */
export function _doSegmentStripStopword(ret: IWordDebug[], STOPWORD)
{
	const hasOwn = Object.prototype.hasOwnProperty;

	return ret.filter(function (item)
	{
		// own-property lookup: `in` would also match inherited keys such as
		// "constructor" or "toString" and wrongly strip those words
		return !hasOwn.call(STOPWORD, item.w);
	});
}
22 | 
/**
 * Drop words whose text consists only of whitespace characters.
 */
export function _doSegmentStripSpace(ret: IWordDebug[])
{
	// the regex literal stays inside the callback so every test starts from a fresh lastIndex
	return ret.filter((item) => /^\s+$/g.test(item.w) === false);
}
30 | 
/**
 * Return only the text (`w`) of each word.
 */
export function _doSegmentSimple(ret: IWordDebug[]): string[]
{
	return ret.map((item) => item.w);
}
41 | 
42 | 


--------------------------------------------------------------------------------
/lib/segment/methods/getOptionsDoSegment.d.ts:
--------------------------------------------------------------------------------
1 | import { IOptionsDoSegment } from '../types';
2 | export declare function getOptionsDoSegment<T extends IOptionsDoSegment>(options: T, optionsDoSegment: any): T;
3 | 


--------------------------------------------------------------------------------
/lib/segment/methods/getOptionsDoSegment.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 | Object.defineProperty(exports, "__esModule", { value: true });
3 | exports.getOptionsDoSegment = getOptionsDoSegment;
4 | const defaults_1 = require("../defaults");
/**
 * Merge segment options into a fresh object.
 * Later sources win: module defaults, then `optionsDoSegment`, then `options`.
 */
function getOptionsDoSegment(options, optionsDoSegment) {
    return {
        ...defaults_1.defaultOptionsDoSegment,
        ...optionsDoSegment,
        ...options,
    };
}
8 | //# sourceMappingURL=getOptionsDoSegment.js.map


--------------------------------------------------------------------------------
/lib/segment/methods/getOptionsDoSegment.ts:
--------------------------------------------------------------------------------
 1 | import { IOptionsDoSegment } from '../types';
 2 | import { defaultOptionsDoSegment } from '../defaults';
 3 | 
 4 | export function getOptionsDoSegment<T extends IOptionsDoSegment>(options: T, optionsDoSegment: any): T
 5 | {
 6 | 	return Object.assign({},
 7 | 		defaultOptionsDoSegment,
 8 | 		optionsDoSegment,
 9 | 		options,
10 | 	);
11 | }
12 | 


--------------------------------------------------------------------------------
/lib/segment/methods/indexOf.d.ts:
--------------------------------------------------------------------------------
 1 | import { IWord } from '@novel-segment/types';
 2 | /**
 3 |  * Find the position of a given word or part of speech in an array of words
 4 |  *
 5 |  * @param {Array} words array of words
 6 |  * @param {Number|String} s the word or part of speech to look for
 7 |  * @param {Number} cur start position
 8 |  * @return {Number} -1 when not found
 9 |  */
10 | export declare function indexOf(words: IWord[], s: string | number, cur?: number, ...argv: any[]): number;
11 | 


--------------------------------------------------------------------------------
/lib/segment/methods/indexOf.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.indexOf = indexOf;
 4 | /**
 5 |  * 在单词数组中查找某一个单词或词性所在的位置
 6 |  *
 7 |  * @param {Array} words 单词数组
 8 |  * @param {Number|String} s 要查找的单词或词性
 9 |  * @param {Number} cur 开始位置
10 |  * @return {Number} 找不到，返回-1
11 |  */
12 | function indexOf(words, s, cur, ...argv) {
13 |     cur = isNaN(cur) ? 0 : cur;
14 |     let f = typeof s === 'string' ? 'w' : 'p';
15 |     while (cur < words.length) {
16 |         if (words[cur][f] === s)
17 |             return cur;
18 |         cur++;
19 |     }
20 |     return -1;
21 | }
22 | //# sourceMappingURL=indexOf.js.map


--------------------------------------------------------------------------------
/lib/segment/methods/indexOf.ts:
--------------------------------------------------------------------------------
 1 | import { IWord } from '@novel-segment/types';
 2 | 
 3 | /**
 4 |  * 在单词数组中查找某一个单词或词性所在的位置
 5 |  *
 6 |  * @param {Array} words 单词数组
 7 |  * @param {Number|String} s 要查找的单词或词性
 8 |  * @param {Number} cur 开始位置
 9 |  * @return {Number} 找不到，返回-1
10 |  */
11 | export function indexOf(words: IWord[], s: string | number, cur?: number, ...argv)
12 | {
13 | 	cur = isNaN(cur) ? 0 : cur;
14 | 	let f = typeof s === 'string' ? 'w' : 'p';
15 | 
16 | 	while (cur < words.length)
17 | 	{
18 | 		if (words[cur][f] === s) return cur;
19 | 		cur++;
20 | 	}
21 | 
22 | 	return -1;
23 | }
24 | 


--------------------------------------------------------------------------------
/lib/segment/methods/listModules.d.ts:
--------------------------------------------------------------------------------
 1 | import { IOptionsDoSegment } from '../types';
 2 | import { ISubTokenizer } from '../../mod/Tokenizer';
 3 | import { ISubOptimizer } from '../../mod/Optimizer';
 4 | import { Segment } from '../../Segment';
 5 | export declare function listModules(modules: Segment["modules"], options: IOptionsDoSegment): {
 6 |     enable: { // modules not matched by options.disableModules
 7 |         tokenizer: ISubTokenizer[];
 8 |         optimizer: ISubOptimizer[];
 9 |     };
10 |     disable: { // modules matched by options.disableModules
11 |         tokenizer: ISubTokenizer[];
12 |         optimizer: ISubOptimizer[];
13 |     };
14 | };
15 | 


--------------------------------------------------------------------------------
/lib/segment/methods/listModules.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.listModules = listModules;
 4 | function listModules(modules, options) {
 5 |     let ret = {
 6 |         enable: {
 7 |             tokenizer: [],
 8 |             optimizer: [],
 9 |         },
10 |         disable: {
11 |             tokenizer: [],
12 |             optimizer: [],
13 |         },
14 |     };
15 |     if (options === null || options === void 0 ? void 0 : options.disableModules) {
16 |         modules.tokenizer
17 |             .forEach(function (mod) {
18 |             let bool;
19 |             if (mod.name) {
20 |                 if (options.disableModules.includes(mod.name)) {
21 |                     bool = true;
22 |                 }
23 |             }
24 |             else {
25 |                 if (options.disableModules.includes(mod)) {
26 |                     bool = true;
27 |                 }
28 |             }
29 |             ret[bool ? 'disable' : 'enable'].tokenizer.push(mod);
30 |         });
31 |         modules.optimizer
32 |             .forEach(function (mod) {
33 |             let bool;
34 |             if (mod.name) {
35 |                 if (options.disableModules.includes(mod.name)) {
36 |                     bool = true;
37 |                 }
38 |             }
39 |             else {
40 |                 if (options.disableModules.includes(mod)) {
41 |                     bool = true;
42 |                 }
43 |             }
44 |             ret[bool ? 'disable' : 'enable'].optimizer.push(mod);
45 |         });
46 |     }
47 |     else {
48 |         ret.enable.tokenizer = modules.tokenizer.slice();
49 |         ret.enable.optimizer = modules.optimizer.slice();
50 |     }
51 |     return ret;
52 | }
53 | //# sourceMappingURL=listModules.js.map


--------------------------------------------------------------------------------
/lib/segment/methods/listModules.ts:
--------------------------------------------------------------------------------
 1 | import { IOptionsDoSegment } from '../types';
 2 | import { ISubTokenizer } from '../../mod/Tokenizer';
 3 | import { ISubOptimizer } from '../../mod/Optimizer';
 4 | import { Segment } from '../../Segment';
 5 | 
 6 | export function listModules(modules: Segment["modules"], options: IOptionsDoSegment)
 7 | {
 8 | 	let ret = {
 9 | 		enable: {
10 | 			tokenizer: [] as ISubTokenizer[],
11 | 			optimizer: [] as ISubOptimizer[],
12 | 		},
13 | 		disable: {
14 | 			tokenizer: [] as ISubTokenizer[],
15 | 			optimizer: [] as ISubOptimizer[],
16 | 		},
17 | 	};
18 | 
19 | 	if (options?.disableModules)
20 | 	{
21 | 		modules.tokenizer
22 | 			.forEach(function (mod)
23 | 			{
24 | 				let bool: boolean;
25 | 
26 | 				if (mod.name)
27 | 				{
28 | 					if (options.disableModules.includes(mod.name))
29 | 					{
30 | 						bool = true;
31 | 					}
32 | 				}
33 | 				else
34 | 				{
35 | 					if (options.disableModules.includes(mod as any))
36 | 					{
37 | 						bool = true;
38 | 					}
39 | 				}
40 | 
41 | 				ret[bool ? 'disable' : 'enable'].tokenizer.push(mod);
42 | 			})
43 | 		;
44 | 
45 | 		modules.optimizer
46 | 			.forEach(function (mod)
47 | 			{
48 | 				let bool: boolean;
49 | 
50 | 				if (mod.name)
51 | 				{
52 | 					if (options.disableModules.includes(mod.name))
53 | 					{
54 | 						bool = true;
55 | 					}
56 | 				}
57 | 				else
58 | 				{
59 | 					if (options.disableModules.includes(mod as any))
60 | 					{
61 | 						bool = true;
62 | 					}
63 | 				}
64 | 
65 | 				ret[bool ? 'disable' : 'enable'].optimizer.push(mod);
66 | 			})
67 | 		;
68 | 	}
69 | 	else
70 | 	{
71 | 		ret.enable.tokenizer = modules.tokenizer.slice();
72 | 		ret.enable.optimizer = modules.optimizer.slice();
73 | 	}
74 | 
75 | 	return ret;
76 | }
77 | 


--------------------------------------------------------------------------------
/lib/segment/methods/split.d.ts:
--------------------------------------------------------------------------------
 1 | import { IWord } from '@novel-segment/types';
/**
 * Splits a word array by a given word or part-of-speech.
 *
 * @param {Array} words the word array
 * @param {Number|String} s the word (string) or part-of-speech (number) to split on
 * @return {Array}
 */
export declare function split(words: IWord[], s: string | number, ...argv: any[]): IWord[];
10 | 


--------------------------------------------------------------------------------
/lib/segment/methods/split.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.split = split;
 4 | /**
 5 |  * 根据某个单词或词性来分割单词数组
 6 |  *
 7 |  * @param {Array} words 单词数组
 8 |  * @param {Number|String} s 用于分割的单词或词性
 9 |  * @return {Array}
10 |  */
11 | function split(words, s, ...argv) {
12 |     let ret = [];
13 |     let lasti = 0;
14 |     let i = 0;
15 |     let f = typeof s === 'string' ? 'w' : 'p';
16 |     while (i < words.length) {
17 |         if (words[i][f] === s) {
18 |             if (lasti < i)
19 |                 ret.push(words.slice(lasti, i));
20 |             ret.push(words.slice(i, i + 1));
21 |             i++;
22 |             lasti = i;
23 |         }
24 |         else {
25 |             i++;
26 |         }
27 |     }
28 |     if (lasti < words.length - 1) {
29 |         ret.push(words.slice(lasti, words.length));
30 |     }
31 |     words = undefined;
32 |     return ret;
33 | }
34 | //# sourceMappingURL=split.js.map


--------------------------------------------------------------------------------
/lib/segment/methods/split.ts:
--------------------------------------------------------------------------------
 1 | import { IWord } from '@novel-segment/types';
 2 | 
 3 | /**
 4 |  * 根据某个单词或词性来分割单词数组
 5 |  *
 6 |  * @param {Array} words 单词数组
 7 |  * @param {Number|String} s 用于分割的单词或词性
 8 |  * @return {Array}
 9 |  */
10 | export function split(words: IWord[], s: string | number, ...argv): IWord[]
11 | {
12 | 	let ret = [];
13 | 	let lasti = 0;
14 | 	let i = 0;
15 | 	let f = typeof s === 'string' ? 'w' : 'p';
16 | 
17 | 	while (i < words.length)
18 | 	{
19 | 		if (words[i][f] === s)
20 | 		{
21 | 			if (lasti < i) ret.push(words.slice(lasti, i));
22 | 			ret.push(words.slice(i, i + 1));
23 | 			i++;
24 | 			lasti = i;
25 | 		}
26 | 		else
27 | 		{
28 | 			i++;
29 | 		}
30 | 	}
31 | 	if (lasti < words.length - 1)
32 | 	{
33 | 		ret.push(words.slice(lasti, words.length));
34 | 	}
35 | 
36 | 	words = undefined;
37 | 
38 | 	return ret;
39 | }
40 | 


--------------------------------------------------------------------------------
/lib/segment/methods/stringify.d.ts:
--------------------------------------------------------------------------------
1 | export { stringify, stringify as default } from '@novel-segment/stringify';
2 | 


--------------------------------------------------------------------------------
/lib/segment/methods/stringify.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 | Object.defineProperty(exports, "__esModule", { value: true });
3 | exports.default = exports.stringify = void 0;
4 | var stringify_1 = require("@novel-segment/stringify");
5 | Object.defineProperty(exports, "stringify", { enumerable: true, get: function () { return stringify_1.stringify; } });
6 | Object.defineProperty(exports, "default", { enumerable: true, get: function () { return stringify_1.stringify; } });
7 | //# sourceMappingURL=stringify.js.map


--------------------------------------------------------------------------------
/lib/segment/methods/stringify.ts:
--------------------------------------------------------------------------------
1 | 
2 | export { stringify, stringify as default } from '@novel-segment/stringify';
3 | 


--------------------------------------------------------------------------------
/lib/segment/methods/useModules.d.ts:
--------------------------------------------------------------------------------
1 | import SegmentCore from '../core';
2 | import { ISubOptimizer } from '../../mod/Optimizer';
3 | import { ISubTokenizer } from '../../mod/Tokenizer';
/**
 * Reports whether `mod` is listed in `me.options.disableModules`.
 */
export declare function _isIgnoreModules<T extends SegmentCore>(me: T, mod: ISubOptimizer | ISubTokenizer | any, ...argv: any[]): boolean;
/**
 * Emits a console warning saying that `mod` cannot be used because it is disabled.
 */
export declare function _warnIgnoreModules(mod: any): void;
/**
 * Initializes `mod` via `mod.init(me, ...argv)` and registers it under
 * `me.modules[mod.type]`, unless the module is disabled, in which case a
 * warning is emitted instead. Returns `me`.
 */
export declare function useModules<T>(me: T, mod: ISubOptimizer | ISubTokenizer | any, ...argv: any[]): T;
7 | 


--------------------------------------------------------------------------------
/lib/segment/methods/useModules.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports._isIgnoreModules = _isIgnoreModules;
 4 | exports._warnIgnoreModules = _warnIgnoreModules;
 5 | exports.useModules = useModules;
 6 | function _isIgnoreModules(me, mod, ...argv) {
 7 |     var _a, _b;
 8 |     return ((_b = (_a = me.options) === null || _a === void 0 ? void 0 : _a.disableModules) === null || _b === void 0 ? void 0 : _b.includes(mod));
 9 | }
/**
 * Emits a console warning saying that the given module cannot be used
 * because it is disabled.
 *
 * @param mod module (or module name); interpolated into the message as a string
 */
function _warnIgnoreModules(mod) {
    const message = `can't use this mod, because it got disable: ${mod}`;
    console.warn(message);
}
/**
 * Initializes a module and registers it on `me`, unless it is disabled.
 *
 * A disabled module only triggers a warning. Otherwise `mod.init(me, ...argv)`
 * is called; if it returns something other than `undefined`, that value
 * replaces `mod`. The resulting module is pushed onto `me.modules[type]`.
 *
 * @param me target object owning the `modules` registry
 * @param mod module to initialize and register
 * @param argv extra arguments forwarded to `mod.init`
 * @returns `me`
 * @throws {TypeError} when the module's `type` is neither 'tokenizer' nor 'optimizer'
 */
function useModules(me, mod, ...argv) {
    if (_isIgnoreModules(me, mod, ...argv)) {
        _warnIgnoreModules(mod);
        return me;
    }
    // Initialize first; `init` may hand back a replacement module.
    const initialized = mod.init(me, ...argv);
    const active = typeof initialized === 'undefined' ? mod : initialized;
    if (active.type !== 'tokenizer' && active.type !== 'optimizer') {
        throw new TypeError(`not a valid module, ${active}`);
    }
    // Register under the list that matches the module's type.
    me.modules[active.type].push(active);
    return me;
}
31 | //# sourceMappingURL=useModules.js.map


--------------------------------------------------------------------------------
/lib/segment/methods/useModules.ts:
--------------------------------------------------------------------------------
 1 | import SegmentCore from '../core';
 2 | import { ISubOptimizer } from '../../mod/Optimizer';
 3 | import { ISubTokenizer } from '../../mod/Tokenizer';
 4 | 
 5 | export function _isIgnoreModules<T extends SegmentCore>(me: T, mod: ISubOptimizer | ISubTokenizer | any, ...argv)
 6 | {
 7 | 	return (me.options?.disableModules?.includes(mod))
 8 | }
 9 | 
/**
 * Emits a console warning saying that the given module cannot be used
 * because it is disabled.
 *
 * @param mod module (or module name); interpolated into the message as a string
 */
export function _warnIgnoreModules(mod)
{
	const message = `can't use this mod, because it got disable: ${mod}`;

	console.warn(message);
}
14 | 
/**
 * Initializes a module and registers it on `me`, unless it is disabled.
 *
 * A disabled module only triggers a warning. Otherwise `mod.init(me, ...argv)`
 * is called; if it returns something other than `undefined`, that value
 * replaces `mod`. The resulting module is pushed onto `me.modules[type]`.
 *
 * @param me target object owning the `modules` registry
 * @param mod module to initialize and register
 * @param argv extra arguments forwarded to `mod.init`
 * @returns `me`
 * @throws {TypeError} when the module's `type` is neither 'tokenizer' nor 'optimizer'
 */
export function useModules<T>(me: T, mod: ISubOptimizer | ISubTokenizer | any, ...argv)
{
	if (_isIgnoreModules(me as any, mod, ...argv))
	{
		_warnIgnoreModules(mod);

		return me;
	}

	// Initialize first; `init` may hand back a replacement module.
	const initialized = mod.init(me, ...argv);
	const active = typeof initialized === 'undefined' ? mod : initialized;

	if (active.type !== 'tokenizer' && active.type !== 'optimizer')
	{
		throw new TypeError(`not a valid module, ${active}`);
	}

	// Register under the list that matches the module's type.
	(me as any).modules[active.type].push(active);

	return me;
}
42 | 


--------------------------------------------------------------------------------
/lib/segment/methods/useModules2.d.ts:
--------------------------------------------------------------------------------
1 | import { ISubOptimizer } from '../../mod/Optimizer';
2 | import { ISubTokenizer } from '../../mod/Tokenizer';
/**
 * Registers one module or an array of modules on `me`.
 *
 * Besides module objects, a module may be given as a string, which is looked
 * up among the exports of `lib/submod` unless that name is disabled.
 * Returns `me`.
 */
export declare function useModules<T>(me: T, mod: ISubOptimizer | ISubTokenizer | any | string | (ISubTokenizer | ISubOptimizer | string)[], ...argv: any[]): T;
4 | 


--------------------------------------------------------------------------------
/lib/segment/methods/useModules2.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.useModules = useModules;
 4 | const tslib_1 = require("tslib");
 5 | const useModules_1 = require("./useModules");
 6 | const BuildInSubMod = tslib_1.__importStar(require("../../submod"));
 7 | function useModules(me, mod, ...argv) {
 8 |     if (Array.isArray(mod)) {
 9 |         mod.forEach(function (m) {
10 |             useModules(me, m, ...argv);
11 |         });
12 |     }
13 |     else {
14 |         if (typeof mod === 'string' && !(0, useModules_1._isIgnoreModules)(me, mod, ...argv)) {
15 |             //mod = require(path.join(__dirname, '../..', 'submod', mod));
16 |             //mod = require(`../../submod/${mod}`);
17 |             mod = BuildInSubMod[mod];
18 |         }
19 |         (0, useModules_1.useModules)(me, mod, ...argv);
20 |     }
21 |     return me;
22 | }
23 | //# sourceMappingURL=useModules2.js.map


--------------------------------------------------------------------------------
/lib/segment/methods/useModules2.ts:
--------------------------------------------------------------------------------
 1 | import { _isIgnoreModules, useModules as _useModules } from './useModules';
 2 | import { ISubOptimizer } from '../../mod/Optimizer';
 3 | import { ISubTokenizer } from '../../mod/Tokenizer';
 4 | import * as BuildInSubMod from '../../submod';
 5 | 
 6 | export function useModules<T>(me: T, mod: ISubOptimizer | ISubTokenizer | any | string | (ISubTokenizer | ISubOptimizer | string)[], ...argv)
 7 | {
 8 | 	if (Array.isArray(mod))
 9 | 	{
10 | 		mod.forEach(function (m)
11 | 		{
12 | 			useModules(me as any, m, ...argv)
13 | 		});
14 | 	}
15 | 	else
16 | 	{
17 | 		if (typeof mod === 'string' && !_isIgnoreModules(me as any, mod, ...argv))
18 | 		{
19 | 			//mod = require(path.join(__dirname, '../..', 'submod', mod));
20 | 			//mod = require(`../../submod/${mod}`);
21 | 
22 | 			mod = BuildInSubMod[mod]
23 | 		}
24 | 
25 | 		_useModules(me as any, mod, ...argv)
26 | 	}
27 | 
28 | 	return me;
29 | }
30 | 


--------------------------------------------------------------------------------
/lib/segment/types.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2019/6/26.
 3 |  */
 4 | import { IOptions as IOptionsTableDict } from '@novel-segment/table-core-abstract';
 5 | import { TableDict } from '@novel-segment/table-dict';
 6 | import { ENUM_SUBMODS_NAME } from '../mod/index';
 7 | import { IUseDefaultOptions } from '../defaults/index';
 8 | export { IWord } from '@novel-segment/types';
 9 | export type ISPLIT = RegExp | string | {
10 |     [Symbol.split](input: string, limit?: number): string[];
11 | };
12 | export type ISPLIT_FILTER = RegExp | {
13 |     test(input: string): boolean;
14 | };
15 | export interface IDICT<T = any> {
16 |     [key: string]: T;
17 | }
18 | export interface IDICT2<T = any> {
19 |     [key: number]: IDICT<T>;
20 | }
/**
 * Segmenter options; combines the table-dict options and the "use default"
 * options with the segmenter-specific fields below.
 */
export interface IOptionsSegment extends IOptionsTableDict, IUseDefaultOptions {
    /** Dictionary tables. NOTE(review): how they are consumed is not visible here — confirm. */
    db?: TableDict[];
    /** Options of the `IOptionsDoSegment` shape; presumably defaults for segmentation calls — confirm. */
    optionsDoSegment?: IOptionsDoSegment;
    /** NOTE(review): chunk-count upper bound; exact semantics not visible here — confirm. */
    maxChunkCount?: number;
    /** NOTE(review): chunk-count lower bound; exact semantics not visible here — confirm. */
    minChunkCount?: number;
    /** Modules (by name or by module object) that must not be used. */
    disableModules?: (ENUM_SUBMODS_NAME | unknown)[];
}
28 | export type IDICT_SYNONYM = IDICT<string>;
29 | export type IDICT_STOPWORD = IDICT<boolean>;
30 | export type IDICT_BLACKLIST = IDICT<boolean>;
/**
 * Per-call segmentation options.
 */
export interface IOptionsDoSegment {
    /**
     * Do not return the part-of-speech
     */
    simple?: boolean;
    /**
     * Strip punctuation
     */
    stripPunctuation?: boolean;
    /**
     * Convert synonyms
     */
    convertSynonym?: boolean;
    /**
     * Strip stopwords
     */
    stripStopword?: boolean;
    /** Presumably strips whitespace — TODO confirm against the implementation. */
    stripSpace?: boolean;
    /**
     * Modules to treat as disabled for this call; `listModules` matches named
     * modules by name and unnamed modules by the module object itself.
     */
    disableModules?: (ENUM_SUBMODS_NAME | unknown)[];
}
51 | 


--------------------------------------------------------------------------------
/lib/segment/types.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 | /**
3 |  * Created by user on 2019/6/26.
4 |  */
5 | Object.defineProperty(exports, "__esModule", { value: true });
6 | //# sourceMappingURL=types.js.map


--------------------------------------------------------------------------------
/lib/segment/types.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2019/6/26.
 3 |  */
 4 | 
 5 | import { IOptions as IOptionsTableDict } from '@novel-segment/table-core-abstract';
 6 | import { TableDict } from '@novel-segment/table-dict';
 7 | import { ENUM_SUBMODS_NAME } from '../mod/index';
 8 | import { IUseDefaultOptions } from '../defaults/index';
 9 | 
10 | export { IWord } from '@novel-segment/types';
11 | 
/**
 * A splitter: a regular expression, a string, or any object that implements
 * `Symbol.split`.
 */
export type ISPLIT = RegExp | string | {
	[Symbol.split](input: string, limit?: number): string[],
};
15 | 
/**
 * A string filter: a regular expression, or any object exposing a
 * `test(input)` method that returns a boolean.
 */
export type ISPLIT_FILTER = RegExp | {
	test(input: string): boolean,
};
19 | 
/**
 * Generic dictionary: string keys mapped to values of type `T`.
 */
export interface IDICT<T = any>
{
	[key: string]: T,
}
24 | 
/**
 * Two-level dictionary: numeric keys mapped to an `IDICT<T>`.
 * NOTE(review): the meaning of the numeric key is not visible here — confirm.
 */
export interface IDICT2<T = any>
{
	[key: number]: IDICT<T>,
}
29 | 
/**
 * Segmenter options; combines the table-dict options and the "use default"
 * options with the segmenter-specific fields below.
 */
export interface IOptionsSegment extends IOptionsTableDict, IUseDefaultOptions
{
	/** Dictionary tables. NOTE(review): how they are consumed is not visible here — confirm. */
	db?: TableDict[],
	/** Options of the `IOptionsDoSegment` shape; presumably defaults for segmentation calls — confirm. */
	optionsDoSegment?: IOptionsDoSegment,

	/** NOTE(review): chunk-count upper bound; exact semantics not visible here — confirm. */
	maxChunkCount?: number,
	/** NOTE(review): chunk-count lower bound; exact semantics not visible here — confirm. */
	minChunkCount?: number,

	/** Modules (by name or by module object) that must not be used. */
	disableModules?: (ENUM_SUBMODS_NAME | unknown)[],
}
40 | 
/** Synonym dictionary: string keys mapped to strings. */
export type IDICT_SYNONYM = IDICT<string>;
/** Stopword dictionary: string keys mapped to boolean flags. */
export type IDICT_STOPWORD = IDICT<boolean>;
/** Blacklist dictionary: string keys mapped to boolean flags. */
export type IDICT_BLACKLIST = IDICT<boolean>;
44 | 
/**
 * Per-call segmentation options.
 */
export interface IOptionsDoSegment
{
	/**
	 * Do not return the part-of-speech
	 */
	simple?: boolean,

	/**
	 * Strip punctuation
	 */
	stripPunctuation?: boolean,

	/**
	 * Convert synonyms
	 */
	convertSynonym?: boolean,

	/**
	 * Strip stopwords
	 */
	stripStopword?: boolean,

	/** Presumably strips whitespace — TODO confirm against the implementation. */
	stripSpace?: boolean,

	/**
	 * Modules to treat as disabled for this call; `listModules` matches named
	 * modules by name and unnamed modules by the module object itself.
	 */
	disableModules?: (ENUM_SUBMODS_NAME | unknown)[],
}
71 | 


--------------------------------------------------------------------------------
/lib/submod.d.ts:
--------------------------------------------------------------------------------
 1 | import * as AdjectiveOptimizer from './submod/AdjectiveOptimizer';
 2 | import * as ChsNameOptimizer from './submod/ChsNameOptimizer';
 3 | import * as ChsNameTokenizer from './submod/ChsNameTokenizer';
 4 | import * as DatetimeOptimizer from './submod/DatetimeOptimizer';
 5 | import * as DictOptimizer from './submod/DictOptimizer';
 6 | import * as DictTokenizer from './submod/DictTokenizer';
 7 | import * as EmailOptimizer from './submod/EmailOptimizer';
 8 | import * as ForeignOptimizer from './submod/ForeignOptimizer';
 9 | import * as ForeignTokenizer from './submod/ForeignTokenizer';
10 | import * as JpSimpleTokenizer from './submod/JpSimpleTokenizer';
11 | import * as PunctuationTokenizer from './submod/PunctuationTokenizer';
12 | import * as SingleTokenizer from './submod/SingleTokenizer';
13 | import * as URLTokenizer from './submod/URLTokenizer';
14 | import * as WildcardTokenizer from './submod/WildcardTokenizer';
15 | import * as ZhRadicalTokenizer from './submod/ZhRadicalTokenizer';
16 | import * as ZhtSynonymOptimizer from './submod/ZhtSynonymOptimizer';
17 | import * as ZhuyinTokenizer from './submod/ZhuyinTokenizer';
18 | export { AdjectiveOptimizer };
19 | export { ChsNameOptimizer };
20 | export { ChsNameTokenizer };
21 | export { DatetimeOptimizer };
22 | export { DictOptimizer };
23 | export { DictTokenizer };
24 | export { EmailOptimizer };
25 | export { ForeignOptimizer };
26 | export { ForeignTokenizer };
27 | export { JpSimpleTokenizer };
28 | export { PunctuationTokenizer };
29 | export { SingleTokenizer };
30 | export { URLTokenizer };
31 | export { WildcardTokenizer };
32 | export { ZhRadicalTokenizer };
33 | export { ZhtSynonymOptimizer };
34 | export { ZhuyinTokenizer };
35 | 


--------------------------------------------------------------------------------
/lib/submod.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.ZhuyinTokenizer = exports.ZhtSynonymOptimizer = exports.ZhRadicalTokenizer = exports.WildcardTokenizer = exports.URLTokenizer = exports.SingleTokenizer = exports.PunctuationTokenizer = exports.JpSimpleTokenizer = exports.ForeignTokenizer = exports.ForeignOptimizer = exports.EmailOptimizer = exports.DictTokenizer = exports.DictOptimizer = exports.DatetimeOptimizer = exports.ChsNameTokenizer = exports.ChsNameOptimizer = exports.AdjectiveOptimizer = void 0;
 4 | const tslib_1 = require("tslib");
 5 | const AdjectiveOptimizer = tslib_1.__importStar(require("./submod/AdjectiveOptimizer"));
 6 | exports.AdjectiveOptimizer = AdjectiveOptimizer;
 7 | const ChsNameOptimizer = tslib_1.__importStar(require("./submod/ChsNameOptimizer"));
 8 | exports.ChsNameOptimizer = ChsNameOptimizer;
 9 | const ChsNameTokenizer = tslib_1.__importStar(require("./submod/ChsNameTokenizer"));
10 | exports.ChsNameTokenizer = ChsNameTokenizer;
11 | const DatetimeOptimizer = tslib_1.__importStar(require("./submod/DatetimeOptimizer"));
12 | exports.DatetimeOptimizer = DatetimeOptimizer;
13 | const DictOptimizer = tslib_1.__importStar(require("./submod/DictOptimizer"));
14 | exports.DictOptimizer = DictOptimizer;
15 | const DictTokenizer = tslib_1.__importStar(require("./submod/DictTokenizer"));
16 | exports.DictTokenizer = DictTokenizer;
17 | const EmailOptimizer = tslib_1.__importStar(require("./submod/EmailOptimizer"));
18 | exports.EmailOptimizer = EmailOptimizer;
19 | const ForeignOptimizer = tslib_1.__importStar(require("./submod/ForeignOptimizer"));
20 | exports.ForeignOptimizer = ForeignOptimizer;
21 | const ForeignTokenizer = tslib_1.__importStar(require("./submod/ForeignTokenizer"));
22 | exports.ForeignTokenizer = ForeignTokenizer;
23 | const JpSimpleTokenizer = tslib_1.__importStar(require("./submod/JpSimpleTokenizer"));
24 | exports.JpSimpleTokenizer = JpSimpleTokenizer;
25 | const PunctuationTokenizer = tslib_1.__importStar(require("./submod/PunctuationTokenizer"));
26 | exports.PunctuationTokenizer = PunctuationTokenizer;
27 | const SingleTokenizer = tslib_1.__importStar(require("./submod/SingleTokenizer"));
28 | exports.SingleTokenizer = SingleTokenizer;
29 | const URLTokenizer = tslib_1.__importStar(require("./submod/URLTokenizer"));
30 | exports.URLTokenizer = URLTokenizer;
31 | const WildcardTokenizer = tslib_1.__importStar(require("./submod/WildcardTokenizer"));
32 | exports.WildcardTokenizer = WildcardTokenizer;
33 | const ZhRadicalTokenizer = tslib_1.__importStar(require("./submod/ZhRadicalTokenizer"));
34 | exports.ZhRadicalTokenizer = ZhRadicalTokenizer;
35 | const ZhtSynonymOptimizer = tslib_1.__importStar(require("./submod/ZhtSynonymOptimizer"));
36 | exports.ZhtSynonymOptimizer = ZhtSynonymOptimizer;
37 | const ZhuyinTokenizer = tslib_1.__importStar(require("./submod/ZhuyinTokenizer"));
38 | exports.ZhuyinTokenizer = ZhuyinTokenizer;
39 | //# sourceMappingURL=submod.js.map


--------------------------------------------------------------------------------
/lib/submod.ts:
--------------------------------------------------------------------------------
 1 | 
 2 | import * as AdjectiveOptimizer from './submod/AdjectiveOptimizer';
 3 | import * as ChsNameOptimizer from './submod/ChsNameOptimizer';
 4 | import * as ChsNameTokenizer from './submod/ChsNameTokenizer';
 5 | import * as DatetimeOptimizer from './submod/DatetimeOptimizer';
 6 | import * as DictOptimizer from './submod/DictOptimizer';
 7 | import * as DictTokenizer from './submod/DictTokenizer';
 8 | import * as EmailOptimizer from './submod/EmailOptimizer';
 9 | import * as ForeignOptimizer from './submod/ForeignOptimizer';
10 | import * as ForeignTokenizer from './submod/ForeignTokenizer';
11 | import * as JpSimpleTokenizer from './submod/JpSimpleTokenizer';
12 | import * as PunctuationTokenizer from './submod/PunctuationTokenizer';
13 | import * as SingleTokenizer from './submod/SingleTokenizer';
14 | import * as URLTokenizer from './submod/URLTokenizer';
15 | import * as WildcardTokenizer from './submod/WildcardTokenizer';
16 | import * as ZhRadicalTokenizer from './submod/ZhRadicalTokenizer';
17 | import * as ZhtSynonymOptimizer from './submod/ZhtSynonymOptimizer';
18 | import * as ZhuyinTokenizer from './submod/ZhuyinTokenizer';
19 | 
20 | export { AdjectiveOptimizer }
21 | export { ChsNameOptimizer }
22 | export { ChsNameTokenizer }
23 | export { DatetimeOptimizer }
24 | export { DictOptimizer }
25 | export { DictTokenizer }
26 | export { EmailOptimizer }
27 | export { ForeignOptimizer }
28 | export { ForeignTokenizer }
29 | export { JpSimpleTokenizer }
30 | export { PunctuationTokenizer }
31 | export { SingleTokenizer }
32 | export { URLTokenizer }
33 | export { WildcardTokenizer }
34 | export { ZhRadicalTokenizer }
35 | export { ZhtSynonymOptimizer }
36 | export { ZhuyinTokenizer }
37 | 


--------------------------------------------------------------------------------
/lib/submod/AdjectiveOptimizer.d.ts:
--------------------------------------------------------------------------------
 1 | import { SubSModuleOptimizer } from '../mod';
 2 | import { IWordDebug } from '../util';
/**
 * Tags some words that were mistaken for nouns as adjectives, and handles
 * the case where a noun is used as an attributive modifier.
 */
export declare class AdjectiveOptimizer extends SubSModuleOptimizer {
    /** Module name; also used as the key of the debug marker set on modified words. */
    name: string;
    /** Walks the word list, adds the adjective tag where a rule matches, and returns the same array. */
    doOptimize(words: IWordDebug[]): IWordDebug[];
    /** Reports whether `pos` is exactly one of the noun-like part-of-speech tags. */
    isNominal(pos: number | number[]): boolean;
}
11 | export declare const init: typeof AdjectiveOptimizer.init;
12 | export declare const type = "optimizer";
13 | export default AdjectiveOptimizer;
14 | 


--------------------------------------------------------------------------------
/lib/submod/AdjectiveOptimizer.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.type = exports.init = exports.AdjectiveOptimizer = void 0;
 4 | const mod_1 = require("../mod");
 5 | const COLORS_1 = require("../mod/COLORS");
 6 | /**
 7 |  * 把一些错认为名词的词标注为形容词，或者对名词作定语的情况
 8 |  */
 9 | class AdjectiveOptimizer extends mod_1.SubSModuleOptimizer {
10 |     constructor() {
11 |         super(...arguments);
12 |         this.name = 'AdjectiveOptimizer';
13 |     }
14 |     doOptimize(words) {
15 |         const POSTAG = this._POSTAG;
16 |         let index = 0;
17 |         while (index < words.length) {
18 |             const word = words[index];
19 |             const nextword = words[index + 1];
20 |             if (nextword) {
21 |                 // 对于<颜色>+<的>，直接判断颜色是形容词（字典里颜色都是名词）
22 |                 if (nextword.p & POSTAG.D_U && COLORS_1.COLOR_ALL[word.w]) {
23 |                     word.op = word.op || word.p;
24 |                     word.p |= POSTAG.D_A;
25 |                     this.debugToken(word, {
26 |                         [this.name]: true,
27 |                     });
28 |                 }
29 |                 // 如果是连续的两个名词，前一个是颜色，那这个颜色也是形容词
30 |                 if (word.p & POSTAG.D_N && this.isNominal(nextword.p) && COLORS_1.COLOR_ALL[word.w]) {
31 |                     word.op = word.op || word.p;
32 |                     word.p |= POSTAG.D_A;
33 |                     word.p |= POSTAG.D_N;
34 |                     this.debugToken(word, {
35 |                         [this.name]: true,
36 |                     });
37 |                 }
38 |                 if ((word.w === '純' || word.w === '纯') && COLORS_1.COLOR_HAIR[nextword.w]) {
39 |                     word.op = word.op || word.p;
40 |                     word.p |= POSTAG.D_A;
41 |                     this.debugToken(word, {
42 |                         [this.name]: true,
43 |                     });
44 |                 }
45 |             }
46 |             // 移到下一个单词
47 |             index += 1;
48 |         }
49 |         return words;
50 |     }
51 |     isNominal(pos) {
52 |         /*
53 |         if (Array.isArray(pos))
54 |         {
55 |             return this.isNominal(pos[0]);
56 |         }
57 |         */
58 |         const POSTAG = this._POSTAG;
59 |         return (pos === POSTAG.D_N ||
60 |             pos === POSTAG.A_NT ||
61 |             pos === POSTAG.A_NX ||
62 |             pos === POSTAG.A_NZ ||
63 |             pos === POSTAG.A_NR ||
64 |             pos === POSTAG.A_NS ||
65 |             pos === POSTAG.URL);
66 |     }
67 | }
68 | exports.AdjectiveOptimizer = AdjectiveOptimizer;
69 | exports.init = AdjectiveOptimizer.init.bind(AdjectiveOptimizer);
70 | exports.type = AdjectiveOptimizer.type;
71 | exports.default = AdjectiveOptimizer;
72 | //# sourceMappingURL=AdjectiveOptimizer.js.map


--------------------------------------------------------------------------------
/lib/submod/AdjectiveOptimizer.ts:
--------------------------------------------------------------------------------
 1 | import { SubSModuleOptimizer } from '../mod';
 2 | 
 3 | import { COLOR_ALL, COLOR_HAIR } from '../mod/COLORS';
 4 | import { IWordDebug } from '../util';
 5 | 
 6 | /**
 7 |  * 把一些错认为名词的词标注为形容词，或者对名词作定语的情况
 8 |  */
 9 | export class AdjectiveOptimizer extends SubSModuleOptimizer
10 | {
11 | 	override name = 'AdjectiveOptimizer';
12 | 
13 | 	override doOptimize(words: IWordDebug[]): IWordDebug[]
14 | 	{
15 | 		const POSTAG = this._POSTAG;
16 | 		let index = 0;
17 | 		while (index < words.length)
18 | 		{
19 | 			const word = words[index];
20 | 			const nextword = words[index + 1];
21 | 			if (nextword)
22 | 			{
23 | 				// 对于<颜色>+<的>，直接判断颜色是形容词（字典里颜色都是名词）
24 | 				if (nextword.p & POSTAG.D_U && COLOR_ALL[word.w])
25 | 				{
26 | 					word.op = word.op || word.p;
27 | 					word.p |= POSTAG.D_A;
28 | 
29 | 					this.debugToken(word, {
30 | 						[this.name]: true,
31 | 					});
32 | 				}
33 | 
34 | 				// 如果是连续的两个名词，前一个是颜色，那这个颜色也是形容词
35 | 				if (word.p & POSTAG.D_N && this.isNominal(nextword.p) && COLOR_ALL[word.w])
36 | 				{
37 | 					word.op = word.op || word.p;
38 | 					word.p |= POSTAG.D_A;
39 | 					word.p |= POSTAG.D_N;
40 | 
41 | 					this.debugToken(word, {
42 | 						[this.name]: true,
43 | 					});
44 | 				}
45 | 
46 | 				if ((word.w === '純' || word.w === '纯') && COLOR_HAIR[nextword.w])
47 | 				{
48 | 					word.op = word.op || word.p;
49 | 					word.p |= POSTAG.D_A;
50 | 
51 | 					this.debugToken(word, {
52 | 						[this.name]: true,
53 | 					});
54 | 				}
55 | 			}
56 | 			// 移到下一个单词
57 | 			index += 1;
58 | 		}
59 | 		return words;
60 | 	}
61 | 
62 | 	isNominal(pos: number | number[]): boolean
63 | 	{
64 | 		/*
65 | 		if (Array.isArray(pos))
66 | 		{
67 | 			return this.isNominal(pos[0]);
68 | 		}
69 | 		*/
70 | 
71 | 		const POSTAG = this._POSTAG;
72 | 		return (
73 | 			pos === POSTAG.D_N ||
74 | 			pos === POSTAG.A_NT ||
75 | 			pos === POSTAG.A_NX ||
76 | 			pos === POSTAG.A_NZ ||
77 | 			pos === POSTAG.A_NR ||
78 | 			pos === POSTAG.A_NS ||
79 | 			pos === POSTAG.URL
80 | 		);
81 | 	}
82 | }
83 | 
84 | export const init = AdjectiveOptimizer.init.bind(AdjectiveOptimizer) as typeof AdjectiveOptimizer.init;
85 | 
86 | export const type = AdjectiveOptimizer.type;
87 | 
88 | export default AdjectiveOptimizer
89 | 


--------------------------------------------------------------------------------
/lib/submod/ChsNameOptimizer.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 人名优化模块
 3 |  *
 4 |  * @author 老雷<leizongmin@gmail.com>
 5 |  * @version 0.1
 6 |  */
 7 | import { SubSModuleOptimizer } from '../mod';
 8 | import { IDICT, IWord } from '../Segment';
/**
 * Optimizer for person names.
 *
 * @todo support the `XX氏` form
 */
export declare class ChsNameOptimizer extends SubSModuleOptimizer {
    protected _TABLE: IDICT<IWord>;
    name: string;
    _cache(): void;
    isBlackList(nw: string): boolean;
    isMergeable2(...words: string[]): boolean;
    isMergeable(word: IWord, nextword: IWord): boolean;
    /**
     * Merging only happens when the new word is a person name or an unknown word
     */
    validUnknownNewWord<W extends string | string[]>(ws: W, cb?: (nw: string, ew: IWord, ws: W) => IWord | boolean | void): true | IWord;
    /**
     * Family name
     */
    isFamilyName(w: string): boolean;
    /**
     * Two-character name
     */
    isDoubleName(w1: string, w2: string): boolean;
    isSingleNameRepeat(w1: string, w2: string): boolean;
    /**
     * Single-character name
     */
    isSingleName(w1: string): boolean;
    /**
     * Single-character name, not repeated
     */
    isSingleNameNoRepeat(w1: string): boolean;
    isFirstName(w1: string, w2: string): boolean;
    /**
     * Optimizes words that may be person names
     *
     * @param {array} words word array
     * @return {array}
     */
    doOptimize(words: IWord[]): IWord[];
}
49 | export declare const init: typeof ChsNameOptimizer.init;
50 | export declare const type = "optimizer";
51 | export default ChsNameOptimizer;
52 | 


--------------------------------------------------------------------------------
/lib/submod/ChsNameTokenizer.d.ts:
--------------------------------------------------------------------------------
 1 | import { SubSModuleTokenizer } from '../mod';
 2 | import { IDICT, IWord } from '../Segment';
export declare class ChsNameTokenizer extends SubSModuleTokenizer {
    protected _TABLE: IDICT<IWord>;
    name: string;
    _cache(): void;
    /**
     * Segment words that have not been recognized yet
     *
     * @param {array} words word array
     * @return {array}
     */
    split(words: IWord[]): IWord[];
    /**
     * Match the personal names contained in the text and return their details
     *
     * @param {string} text text
     * @param {int} cur start position
     * @return {array}  return format   {w: 'name', c: start position}
     */
    matchName(text: string, cur?: number): IWord[];
}
export declare const init: typeof ChsNameTokenizer.init;
/** Module type; always the literal "tokenizer". */
export declare const type = "tokenizer";
export default ChsNameTokenizer;
26 | 


--------------------------------------------------------------------------------
/lib/submod/DatetimeOptimizer.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 日期时间优化模块
 3 |  *
 4 |  * @author 老雷<leizongmin@gmail.com>
 5 |  */
 6 | import Segment, { IWord } from '../Segment';
/** Module type */
export declare const type = "optimizer";
/** Segmenter instance; assigned by `init()`. */
export declare let segment: Segment;
/**
 * Module initialization
 *
 * @param {Segment} segment segmenter interface
 */
export declare function init(_segment: any): void;
/**
 * Date/time optimization
 *
 * @param {array} words word array
 * @param {bool} is_not_first whether the call comes from the manager
 * @return {array}
 */
export declare function doOptimize(words: IWord[], is_not_first?: boolean): Segment.IWord[];
24 | 


--------------------------------------------------------------------------------
/lib/submod/DatetimeOptimizer.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.segment = exports.type = void 0;
 4 | exports.init = init;
 5 | exports.doOptimize = doOptimize;
 6 | const const_1 = require("../mod/const");
/** Module type */
exports.type = 'optimizer';
/**
 * Module initialization
 *
 * Stores the segmenter on `exports.segment`, where `doOptimize` reads it.
 *
 * @param {Segment} segment segmenter interface
 */
function init(_segment) {
    exports.segment = _segment;
}
/**
 * Date/time optimization
 *
 * Scans `words` from left to right and collapses every run of
 * "number + date/time unit" pairs (for example "2005" + "年") into a single
 * word tagged `POSTAG.D_T`. The merged word keeps the words it replaced in
 * its `m` property. `words` is modified in place and also returned.
 *
 * @param {array} words word array (modified in place)
 * @param {bool} is_not_first whether the call comes from the manager; not used by this optimizer
 * @return {array} the same `words` array
 */
function doOptimize(words, is_not_first) {
    const POSTAG = exports.segment.POSTAG;
    // A word counts as a number when it carries the A_M tag bit.
    const isNumber = (word) => (word.p & POSTAG.A_M) > 0;
    // Own-key lookup on purpose: a bare `in` also matches names inherited from
    // Object.prototype ("constructor", "toString", ...), which are not date units.
    const isUnit = (word) => Object.prototype.hasOwnProperty.call(const_1.DATETIME, word.w);
    let i = 0;
    let ie = words.length - 1;
    while (i < ie) {
        const w1 = words[i];
        const w2 = words[i + 1];
        // Date/time combination: number + date unit, e.g. "2005年"
        if (isNumber(w1) && isUnit(w2)) {
            let nw = w1.w + w2.w;
            let len = 2;
            const ma = [w1, w2];
            // Keep absorbing the following pairs as long as each one is again
            // number + date unit.
            while (true) {
                const w11 = words[i + len];
                const w22 = words[i + len + 1];
                if (!(w11 && w22 && isNumber(w11) && isUnit(w22))) {
                    break;
                }
                len += 2;
                nw += w11.w + w22.w;
                ma.push(w11, w22);
            }
            words.splice(i, len, {
                w: nw,
                p: POSTAG.D_T,
                m: ma,
            });
            // `len` words became one, so the last pair index moves back.
            ie -= len - 1;
            continue;
        }
        // Move on to the next word
        i++;
    }
    return words;
}
73 | //# sourceMappingURL=DatetimeOptimizer.js.map


--------------------------------------------------------------------------------
/lib/submod/DatetimeOptimizer.ts:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | 
 3 | /**
 4 |  * 日期时间优化模块
 5 |  *
 6 |  * @author 老雷<leizongmin@gmail.com>
 7 |  */
 8 | 
 9 | import Segment, { IWord } from '../Segment';
10 | import { DATETIME } from '../mod/const';
11 | 
/** Module type */
export const type = 'optimizer';
/** Segmenter instance; assigned by `init()` and read by `doOptimize()`. */
export let segment: Segment;

/**
 * Module initialization
 *
 * @param {Segment} segment segmenter interface
 */
export function init(_segment)
{
	segment = _segment;
}
25 | 
/**
 * Date/time optimization
 *
 * Scans `words` from left to right and collapses every run of
 * "number + date/time unit" pairs (for example "2005" + "年") into a single
 * word tagged `POSTAG.D_T`. The merged word keeps the words it replaced in
 * its `m` property. `words` is modified in place and also returned.
 *
 * @param {array} words word array (modified in place)
 * @param {bool} is_not_first whether the call comes from the manager; not used by this optimizer
 * @return {array} the same `words` array
 */
export function doOptimize(words: IWord[], is_not_first?: boolean)
{
	const POSTAG = segment.POSTAG;

	// A word counts as a number when it carries the A_M tag bit.
	const isNumber = (word: IWord) => (word.p & POSTAG.A_M) > 0;
	// Own-key lookup on purpose: a bare `in` also matches names inherited from
	// Object.prototype ("constructor", "toString", ...), which are not date units.
	const isUnit = (word: IWord) => Object.prototype.hasOwnProperty.call(DATETIME, word.w);

	let i = 0;
	let ie = words.length - 1;
	while (i < ie)
	{
		const w1 = words[i];
		const w2 = words[i + 1];

		// Date/time combination: number + date unit, e.g. "2005年"
		if (isNumber(w1) && isUnit(w2))
		{
			let nw = w1.w + w2.w;
			let len = 2;

			const ma = [w1, w2];

			// Keep absorbing the following pairs as long as each one is again
			// number + date unit.
			while (true)
			{
				const w11 = words[i + len];
				const w22 = words[i + len + 1];
				if (!(w11 && w22 && isNumber(w11) && isUnit(w22)))
				{
					break;
				}

				len += 2;
				nw += w11.w + w22.w;
				ma.push(w11, w22);
			}

			words.splice(i, len, {
				w: nw,
				p: POSTAG.D_T,
				m: ma,
			});
			// `len` words became one, so the last pair index moves back.
			ie -= len - 1;
			continue;
		}

		// Move on to the next word
		i++;
	}

	return words;
}
97 | 


--------------------------------------------------------------------------------
/lib/submod/DictOptimizer.d.ts:
--------------------------------------------------------------------------------
 1 | import { ISubOptimizerCreate, SubSModuleOptimizer } from '../mod';
 2 | import { IDICT, IWord } from '../Segment';
 3 | import { POSTAG as IPOSTAG } from '@novel-segment/postag/lib/postag/ids';
/**
 * Dictionary optimization module
 *
 * @author 老雷<leizongmin@gmail.com>
 */
export declare class DictOptimizer extends SubSModuleOptimizer {
    protected _TABLE: IDICT<IWord>;
    name: string;
    _cache(): void;
    isMergeable(w1: IWord, w2: IWord, { POSTAG, TABLE, nw, i, nw_cache, nw_cache_exists, }: {
        POSTAG: typeof IPOSTAG;
        TABLE: IDICT;
        nw: string;
        i: number;
        nw_cache: IWord;
        nw_cache_exists: boolean;
    }): boolean;
    _getWordCache(nw: string, nw_cache: IWord, nw_cache_exists: boolean): {
        nw: string;
        nw_cache: IWord;
        nw_cache_exists: boolean;
    };
    /**
     * Dictionary optimization
     *
     * @param {array} words word array
     * @param {bool} is_not_first whether the call comes from the manager
     * @return {array}
     */
    doOptimize(words: IWord[], is_not_first: boolean): IWord[];
    /**
     * Numeral + measure word
     */
    _mergeWordHowManyProp(p: number, p2: number, p3?: number): number;
}
export declare const init: ISubOptimizerCreate<DictOptimizer>;
/** Module type; always the literal "optimizer". */
export declare const type = "optimizer";
export default DictOptimizer;
42 | 


--------------------------------------------------------------------------------
/lib/submod/EmailOptimizer.d.ts:
--------------------------------------------------------------------------------
 1 | import { ISubOptimizerCreate, SubSModuleOptimizer } from '../mod';
 2 | import { IDICT, IWord } from '../Segment';
/**
 * Characters allowed in an email address
 * Reference: http://www.cs.tut.fi/~jkorpela/rfc/822addr.html
 */
export declare const _EMAILCHAR: string[];
/** Lookup table keyed by the characters of `_EMAILCHAR`. */
export declare const EMAILCHAR: IDICT<number>;
/**
 * Email address recognition optimizer module
 *
 * @author 老雷<leizongmin@gmail.com>
 */
export declare class EmailOptimizer extends SubSModuleOptimizer {
    /**
     * Optimize words that may form an email address
     *
     * @param {array} words word array
     * @return {array}
     */
    doOptimize(words: any): any;
    /**
     * Build an email address from a group of words
     *
     * @param {array} words word array
     * @return {string}
     */
    toEmailAddress(words: IWord[]): string;
}
export declare const init: ISubOptimizerCreate<EmailOptimizer>;
/** Module type; always the literal "optimizer". */
export declare const type = "optimizer";
export default EmailOptimizer;
33 | 


--------------------------------------------------------------------------------
/lib/submod/EmailOptimizer.ts:
--------------------------------------------------------------------------------
  1 | 'use strict';
  2 | 
  3 | import { ISubOptimizerCreate, SubSModuleOptimizer } from '../mod';
  4 | import { IDICT, IWord } from '../Segment';
  5 | 
  6 | /**
  7 |  * 邮箱地址中允许出现的字符
  8 |  * 参考：http://www.cs.tut.fi/~jkorpela/rfc/822addr.html
  9 |  */
 10 | export const _EMAILCHAR = '!"#$%&\'*+-/0123456789=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~.'.split('');
 11 | export const EMAILCHAR: IDICT<number> = {};
 12 | for (let i in _EMAILCHAR) EMAILCHAR[_EMAILCHAR[i]] = 1;
 13 | 
 14 | /**
 15 |  * 邮箱地址识别优化模块
 16 |  *
 17 |  * @author 老雷<leizongmin@gmail.com>
 18 |  */
 19 | export class EmailOptimizer extends SubSModuleOptimizer
 20 | {
 21 | 
 22 | 	/**
 23 | 	 * 对可能是邮箱地址的单词进行优化
 24 | 	 *
 25 | 	 * @param {array} words 单词数组
 26 | 	 * @return {array}
 27 | 	 */
 28 | 	override doOptimize(words)
 29 | 	{
 30 | 		const POSTAG = this.segment.POSTAG;
 31 | 		//debug(words);
 32 | 
 33 | 		let i = 0;
 34 | 		let ie = words.length - 1;
 35 | 		let addr_start: boolean | number = false;
 36 | 		let has_at = false;
 37 | 
 38 | 		while (i < ie)
 39 | 		{
 40 | 			let word = words[i];
 41 | 			let is_ascii = ((word.p === POSTAG.A_NX) ||
 42 | 				(word.p === POSTAG.A_M && word.w.charCodeAt(0) < 128))
 43 | 				? true : false;
 44 | 
 45 | 			// 如果是外文字符或者数字，符合电子邮件地址开头的条件
 46 | 			// @ts-ignore
 47 | 			if (addr_start === false && is_ascii)
 48 | 			{
 49 | 				addr_start = i;
 50 | 				i++;
 51 | 				continue;
 52 | 			}
 53 | 			else
 54 | 			{
 55 | 				// 如果遇到@符号，符合第二个条件
 56 | 				if (has_at === false && word.w === '@')
 57 | 				{
 58 | 					has_at = true;
 59 | 					i++;
 60 | 					continue;
 61 | 				}
 62 | 				// 如果已经遇到过@符号，且出现了其他字符，则截取邮箱地址
 63 | 				if (has_at !== false && words[i - 1].w !== '@' && is_ascii === false && !(word.w in EMAILCHAR))
 64 | 				{
 65 | 					let mailws = words.slice(addr_start, i);
 66 | 					//debug(toEmailAddress(mailws));
 67 | 					words.splice(addr_start, mailws.length, {
 68 | 						w: this.toEmailAddress(mailws),
 69 | 						p: POSTAG.URL
 70 | 					});
 71 | 					i = <number>addr_start + 1;
 72 | 					ie -= mailws.length - 1;
 73 | 					addr_start = false;
 74 | 					has_at = false;
 75 | 					continue;
 76 | 				}
 77 | 				// 如果已经开头
 78 | 				if (addr_start !== false && (is_ascii || word.w in EMAILCHAR))
 79 | 				{
 80 | 					i++;
 81 | 					continue;
 82 | 				}
 83 | 			}
 84 | 
 85 | 			// 移到下一个词
 86 | 			addr_start = false;
 87 | 			has_at = false;
 88 | 			i++;
 89 | 		}
 90 | 
 91 | 		// 检查剩余部分
 92 | 		if (addr_start && has_at && words[ie])
 93 | 		{
 94 | 			let word = words[ie];
 95 | 			let is_ascii = ((word.p === POSTAG.A_NX) ||
 96 | 				(word.p === POSTAG.A_M && word.w in EMAILCHAR))
 97 | 				? true : false;
 98 | 			if (is_ascii)
 99 | 			{
100 | 				let mailws = words.slice(addr_start, words.length);
101 | 				//debug(toEmailAddress(mailws));
102 | 				words.splice(addr_start, mailws.length, {
103 | 					w: this.toEmailAddress(mailws),
104 | 					p: POSTAG.URL
105 | 				});
106 | 			}
107 | 		}
108 | 
109 | 		return words;
110 | 	}
111 | 
112 | 	/**
113 | 	 * 根据一组单词生成邮箱地址
114 | 	 *
115 | 	 * @param {array} words 单词数组
116 | 	 * @return {string}
117 | 	 */
118 | 	toEmailAddress(words: IWord[])
119 | 	{
120 | 		let ret = words[0].w;
121 | 		for (let i = 1, word; word = words[i]; i++)
122 | 		{
123 | 			ret += word.w;
124 | 		}
125 | 		return ret;
126 | 	}
127 | 
128 | }
129 | 
// `init` is EmailOptimizer.init pre-bound to the class, so it can be called as a bare function.
export const init = EmailOptimizer.init.bind(EmailOptimizer) as ISubOptimizerCreate<EmailOptimizer>;

// Module type, re-exported from the static EmailOptimizer.type.
export const type = EmailOptimizer.type;

export default EmailOptimizer;
135 | 


--------------------------------------------------------------------------------
/lib/submod/ForeignOptimizer.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/8/18/018.
 3 |  */
 4 | import { SubSModuleOptimizer } from '../mod';
 5 | import { IDICT, IWord } from '../Segment';
 6 | import { IWordDebug } from '../util';
/**
 * Optimizer that merges a word tagged `POSTAG.A_NX` with the word after it
 * when their concatenation is an entry of the `TABLE` dictionary.
 */
export declare class ForeignOptimizer extends SubSModuleOptimizer {
    name: string;
    protected _TABLE: IDICT<IWord>;
    _cache(): void;
    doOptimize<T extends IWordDebug>(words: T[]): T[];
}
export declare const init: typeof ForeignOptimizer.init;
/** Module type; always the literal "optimizer". */
export declare const type = "optimizer";
export default ForeignOptimizer;
16 | 


--------------------------------------------------------------------------------
/lib/submod/ForeignOptimizer.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | /**
 3 |  * Created by user on 2018/8/18/018.
 4 |  */
 5 | Object.defineProperty(exports, "__esModule", { value: true });
 6 | exports.type = exports.init = exports.ForeignOptimizer = void 0;
 7 | const mod_1 = require("../mod");
 8 | class ForeignOptimizer extends mod_1.SubSModuleOptimizer {
 9 |     constructor() {
10 |         super(...arguments);
11 |         this.name = 'ForeignOptimizer';
12 |     }
13 |     _cache() {
14 |         super._cache();
15 |         this._TABLE = this.segment.getDict('TABLE');
16 |         this._POSTAG = this.segment.POSTAG;
17 |     }
18 |     doOptimize(words) {
19 |         const self = this;
20 |         const POSTAG = this._POSTAG;
21 |         const TABLE = this._TABLE;
22 |         let i = 0;
23 |         let len = words.length - 1;
24 |         while (i < len) {
25 |             let w0 = words[i - 1];
26 |             let w1 = words[i];
27 |             let w2 = words[i + 1];
28 |             if (!(w1.p === POSTAG.A_NX)) {
29 |                 i++;
30 |                 continue;
31 |             }
32 |             if (w2) {
33 |                 let nw = w1.w + w2.w;
34 |                 let mw = TABLE[nw];
35 |                 if (mw) {
36 |                     let new_w = self.debugToken({
37 |                         ...mw,
38 |                         w: nw,
39 |                         m: [w1, w2],
40 |                     }, {
41 |                         [this.name]: 1,
42 |                     }, true);
43 |                     this.sliceToken(words, i, 2, new_w);
44 |                     len--;
45 |                     continue;
46 |                 }
47 |             }
48 |             i++;
49 |         }
50 |         return words;
51 |     }
52 | }
exports.ForeignOptimizer = ForeignOptimizer;
// `init` is ForeignOptimizer.init pre-bound to the class, so it can be called as a bare function.
exports.init = ForeignOptimizer.init.bind(ForeignOptimizer);
exports.type = ForeignOptimizer.type;
exports.default = ForeignOptimizer;
57 | //# sourceMappingURL=ForeignOptimizer.js.map


--------------------------------------------------------------------------------
/lib/submod/ForeignOptimizer.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/8/18/018.
 3 |  */
 4 | 
 5 | import { SubSModuleOptimizer } from '../mod';
 6 | import { IDICT, IWord } from '../Segment';
 7 | import { IWordDebug } from '../util';
 8 | 
 9 | export class ForeignOptimizer extends SubSModuleOptimizer
10 | {
11 | 	override name = 'ForeignOptimizer';
12 | 
13 | 	protected override _TABLE: IDICT<IWord>;
14 | 
15 | 	override _cache()
16 | 	{
17 | 		super._cache();
18 | 
19 | 		this._TABLE = this.segment.getDict('TABLE');
20 | 		this._POSTAG = this.segment.POSTAG;
21 | 	}
22 | 
23 | 	override doOptimize<T extends IWordDebug>(words: T[]): T[]
24 | 	{
25 | 		const self = this;
26 | 		const POSTAG = this._POSTAG;
27 | 		const TABLE = this._TABLE;
28 | 
29 | 		let i = 0;
30 | 		let len = words.length - 1;
31 | 
32 | 		while (i < len)
33 | 		{
34 | 			let w0: IWordDebug = words[i - 1];
35 | 			let w1: IWordDebug = words[i];
36 | 			let w2: IWordDebug = words[i + 1];
37 | 
38 | 			if (!(w1.p === POSTAG.A_NX))
39 | 			{
40 | 				i++;
41 | 				continue;
42 | 			}
43 | 
44 | 			if (w2)
45 | 			{
46 | 				let nw: string = w1.w + w2.w;
47 | 				let mw: IWordDebug = TABLE[nw];
48 | 
49 | 				if (mw)
50 | 				{
51 | 					let new_w: IWordDebug = self.debugToken({
52 | 						...mw,
53 | 						w: nw,
54 | 						m: [w1, w2],
55 | 					}, {
56 | 						[this.name]: 1,
57 | 					}, true);
58 | 
59 | 					this.sliceToken(words, i, 2, new_w);
60 | 
61 | 					len--;
62 | 					continue;
63 | 				}
64 | 			}
65 | 
66 | 			i++;
67 | 		}
68 | 
69 | 		return words;
70 | 	}
71 | }
72 | 
// `init` is ForeignOptimizer.init pre-bound to the class, so it can be called as a bare function.
export const init = ForeignOptimizer.init.bind(ForeignOptimizer) as typeof ForeignOptimizer.init;

// Module type, re-exported from the static ForeignOptimizer.type.
export const type = ForeignOptimizer.type;

export default ForeignOptimizer;
78 | 


--------------------------------------------------------------------------------
/lib/submod/ForeignTokenizer.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 外文字符、数字识别模块
 3 |  *
 4 |  * @author 老雷<leizongmin@gmail.com>
 5 |  */
 6 | import { ISubTokenizerCreate, SubSModuleTokenizer } from '../mod';
 7 | import { IWord } from '../Segment';
 8 | import { IWordDebugInfo } from '../util/index';
export declare class ForeignTokenizer extends SubSModuleTokenizer {
    name: string;
    /**
     * Used for segmentation (includes Chinese)
     */
    _REGEXP_SPLIT_1: RegExp;
    /**
     * Used for segmentation (whole-word match, excluding Chinese)
     */
    _REGEXP_SPLIT_2: RegExp;
    _cache(): void;
    /**
     * Segment words that have not been recognized yet
     *
     * @param {array} words word array
     * @return {array}
     */
    split(words: IWord[]): IWord[];
    /**
     * Supports detecting more foreign scripts (may reduce performance)
     *
     * and avoids splitting in the wrong place, e.g. latīna Русский
     */
    splitForeign2(text: string, cur?: number): IWord[];
    /**
     * Match the English letters and digits contained in the text and split them
     *
     * @param {string} text text
     * @param {int} cur start position
     * @return {array}  return format   {w: 'word', c: start position}
     */
    splitForeign(text: string, cur?: number): IWord[];
    createForeignToken(word: IWord, lasttype?: number, attr?: IWordDebugInfo): IWord;
}
export declare const init: ISubTokenizerCreate<ForeignTokenizer>;
/** Module type; always the literal "tokenizer". */
export declare const type = "tokenizer";
export default ForeignTokenizer;
46 | 


--------------------------------------------------------------------------------
/lib/submod/JpSimpleTokenizer.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/19/019.
 3 |  */
 4 | import { SubSModuleTokenizer } from '../mod';
 5 | import { IWord } from '../Segment';
 6 | import { IWordDebug } from '../util';
/** Kana kind recorded on the tokens produced by JpSimpleTokenizer. */
export declare const enum EnumJpSimpleTokenizerType {
    /**
     * Hiragana
     * https://en.wikipedia.org/wiki/Hiragana
     */
    HIRAGANA = 1,
    /**
     * Katakana
     * https://en.wikipedia.org/wiki/Katakana
     */
    KATAKANA = 2
}
export declare class JpSimpleTokenizer extends SubSModuleTokenizer {
    static NAME: "JpSimpleTokenizer";
    name: "JpSimpleTokenizer";
    split(words: IWord[], ...argv: any[]): IWord[];
    protected createJpSimpleToken<T extends IWordDebug>(data: T, type: EnumJpSimpleTokenizerType): T;
    protected _splitText(text: string): IWord[];
}
export declare const init: typeof JpSimpleTokenizer.init;
/** Module type; always the literal "tokenizer". */
export declare const type = "tokenizer";
export default JpSimpleTokenizer;
29 | 


--------------------------------------------------------------------------------
/lib/submod/JpSimpleTokenizer.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | /**
 3 |  * Created by user on 2018/4/19/019.
 4 |  */
 5 | Object.defineProperty(exports, "__esModule", { value: true });
 6 | exports.type = exports.init = exports.JpSimpleTokenizer = exports.EnumJpSimpleTokenizerType = void 0;
 7 | const mod_1 = require("../mod");
// Runtime enum object with both directions filled in: name -> number and number -> name.
var EnumJpSimpleTokenizerType;
(function (EnumJpSimpleTokenizerType) {
    /**
     * Hiragana
     * https://en.wikipedia.org/wiki/Hiragana
     */
    EnumJpSimpleTokenizerType[EnumJpSimpleTokenizerType["HIRAGANA"] = 1] = "HIRAGANA";
    /**
     * Katakana
     * https://en.wikipedia.org/wiki/Katakana
     */
    EnumJpSimpleTokenizerType[EnumJpSimpleTokenizerType["KATAKANA"] = 2] = "KATAKANA";
})(EnumJpSimpleTokenizerType || (exports.EnumJpSimpleTokenizerType = EnumJpSimpleTokenizerType = {}));
/**
 * Simple kana tokenizer: cuts a text where hiragana and katakana runs meet
 * and records the kana kind of each piece as debug information.
 */
class JpSimpleTokenizer extends mod_1.SubSModuleTokenizer {
    constructor() {
        super(...arguments);
        this.name = 'JpSimpleTokenizer';
    }
    /** Hands `words` and the `_splitText` splitter to `_splitUnset`. */
    split(words, ...argv) {
        return this._splitUnset(words, this._splitText);
    }
    /** Attach `{ [this.name]: type }` to `data` through `debugToken`. */
    createJpSimpleToken(data, type) {
        return super.debugToken(data, {
            [this.name]: type,
        }, true);
    }
    /**
     * Split `text` into kana pieces.
     *
     * - text made only of hiragana, or only of katakana: one token for the whole text
     * - text that does not contain both kinds and is not purely one of them: `null`
     * - text containing both kinds: one token per non-empty piece of the split
     */
    _splitText(text) {
        const hasHiragana = /[ぁ-ん]/.test(text);
        const hasKatakana = /[ァ-ヴーｱ-ﾝﾞｰ]/.test(text);
        if (!hasHiragana || !hasKatakana) {
            const onlyHiragana = hasHiragana && /^[ぁ-ん]+$/.test(text);
            if (onlyHiragana || (hasKatakana && /^[ァ-ヴーｱ-ﾝﾞｰ]+$/.test(text))) {
                const kind = hasHiragana
                    ? 1 /* EnumJpSimpleTokenizerType.HIRAGANA */
                    : 2 /* EnumJpSimpleTokenizerType.KATAKANA */;
                return [this.createJpSimpleToken({ w: text }, kind)];
            }
            return null;
        }
        const tokens = [];
        const pieces = text.split(/((?:[^ァ-ヴーｱ-ﾝﾞｰ]+)?[ぁ-ん]+(?=[ァ-ヴーｱ-ﾝﾞｰ])|(?:[^ぁ-ん]+)?[ァ-ヴーｱ-ﾝﾞｰ]+(?=[ぁ-ん]))/);
        for (const piece of pieces) {
            // split() yields empty strings around adjacent matches; skip them
            if (piece === '') {
                continue;
            }
            // A piece containing any hiragana is tagged HIRAGANA, everything else KATAKANA.
            const kind = /[ぁ-ん]/.test(piece)
                ? 1 /* EnumJpSimpleTokenizerType.HIRAGANA */
                : 2 /* EnumJpSimpleTokenizerType.KATAKANA */;
            tokens.push(this.createJpSimpleToken({ w: piece }, kind));
        }
        return tokens;
    }
}
exports.JpSimpleTokenizer = JpSimpleTokenizer;
// Static NAME, assigned on the constructor after the class body.
JpSimpleTokenizer.NAME = 'JpSimpleTokenizer';
// `init` is JpSimpleTokenizer.init pre-bound to the class, so it can be called as a bare function.
exports.init = JpSimpleTokenizer.init.bind(JpSimpleTokenizer);
exports.type = JpSimpleTokenizer.type;
exports.default = JpSimpleTokenizer;
66 | //# sourceMappingURL=JpSimpleTokenizer.js.map


--------------------------------------------------------------------------------
/lib/submod/JpSimpleTokenizer.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/19/019.
 3 |  */
 4 | 
 5 | import { SubSModuleTokenizer } from '../mod';
 6 | import { IWord } from '../Segment';
 7 | import { IWordDebug } from '../util';
 8 | 
/** Kana kind recorded on the tokens produced by JpSimpleTokenizer. */
export const enum EnumJpSimpleTokenizerType
{
	/**
	 * Hiragana
	 * https://en.wikipedia.org/wiki/Hiragana
	 */
	HIRAGANA = 0x1,
	/**
	 * Katakana
	 * https://en.wikipedia.org/wiki/Katakana
	 */
	KATAKANA = 0x2,
}
22 | 
/**
 * Simple kana tokenizer: cuts a text where hiragana and katakana runs meet
 * and records the kana kind of each piece as debug information.
 */
export class JpSimpleTokenizer extends SubSModuleTokenizer
{
	static override NAME = 'JpSimpleTokenizer' as const;

	override name = 'JpSimpleTokenizer' as const;

	/** Hands `words` and the `_splitText` splitter to `_splitUnset`. */
	split(words: IWord[], ...argv): IWord[]
	{
		return this._splitUnset(words, this._splitText);
	}

	/** Attach `{ [this.name]: type }` to `data` through `debugToken`. */
	protected createJpSimpleToken<T extends IWordDebug>(data: T, type: EnumJpSimpleTokenizerType)
	{
		return super.debugToken(data, {
			[this.name]: type,
		}, true);
	}

	/**
	 * Split `text` into kana pieces.
	 *
	 * - text made only of hiragana, or only of katakana: one token for the whole text
	 * - text that does not contain both kinds and is not purely one of them: `null`
	 * - text containing both kinds: one token per non-empty piece of the split
	 */
	protected _splitText(text: string): IWord[]
	{
		const hasHiragana = /[ぁ-ん]/.test(text);
		const hasKatakana = /[ァ-ヴーｱ-ﾝﾞｰ]/.test(text);

		if (!hasHiragana || !hasKatakana)
		{
			const onlyHiragana = hasHiragana && /^[ぁ-ん]+$/.test(text);

			if (onlyHiragana || (hasKatakana && /^[ァ-ヴーｱ-ﾝﾞｰ]+$/.test(text)))
			{
				return [this.createJpSimpleToken({
					w: text,
				}, hasHiragana ? EnumJpSimpleTokenizerType.HIRAGANA : EnumJpSimpleTokenizerType.KATAKANA
				)];
			}

			return null;
		}

		const tokens: IWord[] = [];
		const pieces = text.split(/((?:[^ァ-ヴーｱ-ﾝﾞｰ]+)?[ぁ-ん]+(?=[ァ-ヴーｱ-ﾝﾞｰ])|(?:[^ぁ-ん]+)?[ァ-ヴーｱ-ﾝﾞｰ]+(?=[ぁ-ん]))/);

		for (const piece of pieces)
		{
			// split() yields empty strings around adjacent matches; skip them
			if (piece === '')
			{
				continue;
			}

			// A piece containing any hiragana is tagged HIRAGANA, everything else KATAKANA.
			tokens.push(this.createJpSimpleToken({
				w: piece,
			}, /[ぁ-ん]/.test(piece) ? EnumJpSimpleTokenizerType.HIRAGANA
				: EnumJpSimpleTokenizerType.KATAKANA
			));
		}

		return tokens;
	}

}
85 | 
// `init` is JpSimpleTokenizer.init pre-bound to the class, so it can be called as a bare function.
export const init = JpSimpleTokenizer.init.bind(JpSimpleTokenizer) as typeof JpSimpleTokenizer.init;

// Module type, re-exported from the static JpSimpleTokenizer.type.
export const type = JpSimpleTokenizer.type;

export default JpSimpleTokenizer;
91 | 
92 | 


--------------------------------------------------------------------------------
/lib/submod/PunctuationTokenizer.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 标点符号识别模块
 3 |  *
 4 |  * @author 老雷<leizongmin@gmail.com>
 5 |  */
 6 | import { SubSModuleTokenizer } from '../mod';
 7 | import { IWord } from '../Segment';
export declare class PunctuationTokenizer extends SubSModuleTokenizer {
    name: string;
    _STOPWORD: string[];
    STOPWORD: {
        [key: string]: number;
    };
    STOPWORD2: {
        [key: number]: {
            [key: string]: number;
        };
    };
    /**
     * Segment words that have not been recognized yet
     *
     * @param {array} words word array
     * @return {array}
     */
    split(words: IWord[]): IWord[];
    /**
     * Match the punctuation contained in the text and return its details
     *
     * @param {string} text text
     * @param {int} cur start position
     * @return {array}  return format   {w: 'matched text', c: start position}
     */
    matchStopword(text: string, cur?: number): IWord[];
}
export declare const init: typeof PunctuationTokenizer.init;
/** Module type; always the literal "tokenizer". */
export declare const type = "tokenizer";
export default PunctuationTokenizer;
38 | 


--------------------------------------------------------------------------------
/lib/submod/PunctuationTokenizer.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.type = exports.init = exports.PunctuationTokenizer = void 0;
 4 | /**
 5 |  * 标点符号识别模块
 6 |  *
 7 |  * @author 老雷<leizongmin@gmail.com>
 8 |  */
 9 | const mod_1 = require("../mod");
10 | const STOPWORD_1 = require("../mod/data/STOPWORD");
/**
 * Punctuation recognition module: cuts the entries of the STOPWORD tables
 * out of words that carry no part-of-speech tag yet.
 */
class PunctuationTokenizer extends mod_1.SubSModuleTokenizer {
    constructor() {
        super(...arguments);
        this.name = 'PunctuationTokenizer';
        this._STOPWORD = STOPWORD_1._STOPWORD;
        this.STOPWORD = STOPWORD_1.STOPWORD;
        this.STOPWORD2 = STOPWORD_1.STOPWORD2;
    }
    /**
     * Segment words that have not been recognized yet
     *
     * @param {array} words word array
     * @return {array} new array; tagged words are passed through unchanged
     */
    split(words) {
        const POSTAG = this._POSTAG;
        const out = [];
        for (let idx = 0, word; word = words[idx]; idx++) {
            // Already tagged: pass through
            if (word.p > 0) {
                out.push(word);
                continue;
            }
            // Only untagged words are searched for punctuation
            const hits = this.matchStopword(word.w);
            if (hits.length < 1) {
                out.push(word);
                continue;
            }
            // Separate the punctuation from the text around it
            const text = word.w;
            let consumed = 0;
            for (const hit of hits) {
                // plain text between the previous match and this one
                if (hit.c > consumed) {
                    out.push({
                        w: text.slice(consumed, hit.c)
                    });
                }
                out.push(this.debugToken({
                    w: hit.w,
                    p: POSTAG.D_W
                }, {
                    [this.name]: true,
                }, true));
                consumed = hit.c + hit.w.length;
            }
            // whatever follows the last match
            if (consumed < text.length) {
                out.push({
                    w: text.slice(consumed)
                });
            }
        }
        return out;
    }
    /**
     * Match the punctuation contained in the text and return its details
     *
     * At each position the lengths listed in STOPWORD2 are tried in its key
     * order and the first hit wins; without a hit the scan advances by one
     * character.
     *
     * @param {string} text text
     * @param {int} cur start position
     * @return {array}  return format   {w: 'matched text', c: start position}
     */
    matchStopword(text, cur) {
        const table = this.STOPWORD2;
        if (isNaN(cur))
            cur = 0;
        const found = [];
        while (cur < text.length) {
            let hit = null;
            for (const len in table) {
                const candidate = text.substr(cur, len);
                if (candidate in table[len]) {
                    hit = candidate;
                    break;
                }
            }
            if (hit === null) {
                cur += 1;
            }
            else {
                found.push({ w: hit, c: cur });
                cur += hit.length;
            }
        }
        return found;
    }
}
exports.PunctuationTokenizer = PunctuationTokenizer;
// debug(STOPWORD2);
// `init` is PunctuationTokenizer.init pre-bound to the class, so it can be called as a bare function.
exports.init = PunctuationTokenizer.init.bind(PunctuationTokenizer);
exports.type = PunctuationTokenizer.type;
exports.default = PunctuationTokenizer;
99 | //# sourceMappingURL=PunctuationTokenizer.js.map


--------------------------------------------------------------------------------
/lib/submod/PunctuationTokenizer.ts:
--------------------------------------------------------------------------------
  1 | 'use strict';
  2 | 
  3 | /**
  4 |  * Punctuation recognition module
  5 |  *
  6 |  * @author 老雷<leizongmin@gmail.com>
  7 |  */
  8 | 
  9 | import { SubSModuleTokenizer } from '../mod';
 10 | import { IWord } from '../Segment';
 11 | import { _STOPWORD, STOPWORD, STOPWORD2 } from '../mod/data/STOPWORD';
 12 | 
 13 | export class PunctuationTokenizer extends SubSModuleTokenizer
 14 | {
 15 | 	override name = 'PunctuationTokenizer';
 16 | 
 17 | 	public _STOPWORD = _STOPWORD;
 18 | 	public STOPWORD = STOPWORD;
 19 | 	public STOPWORD2 = STOPWORD2;
 20 | 
 21 | 	/**
 22 | 	 * Cut the punctuation out of every word that has no part-of-speech tag yet.
 23 | 	 *
 24 | 	 * Words with `p > 0` and words without any match are passed through untouched; the
 25 | 	 * others become their punctuation marks (tagged `POSTAG.D_W`) plus the text between them.
 26 | 	 *
 27 | 	 * @param {array} words word list
 28 | 	 * @return {array} the re-split word list
 29 | 	 */
 30 | 	split(words: IWord[]): IWord[]
 31 | 	{
 32 | 		const POSTAG = this._POSTAG;
 33 | 		const out: IWord[] = [];
 34 | 
 35 | 		let idx = 0;
 36 | 		let word: IWord;
 37 | 		// same stop rule as a `word = words[i]` loop: ends at the first falsy entry
 38 | 		while ((word = words[idx++]))
 39 | 		{
 40 | 			// only words that are still untagged get scanned
 41 | 			const hits: IWord[] = word.p > 0 ? [] : this.matchStopword(word.w);
 42 | 			if (hits.length < 1)
 43 | 			{
 44 | 				out.push(word);
 45 | 				continue;
 46 | 			}
 47 | 
 48 | 			let consumed = 0; // end offset, inside word.w, of the previous mark
 49 | 			hits.forEach((hit) =>
 50 | 			{
 51 | 				if (hit.c > consumed)
 52 | 				{
 53 | 					out.push({
 54 | 						w: word.w.slice(consumed, hit.c),
 55 | 					});
 56 | 				}
 57 | 
 58 | 				out.push(this.debugToken({
 59 | 					w: hit.w,
 60 | 					p: POSTAG.D_W,
 61 | 				}, {
 62 | 					[this.name]: true,
 63 | 				}, true));
 64 | 
 65 | 				consumed = hit.c + hit.w.length;
 66 | 			});
 67 | 
 68 | 			// text left behind the last mark
 69 | 			if (consumed < word.w.length)
 70 | 			{
 71 | 				out.push({
 72 | 					w: word.w.slice(consumed),
 73 | 				});
 74 | 			}
 75 | 		}
 76 | 		return out;
 77 | 	}
 78 | 
 79 | 	/**
 80 | 	 * Collect the punctuation marks (entries of `STOPWORD2`) that occur in the text.
 81 | 	 *
 82 | 	 * @param {string} text text to scan
 83 | 	 * @param {int} cur offset to start from; 0 when omitted or NaN
 84 | 	 * @return {array} matches shaped like {w: matched text, c: start offset}
 85 | 	 */
 86 | 	matchStopword(text: string, cur?: number): IWord[]
 87 | 	{
 88 | 		const table = this.STOPWORD2;
 89 | 		const found: IWord[] = [];
 90 | 
 91 | 		if (isNaN(cur)) cur = 0;
 92 | 
 93 | 		while (cur < text.length)
 94 | 		{
 95 | 			// a miss moves on by one position, a hit by its own length
 96 | 			let step = 1;
 97 | 			for (const len in table)
 98 | 			{
 99 | 				const piece = text.substr(cur, len as any as number);
100 | 				if (piece in table[len])
101 | 				{
102 | 					found.push({ w: piece, c: cur });
103 | 					step = piece.length;
104 | 					break;
105 | 				}
106 | 			}
107 | 			cur += step;
108 | 		}
109 | 		return found;
110 | 	}
111 | }
112 | 
113 | // debug(STOPWORD2);
114 | 
115 | export const init = PunctuationTokenizer.init.bind(PunctuationTokenizer) as typeof PunctuationTokenizer.init; // static init, bound to the class so it can be called detached
116 | 
117 | export const type = PunctuationTokenizer.type; // re-export of the class's static `type`
118 | 
119 | export default PunctuationTokenizer;
120 | 


--------------------------------------------------------------------------------
/lib/submod/SingleTokenizer.d.ts:
--------------------------------------------------------------------------------
 1 | import { SubSModuleTokenizer } from '../mod';
 2 | import { IWord } from '../Segment';
 3 | /**
 4 |  * Single-character tokenizer module.
 5 |  * Not part of the default module list; it has to be specified manually.
 6 |  *
 7 |  * @author 老雷<leizongmin@gmail.com>
 8 |  */
 9 | export declare class SingleTokenizer extends SubSModuleTokenizer {
10 |     /**
11 |      * Split the unrecognised words.
12 |      *
13 |      * @param {array} words word list
14 |      * @return {array}
15 |      */
16 |     split(words: IWord[]): IWord[];
17 |     /**
18 |      * Single-character split.
19 |      *
20 |      * @param {string} text text to split
21 |      * @param {int} cur start offset
22 |      * @return {array}
23 |      */
24 |     splitSingle(text: any, cur?: number): IWord[];
25 | }
26 | export declare const init: typeof SingleTokenizer.init;
27 | export declare const type = "tokenizer";
28 | export default SingleTokenizer;
29 | 


--------------------------------------------------------------------------------
/lib/submod/SingleTokenizer.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.type = exports.init = exports.SingleTokenizer = void 0;
 4 | const tslib_1 = require("tslib");
 5 | const mod_1 = require("../mod");
 6 | const uni_string_1 = tslib_1.__importDefault(require("uni-string"));
 7 | /**
 8 |  * Single-character tokenizer module.
 9 |  * Not part of the default module list; it has to be specified manually.
10 |  *
11 |  * @author 老雷<leizongmin@gmail.com>
12 |  */
13 | class SingleTokenizer extends mod_1.SubSModuleTokenizer {
14 |     /**
15 |      * Split the unrecognised words.
16 |      *
17 |      * @param {array} words word list
18 |      * @return {array}
19 |      */
20 |     split(words) {
21 |         const POSTAG = this.segment.POSTAG; // not used further down in this method
22 |         let ret = [];
23 |         for (let i = 0, word; word = words[i]; i++) {
24 |             if (typeof word.p === 'undefined' || word.p) {
25 |                 ret.push(word);
26 |             }
27 |             else {
28 |                 // only words whose `p` is defined but falsy are split
29 |                 ret = ret.concat(this.splitSingle(word.w));
30 |             }
31 |         }
32 |         return ret;
33 |     }
34 |     /**
35 |      * Single-character split: one POSTAG.UNK token per piece returned by uni-string's split(text, '').
36 |      *
37 |      * @param {string} text text to split
38 |      * @param {int} cur start offset (0 when it is not a number)
39 |      * @return {array}
40 |      */
41 |     splitSingle(text, cur) {
42 |         const POSTAG = this.segment.POSTAG;
43 |         if (isNaN(cur))
44 |             cur = 0;
45 |         if (cur > 0) {
46 |             text = text.slice(cur); // drop everything in front of the start offset
47 |         }
48 |         let ret = [];
49 |         uni_string_1.default
50 |             .split(text, '')
51 |             .forEach(function (w, i) {
52 |             ret.push({
53 |                 w,
54 |                 p: POSTAG.UNK,
55 |             });
56 |         });
57 |         return ret;
58 |     }
59 | }
60 | exports.SingleTokenizer = SingleTokenizer;
61 | exports.init = SingleTokenizer.init.bind(SingleTokenizer); // static init, bound to the class so it can be called detached
62 | exports.type = SingleTokenizer.type; // re-export of the class's static `type`
63 | exports.default = SingleTokenizer;
64 | //# sourceMappingURL=SingleTokenizer.js.map


--------------------------------------------------------------------------------
/lib/submod/SingleTokenizer.ts:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | 
 3 | import { SubSModuleTokenizer } from '../mod';
 4 | import { IWord } from '../Segment';
 5 | import UString from 'uni-string';
 6 | 
 7 | /**
 8 |  * Single-character tokenizer module.
 9 |  * Not part of the default module list; it has to be specified manually.
10 |  *
11 |  * @author 老雷<leizongmin@gmail.com>
12 |  */
13 | export class SingleTokenizer extends SubSModuleTokenizer
14 | {
15 | 
16 | 	/**
17 | 	 * Break every unrecognised word (`p` defined but falsy) into single pieces.
18 | 	 * Words whose `p` is undefined or truthy are passed through unchanged.
19 | 	 *
20 | 	 * @param {array} words word list
21 | 	 * @return {array}
22 | 	 */
23 | 	split(words: IWord[]): IWord[]
24 | 	{
25 | 		const ret: IWord[] = [];
26 | 
27 | 		// iteration ends at the first falsy entry of `words`
28 | 		for (let i = 0, word: IWord; (word = words[i]); i++)
29 | 		{
30 | 			if (typeof word.p === 'undefined' || word.p)
31 | 			{
32 | 				ret.push(word);
33 | 				continue;
34 | 			}
35 | 
36 | 			// Append in place: `ret = ret.concat(...)` re-copied the whole
37 | 			// result for every word that was split.
38 | 			for (const piece of this.splitSingle(word.w))
39 | 			{
40 | 				ret.push(piece);
41 | 			}
42 | 		}
43 | 
44 | 		return ret;
45 | 	}
46 | 
47 | 	/**
48 | 	 * Turn a text into one `POSTAG.UNK` token per piece returned by `UString.split(text, '')`.
49 | 	 *
50 | 	 * @param {string} text text to split
51 | 	 * @param {int} cur offset to start from; 0 when omitted or NaN
52 | 	 * @return {array}
53 | 	 */
54 | 	splitSingle(text, cur?: number): IWord[]
55 | 	{
56 | 		const POSTAG = this.segment.POSTAG;
57 | 
58 | 		if (isNaN(cur)) cur = 0;
59 | 
60 | 		if (cur > 0)
61 | 		{
62 | 			text = text.slice(cur);
63 | 		}
64 | 
65 | 		const ret: IWord[] = [];
66 | 
67 | 		UString.split(text, '').forEach((w) =>
68 | 		{
69 | 			ret.push({ w, p: POSTAG.UNK });
70 | 		});
71 | 
72 | 		return ret;
73 | 	}
74 | 
75 | }
76 | 
77 | export const init = SingleTokenizer.init.bind(SingleTokenizer) as typeof SingleTokenizer.init; // static init, bound to the class so it can be called detached
78 | 
79 | export const type = SingleTokenizer.type; // re-export of the class's static `type`
80 | 
81 | export default SingleTokenizer;
82 | 


--------------------------------------------------------------------------------
/lib/submod/URLTokenizer.d.ts:
--------------------------------------------------------------------------------
 1 | import Segment, { IWord } from '../Segment';
 2 | /**
 3 |  * URL recognition module
 4 |  *
 5 |  * @author 老雷<leizongmin@gmail.com>
 6 |  */
 7 | /**
 8 |  * Module type
 9 |  * */
10 | export declare const type = "tokenizer";
11 | export declare let segment: Segment;
12 | /**
13 |  * Initialise the module
14 |  *
15 |  * @param {Segment} segment the segmenter instance
16 |  */
17 | export declare function init(_segment: Segment): void;
18 | /**
19 |  * Split the unrecognised words
20 |  *
21 |  * @param {array} words word list
22 |  * @return {array}
23 |  */
24 | export declare function split(words: IWord[]): IWord[];
25 | /**
26 |  * Find the URLs contained in the text and return their match info
27 |  *
28 |  * @param {string} text text to scan
29 |  * @param {int} cur start offset
30 |  * @return {array}  result format   {w: 'URL', c: start offset}
31 |  */
32 | export declare function matchURL(text: string, cur?: number): any[];
33 | 


--------------------------------------------------------------------------------
/lib/submod/WildcardTokenizer.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Wildcard recognition module
 3 |  *
 4 |  * @author 老雷<leizongmin@gmail.com>
 5 |  */
 6 | import { ISubTokenizerCreate, SubSModuleTokenizer } from '../mod';
 7 | import { IDICT, IDICT2, IWord } from '../Segment';
 8 | import { IWordDebugInfo } from '../util/index';
 9 | export declare class WildcardTokenizer extends SubSModuleTokenizer {
10 |     name: string;
11 |     protected _TABLE: IDICT<IWord>;
12 |     protected _TABLE2: IDICT2<IWord>;
13 |     _cache(): void;
14 |     /**
15 |      * Split the unrecognised words
16 |      *
17 |      * @param {array} words word list
18 |      * @return {array}
19 |      */
20 |     split(words: IWord[]): IWord[];
21 |     createWildcardToken(word: IWord, lasttype?: number, attr?: IWordDebugInfo): IWord;
22 |     splitWildcard(text: string, cur?: number): IWord[];
23 |     /**
24 |      * Match words and return their match info
25 |      *
26 |      * @param {string} text text to scan
27 |      * @param {int} cur start offset
28 |      * @return {array}  result format   {w: 'word', c: start offset}
29 |      */
30 |     matchWord(text: string, cur?: number): IWord[];
31 | }
32 | export declare const init: ISubTokenizerCreate<WildcardTokenizer>;
33 | export declare const type = "tokenizer";
34 | export default WildcardTokenizer;
35 | 


--------------------------------------------------------------------------------
/lib/submod/WildcardTokenizer.js:
--------------------------------------------------------------------------------
  1 | 'use strict';
  2 | Object.defineProperty(exports, "__esModule", { value: true });
  3 | exports.type = exports.init = exports.WildcardTokenizer = void 0;
  4 | /**
  5 |  * Wildcard recognition module
  6 |  *
  7 |  * @author 老雷<leizongmin@gmail.com>
  8 |  */
  9 | const mod_1 = require("../mod");
 10 | class WildcardTokenizer extends mod_1.SubSModuleTokenizer {
 11 |     constructor() {
 12 |         super(...arguments);
 13 |         this.name = 'WildcardTokenizer';
 14 |     }
 15 |     _cache() {
 16 |         super._cache();
 17 |         this._TABLE = this.segment.getDict('WILDCARD');
 18 |         this._TABLE2 = this.segment.getDict('WILDCARD2');
 19 |     }
 20 |     /**
 21 |      * Split the unrecognised words
 22 |      *
 23 |      * @param {array} words word list
 24 |      * @return {array}
 25 |      */
 26 |     split(words) {
 27 |         //return this._splitUnknow(words, this.splitForeign);
 28 |         return this._splitUnknow(words, this.splitWildcard);
 29 |     }
 30 |     createWildcardToken(word, lasttype, attr) { // `lasttype` is not used in the body
 31 |         let nw = this.createToken(word, true, attr);
 32 |         return nw;
 33 |     }
 34 |     splitWildcard(text, cur) { // `cur` is not used; matchWord() is called without it
 35 |         var _a;
 36 |         //const POSTAG = this._POSTAG;
 37 |         const TABLE = this._TABLE;
 38 |         let ret = [];
 39 |         let self = this;
 40 |         // separate out the recognised words
 41 |         let wordinfo = self.matchWord(text);
 42 |         if (wordinfo.length) {
 43 |             let lastc = 0;
 44 |             for (let ui = 0, bw; bw = wordinfo[ui]; ui++) {
 45 |                 if (bw.c > lastc) {
 46 |                     ret.push({
 47 |                         w: text.substr(lastc, bw.c - lastc),
 48 |                     });
 49 |                 }
 50 |                 let nw = self.createWildcardToken({
 51 |                     w: bw.w,
 52 |                     p: (_a = TABLE[bw.w.toLowerCase()]) === null || _a === void 0 ? void 0 : _a.p,
 53 |                 });
 54 |                 ret.push(nw);
 55 |                 lastc = bw.c + bw.w.length;
 56 |             }
 57 |             let lastword = wordinfo[wordinfo.length - 1];
 58 |             if (lastword.c + lastword.w.length < text.length) {
 59 |                 ret.push({
 60 |                     w: text.substr(lastword.c + lastword.w.length),
 61 |                 });
 62 |             }
 63 |         }
 64 |         return ret.length ? ret : undefined; // undefined when nothing matched
 65 |     }
 66 |     /**
 67 |      * Match words and return their match info
 68 |      *
 69 |      * @param {string} text text to scan
 70 |      * @param {int} cur start offset
 71 |      * @return {array}  result format   {w: 'word', c: start offset}
 72 |      */
 73 |     matchWord(text, cur) {
 74 |         //const POSTAG = this._POSTAG;
 75 |         const TABLE = this._TABLE2;
 76 |         if (isNaN(cur))
 77 |             cur = 0;
 78 |         let ret = [];
 79 |         //let self = this;
 80 |         let s = false;
 81 |         // try every candidate word; a later hit replaces an earlier one (intent: keep the longest)
 82 |         let lowertext = text.toLowerCase();
 83 |         while (cur < text.length) {
 84 |             let stopword = null;
 85 |             for (let i in TABLE) {
 86 |                 if (lowertext.substr(cur, i) in TABLE[i]) {
 87 |                     stopword = {
 88 |                         w: text.substr(cur, i),
 89 |                         c: cur,
 90 |                     };
 91 |                 }
 92 |             }
 93 |             if (stopword !== null) {
 94 |                 ret.push(stopword);
 95 |                 cur += stopword.w.length;
 96 |             }
 97 |             else {
 98 |                 cur++;
 99 |             }
100 |         }
101 |         return ret;
102 |     }
103 | }
104 | exports.WildcardTokenizer = WildcardTokenizer;
105 | exports.init = WildcardTokenizer.init.bind(WildcardTokenizer); // static init, bound to the class so it can be called detached
106 | exports.type = WildcardTokenizer.type; // re-export of the class's static `type`
107 | exports.default = WildcardTokenizer;
108 | //# sourceMappingURL=WildcardTokenizer.js.map


--------------------------------------------------------------------------------
/lib/submod/WildcardTokenizer.ts:
--------------------------------------------------------------------------------
  1 | 'use strict';
  2 | 
  3 | /**
  4 |  * Wildcard recognition module
  5 |  *
  6 |  * @author 老雷<leizongmin@gmail.com>
  7 |  */
  8 | import { ISubTokenizerCreate, SubSModuleTokenizer } from '../mod';
  9 | import { IDICT, IDICT2, IWord } from '../Segment';
 10 | import { IWordDebugInfo } from '../util/index';
 11 | 
 12 | export class WildcardTokenizer extends SubSModuleTokenizer
 13 | {
 14 | 
 15 | 	override name = 'WildcardTokenizer';
 16 | 
 17 | 	protected override _TABLE: IDICT<IWord>;
 18 | 	protected _TABLE2: IDICT2<IWord>;
 19 | 
 20 | 	/** Load the `WILDCARD` and `WILDCARD2` dictionaries from the segment. */
 21 | 	override _cache()
 22 | 	{
 23 | 		super._cache();
 24 | 		this._TABLE = this.segment.getDict('WILDCARD');
 25 | 		this._TABLE2 = this.segment.getDict('WILDCARD2');
 26 | 	}
 27 | 
 28 | 	/**
 29 | 	 * Split the unrecognised words around the wildcards they contain.
 30 | 	 *
 31 | 	 * @param {array} words word list
 32 | 	 * @return {array}
 33 | 	 */
 34 | 	split(words: IWord[]): IWord[]
 35 | 	{
 36 | 		//return this._splitUnknow(words, this.splitForeign);
 37 | 		return this._splitUnknow(words, this.splitWildcard);
 38 | 	}
 39 | 
 40 | 	/** Wrap a matched wildcard with `createToken`; `lasttype` is currently unused. */
 41 | 	createWildcardToken(word: IWord, lasttype?: number, attr?: IWordDebugInfo)
 42 | 	{
 43 | 		return this.createToken<IWord>(word, true, attr);
 44 | 	}
 45 | 
 46 | 	/**
 47 | 	 * Cut `text` into wildcard tokens and the plain text between them.
 48 | 	 *
 49 | 	 * @param {string} text text to split
 50 | 	 * @param {int} cur unused; matching always starts at offset 0
 51 | 	 * @return {array} the pieces in order, or `undefined` when no wildcard was found
 52 | 	 */
 53 | 	splitWildcard(text: string, cur?: number): IWord[]
 54 | 	{
 55 | 		const TABLE = this._TABLE;
 56 | 		const ret: IWord[] = [];
 57 | 
 58 | 		// offset just behind the previously emitted wildcard
 59 | 		let lastc = 0;
 60 | 		for (const bw of this.matchWord(text))
 61 | 		{
 62 | 			if (bw.c > lastc)
 63 | 			{
 64 | 				ret.push({
 65 | 					w: text.slice(lastc, bw.c),
 66 | 				});
 67 | 			}
 68 | 
 69 | 			ret.push(this.createWildcardToken({
 70 | 				w: bw.w,
 71 | 				p: TABLE[bw.w.toLowerCase()]?.p,
 72 | 			}));
 73 | 
 74 | 			lastc = bw.c + bw.w.length;
 75 | 		}
 76 | 
 77 | 		// text left behind the last wildcard (only when something matched)
 78 | 		if (ret.length && lastc < text.length)
 79 | 		{
 80 | 			ret.push({
 81 | 				w: text.slice(lastc),
 82 | 			});
 83 | 		}
 84 | 
 85 | 		return ret.length ? ret : undefined;
 86 | 	}
 87 | 
 88 | 	/**
 89 | 	 * Find the `WILDCARD2` entries contained in `text`.
 90 | 	 * Matching is case-insensitive: each candidate is lower-cased before the lookup.
 91 | 	 *
 92 | 	 * @param {string} text text to scan
 93 | 	 * @param {int} cur offset to start from; 0 when omitted or NaN
 94 | 	 * @return {array} matches shaped like {w: matched text, c: start offset}
 95 | 	 */
 96 | 	matchWord(text: string, cur?: number)
 97 | 	{
 98 | 		const TABLE = this._TABLE2;
 99 | 
100 | 		if (isNaN(cur)) cur = 0;
101 | 
102 | 		const ret: IWord[] = [];
103 | 
104 | 		while (cur < text.length)
105 | 		{
106 | 			let stopword: IWord = null;
107 | 			// Try every candidate length; a later hit replaces an earlier one, so with
108 | 			// integer-like keys visited in ascending order the longest match is kept.
109 | 			for (const i in TABLE)
110 | 			{
111 | 				const w = text.substr(cur, i as any);
112 | 				// Lower-case the slice itself rather than slicing a lower-cased copy of
113 | 				// the whole text: case mapping can change a string's length, which would
114 | 				// shift the offsets, and splitWildcard() looks entries up the same way.
115 | 				if (w.toLowerCase() in TABLE[i])
116 | 				{
117 | 					stopword = {
118 | 						w,
119 | 						c: cur,
120 | 					};
121 | 				}
122 | 			}
123 | 
124 | 			if (stopword !== null)
125 | 			{
126 | 				ret.push(stopword);
127 | 				cur += stopword.w.length;
128 | 			}
129 | 			else
130 | 			{
131 | 				cur++;
132 | 			}
133 | 		}
134 | 
135 | 		return ret;
136 | 	}
137 | 
138 | }
139 | 
140 | export const init = WildcardTokenizer.init.bind(WildcardTokenizer) as ISubTokenizerCreate<WildcardTokenizer>; // static init, bound to the class so it can be called detached
141 | 
142 | export const type = WildcardTokenizer.type; // re-export of the class's static `type`
143 | 
144 | export default WildcardTokenizer;
145 | 


--------------------------------------------------------------------------------
/lib/submod/ZhRadicalTokenizer.d.ts:
--------------------------------------------------------------------------------
 1 | import { ISubTokenizerCreate, SubSModuleTokenizer } from '../mod';
 2 | import { IDICT, IDICT2, IWord } from '../Segment';
 3 | /**
 4 |  * This module currently has no use and no effect
 5 |  *
 6 |  * @todo radicals
 7 |  */
 8 | export declare class ZhRadicalTokenizer extends SubSModuleTokenizer {
 9 |     name: string;
10 |     protected _TABLE: IDICT<IWord>;
11 |     protected _TABLE2: IDICT2<IWord>;
12 |     protected _cache(...argv: any[]): void;
13 |     split(words: IWord[]): IWord[];
14 |     splitZhRadical(text: string, cur?: number): IWord[];
15 | }
16 | export declare const init: ISubTokenizerCreate<ZhRadicalTokenizer>;
17 | export declare const type = "tokenizer";
18 | export default ZhRadicalTokenizer;
19 | 


--------------------------------------------------------------------------------
/lib/submod/ZhRadicalTokenizer.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.type = exports.init = exports.ZhRadicalTokenizer = void 0;
 4 | const mod_1 = require("../mod");
 5 | /**
 6 |  * This module currently has no use and no effect
 7 |  *
 8 |  * @todo radicals
 9 |  */
10 | class ZhRadicalTokenizer extends mod_1.SubSModuleTokenizer {
11 |     constructor() {
12 |         super(...arguments);
13 |         this.name = 'ZhRadicalTokenizer';
14 |     }
15 |     _cache(...argv) {
16 |         super._cache(...argv);
17 |     }
18 |     split(words) {
19 |         return this._splitUnset(words, this.splitZhRadical);
20 |     }
21 |     splitZhRadical(text, cur) { // `cur` is not used in the body
22 |         let ret = [];
23 |         let self = this;
24 |         let _r = /[\u4136\u4137]/u;
25 |         if (!_r.test(text)) {
26 |             return null; // neither character occurs in the text
27 |         }
28 |         text
29 |             .split(/([\u4136\u4137]+)/u) // the capture group keeps the matched runs in the result
30 |             .forEach(function (w, i) {
31 |             if (w !== '') {
32 |                 if (_r.test(w)) {
33 |                     ret.push(self.debugToken({
34 |                         w,
35 |                     }, {
36 |                         [self.name]: true,
37 |                     }, true));
38 |                 }
39 |                 else {
40 |                     ret.push({
41 |                         w,
42 |                     });
43 |                 }
44 |             }
45 |         });
46 |         return ret.length ? ret : null;
47 |     }
48 | }
49 | exports.ZhRadicalTokenizer = ZhRadicalTokenizer;
50 | exports.init = ZhRadicalTokenizer.init.bind(ZhRadicalTokenizer); // static init, bound to the class so it can be called detached
51 | exports.type = ZhRadicalTokenizer.type; // re-export of the class's static `type`
52 | exports.default = ZhRadicalTokenizer;
53 | //# sourceMappingURL=ZhRadicalTokenizer.js.map


--------------------------------------------------------------------------------
/lib/submod/ZhRadicalTokenizer.ts:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | 
 3 | import { ISubTokenizerCreate, SubSModuleTokenizer } from '../mod';
 4 | import { IDICT, IDICT2, IWord } from '../Segment';
 5 | 
 6 | /**
 7 |  * This module currently has no use and no effect.
 8 |  *
 9 |  * @todo radicals
10 |  */
11 | export class ZhRadicalTokenizer extends SubSModuleTokenizer
12 | {
13 | 
14 | 	override name = 'ZhRadicalTokenizer';
15 | 
16 | 	protected override _TABLE: IDICT<IWord>;
17 | 	protected _TABLE2: IDICT2<IWord>;
18 | 
19 | 	protected override _cache(...argv)
20 | 	{
21 | 		super._cache(...argv);
22 | 	}
23 | 
24 | 	/**
25 | 	 * Hand the word list to `_splitUnset`, with `splitZhRadical` as the
26 | 	 * per-word splitter.
27 | 	 */
28 | 	split(words: IWord[]): IWord[]
29 | 	{
30 | 		return this._splitUnset(words, this.splitZhRadical);
31 | 	}
32 | 
33 | 	/**
34 | 	 * Separate runs of U+4136 / U+4137 from the rest of the text.
35 | 	 *
36 | 	 * @param text text to split
37 | 	 * @param cur not used
38 | 	 * @return the pieces in order, runs being passed through `debugToken` with
39 | 	 *         `{[name]: true}`; `null` when the text contains neither character
40 | 	 */
41 | 	splitZhRadical(text: string, cur?: number): IWord[]
42 | 	{
43 | 		const probe = /[\u4136\u4137]/u;
44 | 		if (probe.test(text) === false)
45 | 		{
46 | 			return null;
47 | 		}
48 | 
49 | 		const tokens: IWord[] = [];
50 | 		for (const piece of text.split(/([\u4136\u4137]+)/u))
51 | 		{
52 | 			// split() yields an empty string when a run touches either end of the text
53 | 			if (piece === '') continue;
54 | 
55 | 			if (probe.test(piece))
56 | 			{
57 | 				tokens.push(this.debugToken({ w: piece }, { [this.name]: true }, true));
58 | 			}
59 | 			else
60 | 			{
61 | 				tokens.push({ w: piece });
62 | 			}
63 | 		}
64 | 
65 | 		return tokens.length > 0 ? tokens : null;
66 | 	}
67 | 
68 | }
69 | 
70 | export const init = ZhRadicalTokenizer.init.bind(ZhRadicalTokenizer) as ISubTokenizerCreate<ZhRadicalTokenizer>; // static init, bound to the class so it can be called detached
71 | 
72 | export const type = ZhRadicalTokenizer.type; // re-export of the class's static `type`
73 | 
74 | export default ZhRadicalTokenizer;
75 | 


--------------------------------------------------------------------------------
/lib/submod/ZhtSynonymOptimizer.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/16/016.
 3 |  */
 4 | import { SubSModuleOptimizer } from '../mod';
 5 | import { IDICT, IDICT_SYNONYM, IWord } from '../Segment';
 6 | import { IWordDebug } from '../util';
 7 | /**
 8 |  * Converts automatically based on word meaning, without manually adding entries to synonym.txt.
 9 |  * Intended for conversions that otherwise tend to need manual handling.
10 |  *
11 |  * Automatically handles `里|后`
12 |  *
13 |  * Adding names (people, places, and so on) to the dictionary is recommended to improve accuracy
14 |  * and to prevent wrong conversions.
15 |  *
16 |  * @todo 發于余干松冲准呆只范舍涂
17 |  */
18 | export declare class ZhtSynonymOptimizer extends SubSModuleOptimizer {
19 |     name: string;
20 |     protected _SYNONYM?: IDICT_SYNONYM;
21 |     protected _TABLE: IDICT<IWord>;
22 |     _cache(): void;
23 |     isSynonymBlacklist(w: string): boolean;
24 |     protected _getSynonym(w: string, nw: string): string;
25 |     doOptimize<T extends IWordDebug>(words: T[]): T[];
26 | }
27 | export declare const init: typeof ZhtSynonymOptimizer.init;
28 | export declare const type = "optimizer";
29 | export default ZhtSynonymOptimizer;
30 | 


--------------------------------------------------------------------------------
/lib/submod/ZhuyinTokenizer.d.ts:
--------------------------------------------------------------------------------
 1 | import { ISubTokenizerCreate, SubSModuleTokenizer } from '../mod';
 2 | import { IDICT, IDICT2, IWord } from '../Segment';
 3 | /**
 4 |  * Zhuyin (Bopomofo)
 5 |  */
 6 | export declare class ZhuyinTokenizer extends SubSModuleTokenizer {
 7 |     name: string;
 8 |     protected _TABLE: IDICT<IWord>;
 9 |     protected _TABLE2: IDICT2<IWord>;
10 |     protected _cache(...argv: any[]): void;
11 |     split(words: IWord[]): IWord[];
12 |     splitZhuyin(text: string, cur?: number): IWord[];
13 | }
14 | export declare const init: ISubTokenizerCreate<ZhuyinTokenizer>;
15 | export declare const type = "tokenizer";
16 | export default ZhuyinTokenizer;
17 | 


--------------------------------------------------------------------------------
/lib/submod/ZhuyinTokenizer.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.type = exports.init = exports.ZhuyinTokenizer = void 0;
 4 | const mod_1 = require("../mod");
 5 | /**
 6 |  * Zhuyin (Bopomofo)
 7 |  */
 8 | class ZhuyinTokenizer extends mod_1.SubSModuleTokenizer {
 9 |     constructor() {
10 |         super(...arguments);
11 |         this.name = 'ZhuyinTokenizer';
12 |     }
13 |     _cache(...argv) {
14 |         super._cache(...argv);
15 |     }
16 |     split(words) {
17 |         return this._splitUnset(words, this.splitZhuyin);
18 |     }
19 |     splitZhuyin(text, cur) { // `cur` is not used in the body
20 |         let ret = [];
21 |         let self = this;
22 |         let _r = /[\u31A0-\u31BA\u3105-\u312E]/u;
23 |         if (!_r.test(text)) {
24 |             return null; // no character of the two ranges occurs in the text
25 |         }
26 |         text
27 |             .split(/([\u31A0-\u31BA\u3105-\u312E]+)/u) // the capture group keeps the matched runs in the result
28 |             .forEach(function (w, i) {
29 |             if (w !== '') {
30 |                 if (_r.test(w)) {
31 |                     ret.push(self.debugToken({
32 |                         w,
33 |                     }, {
34 |                         [self.name]: true,
35 |                     }, true));
36 |                 }
37 |                 else {
38 |                     ret.push({
39 |                         w,
40 |                     });
41 |                 }
42 |             }
43 |         });
44 |         return ret.length ? ret : null;
45 |     }
46 | }
47 | exports.ZhuyinTokenizer = ZhuyinTokenizer;
48 | exports.init = ZhuyinTokenizer.init.bind(ZhuyinTokenizer); // static init, bound to the class so it can be called detached
49 | exports.type = ZhuyinTokenizer.type; // re-export of the class's static `type`
50 | exports.default = ZhuyinTokenizer;
51 | //# sourceMappingURL=ZhuyinTokenizer.js.map


--------------------------------------------------------------------------------
/lib/submod/ZhuyinTokenizer.ts:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | 
 3 | import { ISubTokenizerCreate, SubSModuleTokenizer } from '../mod';
 4 | import { IDICT, IDICT2, IWord } from '../Segment';
 5 | 
 6 | /**
 7 |  * Zhuyin (Bopomofo) tokenizer: separates runs of Zhuyin symbols from the
 8 |  * text around them.
 9 |  */
10 | export class ZhuyinTokenizer extends SubSModuleTokenizer
11 | {
12 | 
13 | 	override name = 'ZhuyinTokenizer';
14 | 
15 | 	protected override _TABLE: IDICT<IWord>;
16 | 	protected _TABLE2: IDICT2<IWord>;
17 | 
18 | 	protected override _cache(...argv)
19 | 	{
20 | 		super._cache(...argv);
21 | 	}
22 | 
23 | 	/**
24 | 	 * Hand the word list to `_splitUnset`, with `splitZhuyin` as the
25 | 	 * per-word splitter.
26 | 	 */
27 | 	split(words: IWord[]): IWord[]
28 | 	{
29 | 		return this._splitUnset(words, this.splitZhuyin);
30 | 	}
31 | 
32 | 	/**
33 | 	 * Separate runs of Zhuyin symbols from the rest of the text.
34 | 	 * A symbol is any character in U+3105-U+312E or U+31A0-U+31BA.
35 | 	 *
36 | 	 * @param text text to split
37 | 	 * @param cur not used
38 | 	 * @return the pieces in order, runs being passed through `debugToken` with
39 | 	 *         `{[name]: true}`; `null` when the text contains no such symbol
40 | 	 */
41 | 	splitZhuyin(text: string, cur?: number): IWord[]
42 | 	{
43 | 		const zhuyin = /[\u31A0-\u31BA\u3105-\u312E]/u;
44 | 
45 | 		if (!zhuyin.test(text))
46 | 		{
47 | 			return null;
48 | 		}
49 | 
50 | 		const tokens = text
51 | 			.split(/([\u31A0-\u31BA\u3105-\u312E]+)/u)
52 | 			// split() yields an empty string when a run touches either end of the text
53 | 			.filter((piece) => piece !== '')
54 | 			.map((piece): IWord =>
55 | 			{
56 | 				const token: IWord = { w: piece };
57 | 
58 | 				return zhuyin.test(piece)
59 | 					? this.debugToken(token, { [this.name]: true }, true)
60 | 					: token;
61 | 			})
62 | 		;
63 | 
64 | 		return tokens.length ? tokens : null;
65 | 	}
66 | 
67 | }
68 | 
69 | export const init = ZhuyinTokenizer.init.bind(ZhuyinTokenizer) as ISubTokenizerCreate<ZhuyinTokenizer>; // static init, bound to the class so it can be called detached
70 | 
71 | export const type = ZhuyinTokenizer.type; // re-export of the class's static `type`
72 | 
73 | export default ZhuyinTokenizer;
74 | 


--------------------------------------------------------------------------------
/lib/util/debug.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/19/019.
 3 |  */
 4 | import { IWord } from '@novel-segment/types';
 5 | export declare const SYMBOL_DEBUG_KEY = "_debug"; // property name under which debugToken() stores a token's debug info
 6 | export type IWordDebugInfo<T extends IWordDebug = IWordDebug> = {
 7 |     ZhtSynonymOptimizer?: boolean;
 8 |     convertSynonym?: boolean;
 9 |     autoCreate?: boolean;
10 |     _source?: T & IWordDebug;
11 |     index?: number;
12 |     ps_en?: string;
13 |     [key: string]: any;
14 |     [key: number]: any;
15 | };
16 | export type IWordDebug = IWord & {
17 |     m?: Array<IWordDebug | string>;
18 |     ps?: string;
19 |     pp?: string;
20 |     ow?: string;
21 |     op?: number;
22 |     ops?: string;
23 |     opp?: string;
24 |     os?: boolean;
25 |     [SYMBOL_DEBUG_KEY]?: IWordDebugInfo<IWordDebug>;
26 | };
27 | export declare function clearTokemDebug(data: IWordDebugInfo, returnClone?: false): data is IWord; // in place: deletes every key of `data` except w, p and f
28 | export declare function clearTokemDebug(data: IWordDebugInfo, returnClone?: true): IWord; // clone: returns a new object holding only w, p and f
29 | export declare function debugToken<T extends IWordDebug, U extends IWordDebugInfo>(data: T, attr: U & IWordDebugInfo, returnToken: true, ...argv: any[]): T; // merges `attr` into the debug info and returns `data`
30 | export declare function debugToken<T extends IWordDebug, U extends IWordDebugInfo>(data: T, attr?: U & IWordDebugInfo, returnToken?: boolean, ...argv: any[]): U & IWordDebugInfo; // returns the debug info object instead
31 | export declare function debug_token<T extends IWordDebug>(ks: Array<T>, returnSource?: boolean): Array<T | IWordDebug>; // returnSource: `ks` itself; otherwise only the tokens with neither `p` nor `m`
32 | export declare function token_add_info<T extends IWordDebug>(v: T): T;
33 | export declare function toHex(p: number): string;
34 | 


--------------------------------------------------------------------------------
/lib/util/debug.js:
--------------------------------------------------------------------------------
  1 | "use strict";
  2 | /**
  3 |  * Created by user on 2018/4/19/019.
  4 |  */
  5 | Object.defineProperty(exports, "__esModule", { value: true });
  6 | exports.SYMBOL_DEBUG_KEY = void 0;
  7 | exports.clearTokemDebug = clearTokemDebug;
  8 | exports.debugToken = debugToken;
  9 | exports.debug_token = debug_token;
 10 | exports.token_add_info = token_add_info;
 11 | exports.toHex = toHex;
 12 | const tslib_1 = require("tslib");
 13 | const sort_object_keys2_1 = tslib_1.__importDefault(require("sort-object-keys2"));
 14 | const i18n_1 = require("@novel-segment/postag/lib/i18n");
 15 | //export const SYMBOL_DEBUG_KEY = Symbol.for('_debug');
 16 | exports.SYMBOL_DEBUG_KEY = '_debug'; // property name under which debugToken() stores a token's debug info
 17 | function clearTokemDebug(data, returnClone) { // reduce a token to its w / p / f fields
 18 |     if (returnClone) {
 19 |         return { // fresh object; `data` itself is left untouched
 20 |             w: data.w,
 21 |             p: data.p,
 22 |             f: data.f,
 23 |         };
 24 |     }
 25 |     for (let k in data) { // otherwise strip every other enumerable key in place
 26 |         if (k !== 'w' && k !== 'p' && k !== 'f') {
 27 |             delete data[k];
 28 |         }
 29 |     }
 30 |     delete data[exports.SYMBOL_DEBUG_KEY];
 31 |     return data;
 32 | }
 33 | function debugToken(data, attr, returnToken, ...argv) { // read or extend the debug info attached to a token; `argv` is not used
 34 |     if (attr) {
 35 |         data[exports.SYMBOL_DEBUG_KEY] = Object.assign(data[exports.SYMBOL_DEBUG_KEY] || {}, attr); // merge into the existing info, creating it when absent
 36 |     }
 37 |     if (returnToken) {
 38 |         return data; // the token itself
 39 |     }
 40 |     return (data[exports.SYMBOL_DEBUG_KEY] || {}); // the debug info; a throwaway {} when none is attached
 41 | }
 42 | function debug_token(ks, returnSource) {
 43 |     let ks2 = [];
 44 |     // @ts-ignore
 45 |     ks.map(function (v, index) {
 46 |         //v.index = index;
 47 |         // @ts-ignore
 48 |         debugToken(v, {
 49 |             index,
 50 |         });
 51 |         if (v.p) {
 52 |             // @ts-ignore
 53 |             token_add_info(v);
 54 |         }
 55 |         else if (v.m) {
 56 |             // @ts-ignore
 57 |             v.m.map(token_add_info);
 58 |         }
 59 |         else {
 60 |             // @ts-ignore
 61 |             ks2.push(v);
 62 |         }
 63 |     });
 64 |     return returnSource ? ks : ks2;
 65 | }
 66 | function token_add_info(v) {
 67 |     if (v.p) {
 68 |         v.ps = (0, i18n_1.zhName)(v.p);
 69 |         //v.ps_en = POSTAG.enName(v.p);
 70 |         let debug = debugToken(v, {
 71 |             ps_en: (0, i18n_1.enName)(v.p),
 72 |         });
 73 |         v.pp = toHex(v.p);
 74 |         if (v.op) {
 75 |             v.ops = (0, i18n_1.zhName)(v.op);
 76 |             v.opp = toHex(v.op);
 77 |         }
 78 |         if (v.m) {
 79 |             v.m.map(function (v) {
 80 |                 if (typeof v === 'string') {
 81 |                     return v;
 82 |                 }
 83 |                 return token_add_info(v);
 84 |             });
 85 |         }
 86 |         if (debug._source) {
 87 |             token_add_info(debug._source);
 88 |         }
 89 |     }
 90 |     if (v) {
 91 |         (0, sort_object_keys2_1.default)(v, {
 92 |             keys: [
 93 |                 'w',
 94 |                 'p',
 95 |                 'f',
 96 |                 'ps',
 97 |                 'pp',
 98 |                 'ow',
 99 |                 'op',
100 |                 'ops',
101 |                 'opp',
102 |                 'os',
103 |             ],
104 |             useSource: true,
105 |         });
106 |     }
107 |     return v;
108 | }
/**
 * Format a number as upper-case hexadecimal with a `0x` prefix, left-padded
 * with zeros to at least four digits.
 *
 * @param p number to format
 * @returns e.g. `0x00FF` for 255
 */
function toHex(p) {
    const digits = p.toString(16).padStart(4, '0');
    return `0x${digits.toUpperCase()}`;
}
115 | //# sourceMappingURL=debug.js.map


--------------------------------------------------------------------------------
/lib/util/index.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/17/017.
 3 |  */
 4 | import { debug_token, IWordDebug, IWordDebugInfo, toHex, token_add_info } from './debug';
 5 | import { InspectOptions } from 'util';
 6 | export { IWordDebug, IWordDebugInfo, debug_token, toHex, token_add_info };
 7 | export declare function debug_inspect(argv: any[], options?: InspectOptions): string[];
 8 | export declare function debug(...argv: any[]): void;
 9 | export declare function debug_options(argv: any[], options?: InspectOptions): void;
10 | export declare function hexAndAny(n: number, p?: number, ...argv: number[]): number;
11 | export declare function hexAnd(n: number, p?: number, ...argv: number[]): number;
12 | export declare function hexOr(n: number, p?: number, ...argv: number[]): number;
13 | 


--------------------------------------------------------------------------------
/lib/util/index.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | /**
 3 |  * Created by user on 2018/4/17/017.
 4 |  */
 5 | Object.defineProperty(exports, "__esModule", { value: true });
 6 | exports.token_add_info = exports.toHex = exports.debug_token = void 0;
 7 | exports.debug_inspect = debug_inspect;
 8 | exports.debug = debug;
 9 | exports.debug_options = debug_options;
10 | exports.hexAndAny = hexAndAny;
11 | exports.hexAnd = hexAnd;
12 | exports.hexOr = hexOr;
13 | const debug_1 = require("./debug");
14 | Object.defineProperty(exports, "debug_token", { enumerable: true, get: function () { return debug_1.debug_token; } });
15 | Object.defineProperty(exports, "toHex", { enumerable: true, get: function () { return debug_1.toHex; } });
16 | Object.defineProperty(exports, "token_add_info", { enumerable: true, get: function () { return debug_1.token_add_info; } });
17 | const util_1 = require("util");
/**
 * Render each value with Node's `util.inspect`.
 *
 * @param argv values to render
 * @param options inspect options; `colors` is `true` unless overridden here
 * @returns one inspected string per entry of `argv`, in order
 */
function debug_inspect(argv, options = {}) {
    const inspectOptions = { colors: true, ...options };
    return argv.map((value) => (0, util_1.inspect)(value, inspectOptions));
}
/**
 * Print every argument to the console after rendering it with `debug_inspect`
 * (default options).
 */
function debug(...argv) {
    const rendered = debug_inspect(argv);
    return console.log(...rendered);
}
/**
 * Print every entry of `argv` to the console after rendering it with
 * `debug_inspect` using the given inspect options.
 *
 * @param argv values to print
 * @param options inspect options forwarded to `debug_inspect`
 */
function debug_options(argv, options) {
    const rendered = debug_inspect(argv, options);
    return console.log(...rendered);
}
/**
 * Return the first non-zero `n & mask` among the given masks.
 *
 * @param n value to test
 * @param argv masks, tried in order
 * @returns `n` when no mask is given, the first non-zero intersection, or `0`
 */
function hexAndAny(n, ...argv) {
    if (argv.length === 0) {
        return n;
    }
    for (let i = 0; i < argv.length; i++) {
        const overlap = n & argv[i];
        if (overlap) {
            return overlap;
        }
    }
    return 0;
}
/**
 * Check that `n` shares at least one bit with every given mask.
 *
 * @param n value to test
 * @param argv masks that must all intersect `n`
 * @returns `n` when no mask is given; `0` as soon as one mask has no bit in
 *   common with `n`; otherwise the bitwise OR of all masks
 */
function hexAnd(n, ...argv) {
    if (!argv.length) {
        return n;
    }
    let combined = 0;
    for (const mask of argv) {
        if (!(n & mask)) {
            return 0;
        }
        combined |= mask;
    }
    return combined;
}
/**
 * Bitwise-OR `n` with every given mask.
 *
 * @param n starting value (returned unchanged when no mask is given)
 * @param argv masks to OR in
 */
function hexOr(n, ...argv) {
    return argv.reduce((acc, mask) => acc | mask, n);
}
64 | //let p = hexAnd(0x6000 | 0x8000, 0x2000, 0x4000)
65 | //debug(p, toHex(p));
66 | //# sourceMappingURL=index.js.map


--------------------------------------------------------------------------------
/lib/util/index.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/17/017.
 3 |  */
 4 | 
 5 | import { debug_token, IWordDebug, IWordDebugInfo, toHex, token_add_info } from './debug';
 6 | import { inspect, InspectOptions } from 'util';
 7 | 
 8 | export { IWordDebug, IWordDebugInfo, debug_token, toHex, token_add_info }
 9 | 
/**
 * Render each value with Node's `util.inspect`.
 *
 * @param argv values to render
 * @param options inspect options; `colors` is `true` unless overridden here
 * @returns one inspected string per entry of `argv`, in order
 */
export function debug_inspect(argv: any[], options: InspectOptions = {})
{
	const inspectOptions: InspectOptions = {
		colors: true,
		...options,
	};

	return argv.map((value) => inspect(value, inspectOptions));
}
21 | 
/**
 * Print every argument to the console after rendering it with `debug_inspect`
 * (default options).
 */
export function debug(...argv)
{
	const rendered = debug_inspect(argv);

	return console.log(...rendered);
}
26 | 
/**
 * Print every entry of `argv` to the console after rendering it with
 * `debug_inspect` using the given inspect options.
 *
 * @param argv values to print
 * @param options inspect options forwarded to `debug_inspect`
 */
export function debug_options(argv: any[], options?: InspectOptions)
{
	const rendered = debug_inspect(argv, options);

	return console.log(...rendered);
}
31 | 
/**
 * Return the first non-zero `n & mask` among the given masks.
 *
 * @param n value to test
 * @param argv masks, tried in order
 * @returns `n` when no mask is given, the first non-zero intersection, or `0`
 */
export function hexAndAny(n: number, p?: number, ...argv: number[]): number
export function hexAndAny(n: number, ...argv: number[])
{
	if (argv.length === 0)
	{
		return n;
	}

	for (let i = 0; i < argv.length; i++)
	{
		const overlap = n & argv[i];

		if (overlap)
		{
			return overlap;
		}
	}

	return 0;
}
52 | 
/**
 * Check that `n` shares at least one bit with every given mask.
 *
 * @param n value to test
 * @param argv masks that must all intersect `n`
 * @returns `n` when no mask is given; `0` as soon as one mask has no bit in
 *   common with `n`; otherwise the bitwise OR of all masks
 */
export function hexAnd(n: number, p?: number, ...argv: number[]): number
export function hexAnd(n: number, ...argv: number[])
{
	if (!argv.length)
	{
		return n;
	}

	let combined = 0;

	for (const mask of argv)
	{
		if (!(n & mask))
		{
			return 0;
		}

		combined |= mask;
	}

	return combined;
}
77 | 
/**
 * Bitwise-OR `n` with every given mask.
 *
 * @param n starting value (returned unchanged when no mask is given)
 * @param argv masks to OR in
 */
export function hexOr(n: number, p?: number, ...argv: number[]): number
export function hexOr(n: number, ...argv: number[])
{
	return argv.reduce((acc, mask) => acc | mask, n);
}
88 | 
89 | //let p = hexAnd(0x6000 | 0x8000, 0x2000, 0x4000)
90 | //debug(p, toHex(p));
91 | 


--------------------------------------------------------------------------------
/lib/util/isUnset.d.ts:
--------------------------------------------------------------------------------
1 | export declare function isUnset<T>(val: T): val is Extract<T, null | undefined>;
2 | export declare function isSet<T>(val: T): val is Exclude<T, null | undefined>;
3 | export default isUnset;
4 | 


--------------------------------------------------------------------------------
/lib/util/isUnset.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | Object.defineProperty(exports, "__esModule", { value: true });
 3 | exports.isUnset = isUnset;
 4 | exports.isSet = isSet;
 5 | function isUnset(val) {
 6 |     return typeof val === 'undefined' || val === null;
 7 | }
 8 | function isSet(val) {
 9 |     return typeof val !== 'undefined' && val !== null;
10 | }
11 | exports.default = isUnset;
12 | //# sourceMappingURL=isUnset.js.map


--------------------------------------------------------------------------------
/lib/util/isUnset.ts:
--------------------------------------------------------------------------------
 1 | 
 2 | export function isUnset<T>(val: T): val is Extract<T, null | undefined>
 3 | {
 4 | 	return typeof val === 'undefined' || val === null
 5 | }
 6 | 
 7 | export function isSet<T>(val: T): val is Exclude<T, null | undefined>
 8 | {
 9 | 	return typeof val !== 'undefined' && val !== null
10 | }
11 | 
12 | export default isUnset
13 | 


--------------------------------------------------------------------------------
/project.config.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2017/8/13/013.
 3 |  */
 4 | export declare const project_root: string;
 5 | export declare const dict_root: string;
 6 | export declare const temp_root: string;
 7 | declare const _default: {
 8 |     project_root: string;
 9 |     dict_root: string;
10 |     temp_root: string;
11 | };
12 | export default _default;
13 | 


--------------------------------------------------------------------------------
/project.config.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | /**
 3 |  * Created by user on 2017/8/13/013.
 4 |  */
 5 | Object.defineProperty(exports, "__esModule", { value: true });
 6 | exports.temp_root = exports.dict_root = exports.project_root = void 0;
 7 | const path_1 = require("path");
// Directory that contains this file.
exports.project_root = (0, path_1.join)(__dirname);
// `dicts` directory directly under the project root.
exports.dict_root = (0, path_1.join)(exports.project_root, 'dicts');
//export const dist_root = path.join(project_root, 'dist');
// `test/temp` directory under the project root.
exports.temp_root = (0, path_1.join)(exports.project_root, 'test/temp');
// Default export bundles the same three paths in one object.
exports.default = {
    project_root: exports.project_root,
    dict_root: exports.dict_root,
    temp_root: exports.temp_root,
};
17 | //# sourceMappingURL=project.config.js.map


--------------------------------------------------------------------------------
/project.config.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2017/8/13/013.
 3 |  */
 4 | 
 5 | import { join } from 'path';
 6 | 
/** Directory that contains this file. */
export const project_root = join(__dirname);

/** `dicts` directory directly under the project root. */
export const dict_root = join(project_root, 'dicts');

//export const dist_root = path.join(project_root, 'dist');
/** `test/temp` directory under the project root. */
export const temp_root = join(project_root, 'test/temp');

/** The same three paths bundled in one object. */
export default {
	project_root,
	dict_root,
	temp_root,
};
19 | 


--------------------------------------------------------------------------------
/repl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | 
 3 | const fs = require('fs');
 4 | const path = require('path');
 5 | const repl = require('repl');
 6 | 
 7 | const r = repl.start('> ');
 8 | const c = r.context;
 9 | 
/**
 * (Re)populate the REPL context:
 *  - `Segment`: whatever `require('./')` returns
 *  - `segment`: a new `Segment` instance after `useDefault()`
 *  - `s(...)`:  forwards its arguments to `segment.doSegment`
 *  - `ss(...)`: like `s`, but joins each result's `w` field with `/`
 */
c._load = function ()
{
	c.Segment = require('./');

	const instance = new c.Segment();
	instance.useDefault();
	c.segment = instance;

	c.s = function (...args)
	{
		// look `segment` up at call time so a later reassignment is honoured
		const target = c.segment;
		return target.doSegment.apply(target, args);
	};

	c.ss = function (...args)
	{
		const tokens = c.s.apply(null, args);
		return tokens.map((token) => token.w).join('/');
	};
};
26 | 
/**
 * Drop every `require.cache` entry whose key starts with this script's
 * directory, rebuild the REPL context via `c._load()`, and print how long
 * that took.
 */
c.reload = function ()
{
	const startedAt = Date.now();
	const prefix = path.resolve(__dirname) + path.sep;

	Object.keys(require.cache)
		.filter((id) => id.startsWith(prefix))
		.forEach((id) =>
		{
			delete require.cache[id];
			// console.log('delete %s', id);
		});

	c._load();
	console.log('OK. (spent %sms)', Date.now() - startedAt);
};
42 | 
43 | c._load();
44 | 


--------------------------------------------------------------------------------
/script/publish-after.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Created by user on 2018/7/24/024.
3 |  */
4 | export {};
5 | 


--------------------------------------------------------------------------------
/script/publish-after.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | /**
 3 |  * Created by user on 2018/7/24/024.
 4 |  */
 5 | Object.defineProperty(exports, "__esModule", { value: true });
 6 | const tslib_1 = require("tslib");
 7 | const path = tslib_1.__importStar(require("path"));
 8 | const project_config_1 = tslib_1.__importDefault(require("../project.config"));
 9 | // @ts-ignore
10 | const PackageJson = tslib_1.__importStar(require("../package.json"));
11 | /// <reference types="cross-spawn" />
12 | const index = require("../index");
// Post-publish step: commit all tracked changes and create an annotated tag
// named after the package version. Any rejection is only logged.
(async () => {
    let crossSpawn;
    // @ts-ignore
    crossSpawn = await Promise.resolve().then(() => tslib_1.__importStar(require('cross-spawn-extra')));
    let gitroot;
    // `gitroot` first holds the `sync` export of git-root2, then its result for this directory.
    // @ts-ignore
    gitroot = await Promise.resolve().then(() => tslib_1.__importStar(require('git-root2'))).then(m => m.sync);
    // @ts-ignore
    gitroot = gitroot(__dirname);
    // path.relative() is non-empty when the two paths differ, so this bails
    // out unless the reported git root is exactly the project root.
    if (!gitroot || path.relative(gitroot, project_config_1.default.project_root)) {
        console.warn(`no git exists`);
        return;
    }
    let options = {
        cwd: project_config_1.default.project_root,
        stdio: 'inherit',
    };
    // Commit/tag message: package version plus the versions reported by `index.versions`.
    let msg = `npm publish ${PackageJson.version}`;
    msg += `\n\nnovel-segment@${index.versions['novel-segment']}, segment-dict@${index.versions['segment-dict']}, cjk-conv@${index.versions['cjk-conv']}, regexp-cjk@${index.versions['regexp-cjk']}`;
    await crossSpawn('git', [
        'commit',
        '-a',
        '-m',
        msg,
    ], options);
    // Wait 500ms between the commit and the tag.
    await new Promise(function (done) {
        setTimeout(done, 500);
    });
    await crossSpawn('git', [
        'tag',
        '-a',
        PackageJson.version,
        '-m',
        msg,
    ], options);
})().catch(e => console.error(e));
49 | //# sourceMappingURL=publish-after.js.map


--------------------------------------------------------------------------------
/script/publish-after.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/7/24/024.
 3 |  */
 4 | 
 5 | import * as path from 'path';
 6 | import ProjectConfig from '../project.config';
 7 | // @ts-ignore
 8 | import * as PackageJson from '../package.json';
 9 | import CrossSpawn = require('cross-spawn-extra');
10 | /// <reference types="cross-spawn" />
11 | import index = require('../index');
12 | import { sync } from 'git-root2/core';
13 | 
// Post-publish step: commit all tracked changes and create an annotated tag
// named after the package version. Any rejection is only logged.
(async () =>
{
	let crossSpawn: typeof CrossSpawn;
	// @ts-ignore
	crossSpawn = await import('cross-spawn-extra');

	let gitroot: string;

	// `gitroot` first holds the `sync` export of git-root2, then its result for this directory.
	// @ts-ignore
	gitroot = await import('git-root2').then(m => m.sync);
	// @ts-ignore
	gitroot = gitroot(__dirname);

	// path.relative() is non-empty when the two paths differ, so this bails
	// out unless the reported git root is exactly the project root.
	if (!gitroot || path.relative(gitroot, ProjectConfig.project_root))
	{
		console.warn(`no git exists`);
		return;
	}

	let options = {
		cwd: ProjectConfig.project_root,
		stdio: 'inherit',
	};

	// Commit/tag message: package version plus the versions reported by `index.versions`.
	let msg = `npm publish ${PackageJson.version}`;

	msg += `\n\nnovel-segment@${index.versions['novel-segment']}, segment-dict@${index.versions['segment-dict']}, cjk-conv@${index.versions['cjk-conv']}, regexp-cjk@${index.versions['regexp-cjk']}`;

	await crossSpawn('git', [
		'commit',
		'-a',
		'-m',
		msg,
	], options);

	// Wait 500ms between the commit and the tag.
	await new Promise(function (done)
	{
		setTimeout(done, 500);
	});

	await crossSpawn('git', [
		'tag',
		'-a',
		PackageJson.version,
		'-m',
		msg,
	], options);

})().catch(e => console.error(e));
63 | 


--------------------------------------------------------------------------------
/script/publish-after2.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Created by user on 2018/7/24/024.
3 |  */
4 | export {};
5 | 


--------------------------------------------------------------------------------
/script/publish-after2.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | /**
 3 |  * Created by user on 2018/7/24/024.
 4 |  */
 5 | Object.defineProperty(exports, "__esModule", { value: true });
 6 | const tslib_1 = require("tslib");
 7 | const path = require("path");
 8 | /// <reference types="cross-spawn" />
 9 | const index = require("../index");
10 | const project_config_1 = tslib_1.__importDefault(require("../project.config"));
11 | const path_1 = require("path");
// Post-publish step: commit the contents of the `test` directory with a
// message listing the versions reported by `index.versions`.
// Any rejection is only logged.
(async () => {
    let crossSpawn;
    // @ts-ignore
    crossSpawn = await Promise.resolve().then(() => tslib_1.__importStar(require('cross-spawn-extra')));
    let gitroot;
    // `gitroot` first holds the `sync` export of git-root2, then its result for this directory.
    // @ts-ignore
    gitroot = await Promise.resolve().then(() => tslib_1.__importStar(require('git-root2'))).then(m => m.sync);
    // @ts-ignore
    gitroot = gitroot(__dirname);
    if (!gitroot || path.relative(gitroot, project_config_1.default.project_root)) {
        // The git root is not the project root: accept it only when it equals
        // the `__root_ws` path exported three directories up (if that module loads).
        let __root_ws = await Promise.resolve().then(() => tslib_1.__importStar(require('../../../__root_ws'))).then(m => m.__root_ws)
            .catch(e => null);
        // `gitroot` may be empty here; path.relative() throws on non-string
        // arguments, so it must not be called in that case.
        if (!gitroot || !__root_ws || path.relative(gitroot, __root_ws)) {
            console.warn(`no git exists`);
            console.warn(`__root_ws`, __root_ws);
            console.warn(`gitroot`, gitroot);
            console.warn(`path.relative`, gitroot ? path.relative(gitroot, project_config_1.default.project_root) : gitroot);
            return;
        }
    }
    let cwd = (0, path_1.join)(project_config_1.default.project_root, 'test');
    let options = {
        cwd,
        stdio: 'inherit',
    };
    let msg = `novel-segment@${index.versions['novel-segment']}, segment-dict@${index.versions['segment-dict']}, cjk-conv@${index.versions['cjk-conv']}, regexp-cjk@${index.versions['regexp-cjk']}`;
    // Commit only the current directory (`test`), not every tracked change.
    await crossSpawn('git', [
        'commit',
        //'-a',
        '-m',
        msg,
        '.',
    ], options);
})().catch(e => console.error(e));
46 | //# sourceMappingURL=publish-after2.js.map


--------------------------------------------------------------------------------
/script/publish-after2.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/7/24/024.
 3 |  */
 4 | 
 5 | import path = require('path');
 6 | // @ts-ignore
 7 | import PackageJson = require('../package.json');
 8 | import CrossSpawn = require('cross-spawn-extra');
 9 | /// <reference types="cross-spawn" />
10 | import index = require('../index');
11 | import ProjectConfig from '../project.config';
12 | import { join } from "path";
13 | 
// Post-publish step: commit the contents of the `test` directory with a
// message listing the versions reported by `index.versions`.
// Any rejection is only logged.
(async () =>
{
	let crossSpawn: typeof CrossSpawn;
	// @ts-ignore
	crossSpawn = await import('cross-spawn-extra');

	let gitroot: string;

	// `gitroot` first holds the `sync` export of git-root2, then its result for this directory.
	// @ts-ignore
	gitroot = await import('git-root2').then(m => m.sync);
	// @ts-ignore
	gitroot = gitroot(__dirname);

	if (!gitroot || path.relative(gitroot, ProjectConfig.project_root))
	{
		// The git root is not the project root: accept it only when it equals
		// the `__root_ws` path exported three directories up (if that module loads).
		let __root_ws = await import('../../../__root_ws')
			.then(m => m.__root_ws)
			.catch(e => null)
		;

		// `gitroot` may be empty here; path.relative() throws on non-string
		// arguments, so it must not be called in that case.
		if (!gitroot || !__root_ws || path.relative(gitroot, __root_ws))
		{
			console.warn(`no git exists`);
			console.warn(`__root_ws`, __root_ws);
			console.warn(`gitroot`, gitroot);
			console.warn(`path.relative`, gitroot ? path.relative(gitroot, ProjectConfig.project_root) : gitroot);
			return;
		}
	}

	let cwd = join(ProjectConfig.project_root, 'test');

	let options = {
		cwd,
		stdio: 'inherit',
	};

	let msg = `novel-segment@${index.versions['novel-segment']}, segment-dict@${index.versions['segment-dict']}, cjk-conv@${index.versions['cjk-conv']}, regexp-cjk@${index.versions['regexp-cjk']}`;

	// Commit only the current directory (`test`), not every tracked change.
	await crossSpawn('git', [
		'commit',
		//'-a',
		'-m',
		msg,
		'.',
	], options);

})().catch(e => console.error(e));
62 | 


--------------------------------------------------------------------------------
/script/sort-stringify-cache.d.ts:
--------------------------------------------------------------------------------
1 | export {};
2 | 


--------------------------------------------------------------------------------
/test/__snapshots__/bug.spec.ts.snap:
--------------------------------------------------------------------------------
1 | // Jest Snapshot v1, https://goo.gl/fbAQLP
2 | 
3 | exports[`bug check word is constructor 1`] = `"inspection.dead.code.problem.synopsis28.constructor=构造函数有一个用法,但它是不可到达的从入口点."`;
4 | 


--------------------------------------------------------------------------------
/test/_local-dev.ts:
--------------------------------------------------------------------------------
 1 | // @ts-ignore
 2 | import _chai = require('chai');
 3 | // @ts-ignore
 4 | // @ts-ignore
 5 | //import { expect, assert } from 'chai';
 6 | 
 7 | import { IChaiInstalled } from 'chai-asserttype-extra'
 8 | //import ChaiPlugin = require('chai-asserttype-extra');
 9 | import ChaiStatic = Chai.ChaiStatic;
10 | 
// The chai instance used by the tests; extended with optional plugins when
// they can be resolved.
let chai: IChaiInstalled<ChaiStatic> | ChaiStatic;

// Install `chai-asserttype-extra` only if it resolves; otherwise use plain chai.
if (requireResolve('chai-asserttype-extra'))
{
	const ChaiPlugin = require('chai-asserttype-extra').ChaiPlugin;

	chai = ChaiPlugin.install(_chai) as IChaiInstalled<ChaiStatic>;
}
else
{
	chai = _chai;
}

// Register `chai-string` only if it resolves.
if (requireResolve('chai-string'))
{
	chai.use(require('chai-string'));
}

// Taken after the plugins above have been applied.
const { expect, assert } = chai;

export { chai, expect, assert }
32 | 
33 | // @ts-ignore
34 | import path = require('path');
35 | // @ts-ignore
36 | import util = require('util');
37 | 
38 | export { path, util };
39 | 
40 | // @ts-ignore
41 | export const rootDir: string = path.join(__dirname, '..');
42 | 
/**
 * Express `filename` relative to `rootDir`.
 *
 * @param filename path to convert
 * @returns the result of `path.relative(rootDir, filename)`
 */
export function relative(filename: string): string
{
	const fromRoot = path.relative(rootDir, filename);

	return fromRoot;
}
47 | 
/**
 * Adapt a (possibly async) function to a callback-style test body.
 *
 * The returned function deliberately is NOT `async`: a test body that both
 * accepts `done` and returns a Promise is rejected by the test runner as
 * overspecified, so completion is signalled through `done` only.
 *
 * @param fn test body; called with no arguments, may return a Promise
 * @returns a function taking `done`; `done()` is called once `fn` settles
 *   successfully, `done(err)` when `fn` throws or its Promise rejects
 */
export function mochaAsync(fn: Function)
{
	return (done: (err?: unknown) => void): void =>
	{
		let pending: unknown;

		try
		{
			// invoked synchronously, like the first step of an async function
			pending = fn();
		}
		catch (err)
		{
			done(err);
			return;
		}

		Promise.resolve(pending).then(() => done(), (err) => done(err));
	};
}
63 | 
64 | export default exports as typeof import('./_local-dev');
65 | 
/**
 * Resolve a module name without throwing.
 *
 * @param name module id passed to `require.resolve`
 * @returns the resolved value, or `null` when resolution throws
 */
export function requireResolve(name: string): string
{
	let resolved: string = null;

	try
	{
		resolved = require.resolve(name);
	}
	catch (e)
	{
		// resolution failed: fall through and report `null`
	}

	return resolved;
}
78 | 


--------------------------------------------------------------------------------
/test/bug.spec.ts:
--------------------------------------------------------------------------------
 1 | //@noUnusedParameters:false
 2 | 
 3 | import { basename, extname } from 'path';
 4 | import { createSegment } from './lib/index';
 5 | import { stringifyList, stringify } from '@novel-segment/stringify';
 6 | 
 7 | describe(`bug`, () =>
 8 | {
 9 | 	const segment = createSegment(true, {
10 | 		nodeNovelMode: true,
11 | 	});
12 | 
13 | 	test(`check word is constructor`, () =>
14 | 	{
15 | 		let words = segment.doSegment(`inspection.dead.code.problem.synopsis28.constructor=构造函数有一个用法,但它是不可到达的从入口点.`);
16 | 
17 | 		let actual = stringify(words);
18 | 
19 | 		expect(actual).toContain(`inspection.dead.code.problem.synopsis28.constructor`);
20 | 		expect(actual).not.toContain(`[native code]`);
21 | 
22 | 		expect(actual).toMatchSnapshot();
23 | 
24 | 	});
25 | 
26 | })
27 | 


--------------------------------------------------------------------------------
/test/chk-fixme.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2019/4/12.
 3 |  */
 4 | 
 5 | import Mocha = require('mocha');
 6 | import fs = require('fs');
 7 | import path = require('path');
 8 | import yargs = require('yargs');
 9 | 
// Runs the `lazy.fixme` suite through mocha's programmatic API. Failures are
// reported as a warning only: the exit code is forced to 0 in every path.

// Parsed command-line arguments, handed to Mocha as its options object.
let cli = yargs
	.argv
;

// @ts-ignore
const mocha = new Mocha(cli);

mocha.addFile(
	path.join(__dirname, 'lazy.fixme')
);

mocha.run(function(failures) {

	failures && console.warn(`Tests failed: ${failures}`);

	// failing tests must not fail the process
	process.exitCode = 0;
});

// NOTE(review): this is called after run() has already been started; confirm
// whether it still takes effect. Errors from the call itself are ignored.
try
{
	mocha.allowUncaught()
}
catch (e)
{

}

process.exitCode = 0;
38 | 


--------------------------------------------------------------------------------
/test/lib/delete-cache.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/17/017.
 3 |  */
 4 | 
 5 | import { relative } from 'path';
 6 | import { removeSync } from 'fs-extra';
 7 | import { temp_root, project_root } from '../../project.config';
 8 | import { debug } from '../../lib/util';
 9 | import { async as FastGlob } from '@bluelovers/fast-glob/bluebird';
10 | 
// Deletes cache database files found under `temp_root` and reports the
// elapsed time. The default export is the glob chain itself.
console.time(`[delete] cache`);

export default FastGlob([
	// NOTE(review): the first pattern looks subsumed by the second; confirm against the glob semantics
	'**/cache.db',
	'**/cache*.db',
], {
	cwd: temp_root,
	absolute: true,
})
// log each match relative to the project root, then remove it synchronously
.map((cache_file) => {

	debug(relative(project_root, cache_file));
	removeSync(cache_file);

})
	// stop the timer started above
	.tap(() => {

		console.timeEnd(`[delete] cache`);

	})
;
32 | 


--------------------------------------------------------------------------------
/test/lib/util.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2019/4/9.
 3 |  */
 4 | 
 5 | import { IWord } from '../../lib/Segment';
 6 | import tests_lazy_index from '../res/lazy.index';
 7 | import { zhDictCompare } from '@novel-segment/util';
 8 | import { assert, chai } from '../_local-dev';
 9 | import * as  _ from '@novel-segment/assert';
10 | 
/**
 * Wrap one of the `@novel-segment/assert` matchers so that its third
 * argument (an options object) defaults `inspectFn` to `chai.util.inspect`.
 *
 * The options object is shallow-copied before the default is applied, so the
 * caller's object is not modified.
 *
 * @param fn matcher to wrap
 * @returns a function that forwards all arguments to `fn`, cast back to `fn`'s type
 */
function _wrapFn<T extends typeof _.lazyMatch | typeof _.lazyMatch002 | typeof _.lazyMatchNot | typeof _.lazyMatchSynonym001>(fn: T): T
{
	return ((...argv: Parameters<T>) => {
		// copy (or create) the options object
		argv[2] = {
			...(argv[2] ?? {}),
		};
		// keep an explicitly supplied inspectFn
		argv[2].inspectFn ??= chai.util.inspect;
		// @ts-ignore
		return fn(...argv)
	}) as T
}

// Wrapped matchers re-exported under the same names as the originals.
export const lazyMatch = _wrapFn(_.lazyMatch);
export const lazyMatch002 = _wrapFn(_.lazyMatch002);
export const lazyMatchNot = _wrapFn(_.lazyMatchNot);
export const lazyMatchSynonym001 = _wrapFn(_.lazyMatchSynonym001);
// NOTE(review): lazyMatchSynonym001Not is not listed in _wrapFn's type constraint; presumably it shares a signature with one that is — confirm
export const lazyMatchSynonym001Not = _wrapFn(_.lazyMatchSynonym001Not);
28 | 
/**
 * Apply the shared settings to a mocha context: a timeout of 30000.
 *
 * @param mocha context to configure
 * @returns the same context
 */
export function mochaSetup(mocha: Mocha.Context)
{
	const timeoutMs = 30000;

	mocha.timeout(timeoutMs);

	return mocha;
}
35 | 
/**
 * Extract the `w` field of every word.
 *
 * @param arr word list
 * @returns the `w` values, in the same order
 */
export function toStringArray<T extends IWord[]>(arr: T)
{
	return arr.map(({ w }) => w);
}
43 | 
44 | export default exports as typeof import('./util');
45 | 
/**
 * Sort a test table in place with `zhDictCompare`: first by the stringified
 * second column, then, on a tie, by the first column.
 *
 * @param list test table to sort (mutated)
 */
export function sortTests<T extends typeof tests_lazy_index['tests_lazy_base'] | typeof tests_lazy_index['tests_lazy_base_not'] | typeof tests_lazy_index['tests_lazy_array'] | typeof tests_lazy_index['tests_lazy_indexof']>(list: T)
{
	list.sort((a, b) =>
	{
		const byExpected = zhDictCompare(String(a[1]), String(b[1]));

		// compare the first column only when the second one ties
		return byExpected || zhDictCompare(a[0], b[0]);
	})
}
54 | 


--------------------------------------------------------------------------------
/test/res/gc.data.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2019/6/12.
 3 |  */
 4 | 
 5 | export const fixedGC = [
 6 | 	'接著就是對市政府的對外放送讓他能跑來避難以及多虧了市政府提供了這般安全的地方讓他歇息表達深深地感謝',
 7 | 	`（二）控股股东，是指其出资额占有限责任公司资本总额百分之五十以上或者其持有的股份占股份有限公司股本总额百分之五十以上的股东；出资额或者持有股份的比例虽然不足百分之五十，但依其出资额或者持有的股份所享有的表决权已足以对股东会、股东大会的决议产生重大影响的股东。`,
 8 | ];
 9 | 
10 | export default fixedGC
11 | 


--------------------------------------------------------------------------------
/test/res/gc.not/666962621.txt:
--------------------------------------------------------------------------------
1 | https://github.com/leizongmin/node-segment/issues/35#issuecomment-666962621
2 | 
3 | 一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十
4 | 


--------------------------------------------------------------------------------
/test/res/lazy.index.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2019/4/12.
 3 |  *
 4 |  * 測試段落 每次發布版本時 會保證以下分析轉換是符合預期
 5 |  */
 6 | 
 7 | import {
 8 | 	sortTests,
 9 | 
10 | } from '../lib/util';
11 | 
12 | import tests_lazy_base from './lazy.index/tests_lazy_base';
13 | import tests_lazy_base_not from './lazy.index/tests_lazy_base_not';
14 | import tests_lazy_array from './lazy.index/tests_lazy_array';
15 | import tests_lazy_indexof from './lazy.index/tests_lazy_indexof';
16 | import tests_lazy_indexof_not from './lazy.index/tests_lazy_indexof_not';
17 | import {
18 | 	lazyMatch,
19 | 	lazyMatch002,
20 | 	lazyMatchNot,
21 | 	lazyMatchSynonym001,
22 | 	lazyMatchSynonym001Not,
23 | } from '@novel-segment/assert';
24 | 
// Sort every table in place at import time, so the exports below are already ordered.
sortTests(tests_lazy_base);
sortTests(tests_lazy_base_not);
sortTests(tests_lazy_array);
sortTests(tests_lazy_indexof);
sortTests(tests_lazy_indexof_not);
30 | 
31 | export {
32 | 	tests_lazy_base,
33 | 	tests_lazy_base_not,
34 | 	tests_lazy_array,
35 | 	tests_lazy_indexof,
36 | 	tests_lazy_indexof_not,
37 | };
38 | 
39 | export default {
40 | 	tests_lazy_base,
41 | 	tests_lazy_base_not,
42 | 	tests_lazy_array,
43 | 	tests_lazy_indexof,
44 | 	tests_lazy_indexof_not,
45 | };
46 | 


--------------------------------------------------------------------------------
/test/res/lazy.index/tests_lazy_array.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2020/1/17.
 3 |  */
 4 | 
 5 | import { lazyMatch002 } from '@novel-segment/assert';
 6 | 
 7 | /**
 8 |  * 分析後應該要符合以下其中一個結果
 9 |  */
10 | export const tests_lazy_array: [string, Parameters<typeof lazyMatch002>['1'], Parameters<typeof lazyMatch002>['2']?][] = [
11 | 
12 | 	[
13 | 		'胡锦涛出席APEC领导人会议后回京',
14 | 		[
15 | 			[
16 | 				'会议',
17 | 				'回京',
18 | 			],
19 | 		],
20 | 	],
21 | 
22 | 	[
23 | 		'在這裡有兩具自動人偶隨侍在側的烏列爾',
24 | 		[
25 | 			[
26 | 				'兩具',
27 | 				'自動',
28 | 				'人偶',
29 | 				'隨侍',
30 | 			],
31 | 			[
32 | 				'兩具',
33 | 				'自動人偶',
34 | 				'隨侍',
35 | 			],
36 | 		],
37 | 	],
38 | 
39 | 	[
40 | 		'我摀住嘴',
41 | 		[
42 | 			[
43 | 				'我',
44 | 				'摀住',
45 | 				'嘴',
46 | 			],
47 | 			[
48 | 				'我',
49 | 				'摀住嘴',
50 | 			],
51 | 		],
52 | 	],
53 | 
54 | 	[
55 | 		'世間萬物終歸于虛無',
56 | 		[
57 | 			[
58 | 				'世間',
59 | 				'萬物',
60 | 				'終歸',
61 | 				'於',
62 | 				'虛無',
63 | 			],
64 | 			[
65 | 				'世間',
66 | 				'萬物',
67 | 				'終歸於',
68 | 				'虛無',
69 | 			],
70 | 		],
71 | 	],
72 | 
73 | ];
74 | 
75 | export default tests_lazy_array
76 | 


--------------------------------------------------------------------------------
/test/res/lazy.index/tests_lazy_indexof_not.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Created by user on 2020/1/17.
  3 |  */
  4 | 
  5 | import { lazyMatchSynonym001Not } from '@novel-segment/assert';
  6 | 
  7 | /**
  8 |  * After segmentation and conversion, the output must not contain any of the listed words.
  9 |  */
 10 | export const tests_lazy_indexof_not: [string, Parameters<typeof lazyMatchSynonym001Not>['1'], Parameters<typeof lazyMatchSynonym001Not>['2']?][] = [
 11 | 
 12 | 	[
 13 | 		'那是里靈魂的世界。',
 14 | 		[
 15 | 			'裡',
 16 | 		],
 17 | 	],
 18 | 
 19 | 	[
 20 | 		'原因還是在於教會對于究極療癒所抱持的想法吧',
 21 | 		[
 22 | 			'于',
 23 | 		],
 24 | 	],
 25 | 
 26 | 	[
 27 | 		'遥遥领先于帝位争夺的皇太子战死于战场都是太过奇怪的事了',
 28 | 		[
 29 | 			'于',
 30 | 		],
 31 | 	],
 32 | 
 33 | 	[
 34 | 		'那里民风保守',
 35 | 		[
 36 | 			'里',
 37 | 		],
 38 | 	],
 39 | 
 40 | 	[
 41 | 		'似乎在一栋别墅里长住不走了',
 42 | 		[
 43 | 			'里',
 44 | 		],
 45 | 	],
 46 | 
 47 | 	[
 48 | 		'生活里长期充满了无奈',
 49 | 		[
 50 | 			'里',
 51 | 		],
 52 | 	],
 53 | 
 54 | 	[
 55 | 		'异次元里拉出来',
 56 | 		[
 57 | 			'里',
 58 | 		],
 59 | 	],
 60 | 
 61 | 	[
 62 | 		'他被寄养在别的家庭里长达十年',
 63 | 		[
 64 | 			'里',
 65 | 		],
 66 | 	],
 67 | 
 68 | 	[
 69 | 		'我在这本第一卷里见到的雷普凌辱鬼畜等词汇比我这一整年在其他地方看到的都多',
 70 | 		[
 71 | 			'里',
 72 | 		],
 73 | 	],
 74 | 
 75 | 	[
 76 | 		'「好耶！来去冲冲水！！」',
 77 | 		[
 78 | 			'衝',
 79 | 		],
 80 | 	],
 81 | 
 82 | 	[
 83 | 		'那是她連在血斗場都未曾見識過的速度。',
 84 | 		[
 85 | 			'斗',
 86 | 		],
 87 | 	],
 88 | 
 89 | 	[
 90 | 		'先不说那个擦过了她手和嘴的脏手帕干不干净，',
 91 | 		[
 92 | 			'干',
 93 | 			'幹',
 94 | 		],
 95 | 	],
 96 | 
 97 | 	[
 98 | 		'而是于公于私的看法都相同，',
 99 | 		[
100 | 			'于',
101 | 		]
102 | 	],
103 | 
104 | 	[
105 | 		'「維爾德拉流斗殺法」',
106 | 		[
107 | 			'斗',
108 | 		]
109 | 	],
110 | 
111 | 	[
112 | 		'。面對一隻做困獸之斗的老虎,',
113 | 		[
114 | 			'斗',
115 | 		]
116 | 	],
117 | 
118 | 	[
119 | 		'，比我想象的还强──',
120 | 		[
121 | 			'象',
122 | 		]
123 | 	],
124 | 
125 | 	[
126 | 		'不论那副姿态有多么不堪入目',
127 | 		[
128 | 			'么',
129 | 		]
130 | 	],
131 | 
132 | 	[
133 | 		'钢铁制面具隐藏的嘴部现形了',
134 | 		[
135 | 			'制',
136 | 			'麵',
137 | 		]
138 | 	],
139 | 
140 | 	[
141 | 		'，人类根本干不出这种事这样？',
142 | 		[
143 | 			'干',
144 | 		]
145 | 	],
146 | 
147 | 	[
148 | 		'和庵彼此搏命相斗，会操控火焰的男人。',
149 | 		[
150 | 			'斗',
151 | 		],
152 | 	],
153 | 
154 | 	[
155 | 		'。我干我该干的就对了。」',
156 | 		[
157 | 			'干',
158 | 		],
159 | 	],
160 | 
161 | 	[
162 | 		'，让我沉溺在你们两人的温柔乡里吧。」',
163 | 		[
164 | 			'里',
165 | 		],
166 | 	],
167 | 
168 | ];
169 | 
170 | export default tests_lazy_indexof_not
171 | 


--------------------------------------------------------------------------------
/test/res/lazy.novel.ts:
--------------------------------------------------------------------------------
  1 | /**
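  2 |  * Test passages: every release guarantees that the segmentation and conversion below behave as expected.
  3 |  *
  4 |  * The tests in this file only meet expectations when nodeNovelMode is enabled.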
  5 |  */
  6 | 
  7 | import {
  8 | 	sortTests,
  9 | 
 10 | } from '../lib/util';
 11 | import {
 12 | 	lazyMatch,
 13 | 	lazyMatch002,
 14 | 	lazyMatchNot,
 15 | 	lazyMatchSynonym001,
 16 | 	lazyMatchSynonym001Not,
 17 | } from '@novel-segment/assert';
 18 | 
 19 | /**
 20 |  * After segmentation, the output must match the listed results.
 21 |  */
 22 | export const tests_lazy_novel_base: [string, Parameters<typeof lazyMatch>['1'], Parameters<typeof lazyMatch>['2']?][] = [
 23 | 
 24 | ];
 25 | 
 26 | /**
 27 |  * After segmentation, the output must not contain a match for the listed results.
 28 |  */
 29 | export const tests_lazy_novel_base_not: [string, Parameters<typeof lazyMatchNot>['1'], Parameters<typeof lazyMatchNot>['2']?][] = [
 30 | 
 31 | 
 32 | 
 33 | ];
 34 | 
 35 | /**
 36 |  * After segmentation, the output must match one of the listed candidate results.
 37 |  */
 38 | export const tests_lazy_novel_array: [string, Parameters<typeof lazyMatch002>['1'], Parameters<typeof lazyMatch002>['2']?][] = [
 39 | 
 40 | ];
 41 | 
 42 | /**
 43 |  * After segmentation and conversion, the output must contain the listed words.
 44 |  */
 45 | export const tests_lazy_novel_indexof: [string, Parameters<typeof lazyMatchSynonym001>['1'], Parameters<typeof lazyMatchSynonym001>['2']?][] = [
 46 | 
 47 | 	[
 48 | 		'但是在發出邀請後卻被回以“吾才不要去那種魚龍混雜的地方呢”，義正言辭的回絕了',
 49 | 		[
 50 | 			'義正辭嚴',
 51 | 		],
 52 | 	],
 53 | 
 54 | 	[
 55 | 		'也許是不知道有普通人被卷入了結界',
 56 | 		[
 57 | 			'捲',
 58 | 		],
 59 | 	],
 60 | 
 61 | 	[
 62 | 		'不過好象只是杞人憂天。',
 63 | 		[
 64 | 			'像',
 65 | 		],
 66 | 	],
 67 | 
 68 | 	[
 69 | 		'鶫讓自己的身體深深的陷入政府準備的轎車后座',
 70 | 		[
 71 | 			'後',
 72 | 		],
 73 | 	],
 74 | 
 75 | 	[
 76 | 		'貝爾不遜的回復道。',
 77 | 		[
 78 | 			'覆',
 79 | 		],
 80 | 	],
 81 | 
 82 | 	[
 83 | 		'「沒有借口啊」',
 84 | 		[
 85 | 			'藉',
 86 | 		],
 87 | 	],
 88 | 
 89 | 	[
 90 | 		'「⋯⋯你啊。说了吧。会把我卷进你的漩涡里，让我无处可逃。」',
 91 | 		[
 92 | 			'捲',
 93 | 		],
 94 | 	],
 95 | 
 96 | 	[
 97 | 		'「⋯⋯好困啊。都快要做像黃金絲綢般美好的夢了。」',
 98 | 		[
 99 | 			'睏',
100 | 		],
101 | 	],
102 | 
103 | 	[
104 | 		'廣瀨的下一個對象是我嗎？很遺憾，對男人沒有興趣。',
105 | 		[
106 | 			'象',
107 | 		],
108 | 	],
109 | 
110 | 	[
111 | 		'　墮入絕望的深淵的她，最後抓住的對象是――',
112 | 		[
113 | 			'象',
114 | 		],
115 | 	],
116 | 
117 | 	[
118 | 		'基本上的印象是伊撒古向勞拉絕對服從',
119 | 		[
120 | 			'象',
121 | 		],
122 | 	],
123 | 
124 | 	[
125 | 		'企圖甩掉貨物的八腳獨眼象用它的巨大身軀衝撞正在拆解中的鷹架',
126 | 		[
127 | 			'象',
128 | 		],
129 | 	],
130 | 
131 | 	[
132 | 		'「艾莉卡！卡露米雅，你們把大象抓起來！」',
133 | 		[
134 | 			'象',
135 | 		],
136 | 	],
137 | 
138 | 	[
139 | 		'還有因為最近在就業活動中占有優勢的等等。',
140 | 		[
141 | 			'佔',
142 | 		],
143 | 	],
144 | 
145 | 	[
146 | 		'将系成蝴蝶结的一头拉动的话',
147 | 		[
148 | 			'繫',
149 | 		],
150 | 	],
151 | 
152 | 	[
153 | 		'由於發生了恐怖分子佔領事件，因此換了一輛列車。',
154 | 		[
155 | 			'份',
156 | 		],
157 | 	],
158 | 
159 | 	[
160 | 		'「哦⋯⋯『業火灼熱拉面』，這怎麼看都是很危險的東西。」',
161 | 		[
162 | 			'麵',
163 | 		],
164 | 	],
165 | 
166 | 	[
167 | 		'伊甸人非常喜爱地上的牲畜及谷类。',
168 | 		[
169 | 			'穀',
170 | 		],
171 | 	],
172 | 
173 | 	[
174 | 		'一邊喂我喝了些甚麼。',
175 | 		[
176 | 			'餵',
177 | 		],
178 | 	],
179 | 
180 | 	[
181 | 		'艾倫諾拉征得龍龍同意後',
182 | 		[
183 | 			'徵',
184 | 		],
185 | 	],
186 | 
187 | 	[
188 | 		'讓婚約者的心系在自己身上也是尤菲莉亞的任務',
189 | 		[
190 | 			'繫',
191 | 		],
192 | 	],
193 | 
194 | 	[
195 | 		'是以蕎麥湯面',
196 | 		[
197 | 			'麵',
198 | 		],
199 | 	],
200 | 
201 | ];
202 | 
203 | /**
204 |  * After segmentation and conversion, the output must not contain any of the listed words.
205 |  */
206 | export const tests_lazy_novel_indexof_not: [string, Parameters<typeof lazyMatchSynonym001Not>['1'], Parameters<typeof lazyMatchSynonym001Not>['2']?][] = [
207 | 
208 | 	[
209 | 		'還有一碗名叫『麻辣力湯面』的拉面類料理。',
210 | 		[
211 | 			'面',
212 | 		],
213 | 	],
214 | 
215 | ];
216 | // Run sortTests (from ../lib/util) over every fixture list; the return value is ignored, so it presumably sorts in place (TODO confirm).
217 | sortTests(tests_lazy_novel_base);
218 | sortTests(tests_lazy_novel_base_not);
219 | sortTests(tests_lazy_novel_array);
220 | sortTests(tests_lazy_novel_indexof);
221 | sortTests(tests_lazy_novel_indexof_not);
222 | // The default export is the CommonJS `exports` object itself, typed as this module's own namespace.
223 | export default exports as typeof import('./lazy.novel');
224 | 


--------------------------------------------------------------------------------
/test/script/build-submod.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2020/7/1.
 3 |  */
 4 | 
 5 | import FastGlob from '@bluelovers/fast-glob';
 6 | import { join, parse } from 'path';
 7 | import { outputFile } from 'fs-extra';
 8 | // Repository root: two directory levels above this script.
 9 | const __root = join(__dirname, '../..');
10 | // Generate lib/submod.ts with one namespace import and one re-export per .ts file (declaration files excluded) found directly in lib/submod.
11 | FastGlob
12 | 	.async<string>([
13 | 	'!*.d.ts',
14 | 	'*.ts',
15 | ], {
16 | 	cwd: join(__root, 'lib', 'submod')
17 | })
18 | 	.then(ls => {
19 | 		// Module names grouped by filename suffix, plus the complete list.
20 | 		let record = {
21 | 			Optimizer: [] as string[],
22 | 			Tokenizer: [] as string[],
23 | 			all: [] as string[],
24 | 		}
25 | 		// Sort the matched paths so the generated lines always come out in the same order.
26 | 		ls.sort();
27 | 
28 | 		let lines = [] as string[];
29 | 
30 | 		lines.push('');
31 | 
32 | 		ls.forEach(row => {
33 | 			// The module name is the file name without its extension.
34 | 			let name = parse(row).name;
35 | 			// NOTE(review): the Optimizer/Tokenizer buckets are filled here but never read afterwards; only record.all is used below.
36 | 			if (/Optimizer$/.test(name))
37 | 			{
38 | 				record.Optimizer.push(name)
39 | 			}
40 | 			else if (/Tokenizer$/.test(name))
41 | 			{
42 | 				record.Tokenizer.push(name)
43 | 			}
44 | 
45 | 			record.all.push(name)
46 | 			// Namespace-import the module under its own file name.
47 | 			lines.push(`import * as ${name} from './submod/${name}';`)
48 | 
49 | 		});
50 | 
51 | 		lines.push('');
52 | 		// Re-export every imported namespace under the same name.
53 | 		record.all.forEach(name => {
54 | 
55 | 			lines.push(`export { ${name} }`)
56 | 
57 | 		})
58 | 
59 | 		lines.push('');
60 | 		// Write the generated source, newline-joined, to lib/submod.ts.
61 | 		return outputFile(join(__root, 'lib', 'submod.ts'), lines.join(`\n`))
62 | 	})
63 | ;
64 | /** Turn each name into a tab-indented, comma-terminated line of text. */
65 | function _record(list: string[])
66 | {
67 | 	return list.map((entry) => '\t'.concat(entry, ','))
68 | }
69 | 


--------------------------------------------------------------------------------
/test/sleep.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/13/013.
 3 |  */
 4 | 
 5 | import * as deasync from 'deasync';
 6 | import { sleepSync, awaitSync } from '../lib/util/sleep';
 7 | 
 8 | const timstamp = Date.now();
 9 | 
10 | function f(n: number)
11 | {
12 | 	// Resolve after `n` ms, log `n` with the elapsed time, then pass `n` on as the resolved value.
13 | 	const timer = new Promise((resolve) => setTimeout(resolve, n));
14 | 
15 | 	return timer
16 | 		.then(() =>
17 | 		{
18 | 			logWithTime(n);
19 | 
20 | 			return n;
21 | 		})
22 | 	;
23 | }
24 | // Manual demo: runs sleepSync/awaitSync (from ../lib/util/sleep) next to ordinary promise timers and logs the elapsed time at each step.
25 | console.time();
26 | // Start a 500 ms timer; the promise it returns is not kept.
27 | f(500);
28 | 
29 | let p2 = sleepSync(250);
30 | // The value returned by sleepSync is used through both thenSync and then.
31 | p2.thenSync(function (n)
32 | {
33 | 	return logWithTime('thenSync', n);
34 | });
35 | p2.then(function (n)
36 | {
37 | 	return logWithTime(n);
38 | });
39 | 
40 | awaitSync(p2)
41 | 
42 | 	.then(function (n)
43 | {
44 | 	logWithTime(666, n);
45 | });
46 | // Start a 1500 ms timer, then call deasync.sleep(1000) before logging (presumably a blocking ~1 s wait; confirm against deasync).
47 | let p = f(1500);
48 | 
49 | deasync.sleep(1000);
50 | //msleep(1000);
51 | logWithTime(1000);
52 | // Hand the 1500 ms promise to awaitSync.
53 | let p33 = awaitSync(p);
54 | 
55 | let v33 = p33.thenSync(function (n)
56 | {
57 | 	return logWithTime('thenSync', n);
58 | });
59 | // v33 is whatever thenSync returned for the callback above.
60 | logWithTime('print v33', v33);
61 | 
62 | p33
63 | 	.then(function (n)
64 | {
65 | 	logWithTime(777, n);
66 | });
67 | 
68 | console.timeEnd();
69 | /** Log the arguments prefixed with `[<ms elapsed since timstamp>]` and return them as an array. */
70 | function logWithTime(...argv: unknown[])
71 | {
72 | 	// `timstamp` is taken once when the module loads, so the prefix is the time elapsed since then.
73 | 	console.log(`[${Date.now() - timstamp}]`, ...argv);
74 | 	return argv;
75 | }
76 | 


--------------------------------------------------------------------------------
/test/submod.spec.ts:
--------------------------------------------------------------------------------
  1 | import FastGlob from '@bluelovers/fast-glob';
  2 | import { join, parse } from "path";
  3 | import __root from '../__root';
  4 | import { ISubOptimizer, ISubTokenizer } from '../lib/mod';
  5 | import { dirname } from 'path';
  6 | import { array_unique, array_unique_overwrite } from 'array-hyper-unique';
  7 | import SegmentCore from '../lib/segment/core';
  8 | import isUnset, { isSet } from '../lib/util/isUnset';
  9 | import * as SubmodList from '../lib/submod';
 10 | 
 11 | const segment = new SegmentCore;
 12 | 
 13 | describe(`check all files`, () =>
 14 | {
 15 | 	// Collect every submodule under lib/submod: top-level .ts files (no .d.ts) plus directories that contain an index.ts.
 16 | 	const files = FastGlob
 17 | 		.sync<string>([
 18 | 			'!*.d.ts',
 19 | 			'*.ts',
 20 | 		], {
 21 | 			cwd: join(__root, 'lib', 'submod'),
 22 | 			absolute: true,
 23 | 		})
 24 | 	;
 25 | 	// A directory-style submodule is represented by the directory that holds its index.ts.
 26 | 	FastGlob
 27 | 		.sync<string>([
 28 | 			'*/index.ts',
 29 | 		], {
 30 | 			cwd: join(__root, 'lib', 'submod'),
 31 | 			absolute: true,
 32 | 		})
 33 | 		.forEach(m => {
 34 | 
 35 | 			files.push(dirname(m))
 36 | 
 37 | 		})
 38 | 	;
 39 | 	// Drop duplicate paths (array-hyper-unique helper; presumably mutates `files` in place since its return value is unused).
 40 | 	array_unique_overwrite(files);
 41 | 
 42 | 	files.sort();
 43 | 
 44 | 	files.forEach(row => {
 45 | 		// The suite name is the file or directory name without its extension.
 46 | 		let name = parse(row).name;
 47 | 
 48 | 		describe(name, () => {
 49 | 			// Each submodule must pass _check when loaded through dynamic import() and through require().
 50 | 			test(`import`, async () =>
 51 | 			{
 52 | 				const mod = await import(row);
 53 | 
 54 | 				_check(mod, name);
 55 | 			});
 56 | 
 57 | 			test(`require`, async () =>
 58 | 			{
 59 | 				const mod = require(row);
 60 | 
 61 | 				_check(mod, name);
 62 | 			});
 63 | 
 64 | 		})
 65 | 
 66 | 	})
 67 | 
 68 | })
 69 | /** Assert that `mod` is an object with an init() function, the `type` implied by the suffix of `name`, and the API verified by _checkApi. */
 70 | function _check(mod: ISubOptimizer | ISubTokenizer, name: string)
 71 | {
 72 | 	expect(typeof mod).toStrictEqual('object');
 73 | 	expect(typeof mod.init).toStrictEqual('function');
 74 | 
 75 | 	// init() may return a replacement object; when it returns null/undefined the module itself is the one checked.
 76 | 	let actual = mod.init(segment as any);
 77 | 	// @ts-ignore
 78 | 	let _mod: ISubOptimizer | ISubTokenizer = actual ?? mod;
 79 | 
 80 | 	if (/Optimizer$/.test(name))
 81 | 	{
 82 | 
 83 | 		//expect(typeof (mod as ISubOptimizer).doOptimize).toStrictEqual('function');
 84 | 
 85 | 		expect(mod).toHaveProperty('type', 'optimizer');
 86 | 
 87 | 		_checkApi(_mod, name)
 88 | 		// NOTE(review): assuming isUnset means null/undefined, _mod === mod in this branch, so the call below repeats the check above.
 89 | 		if (isUnset(actual))
 90 | 		{
 91 | 			_checkApi(mod, name)
 92 | 		}
 93 | 
 94 | 	}
 95 | 	else if (/Tokenizer$/.test(name))
 96 | 	{
 97 | 		//expect(typeof (mod as ISubTokenizer).split).toStrictEqual('function');
 98 | 
 99 | 		expect(mod).toHaveProperty('type', 'tokenizer');
100 | 
101 | 		_checkApi(_mod, name)
102 | 		// NOTE(review): same redundancy as in the Optimizer branch.
103 | 		if (isUnset(actual))
104 | 		{
105 | 			_checkApi(mod, name)
106 | 		}
107 | 
108 | 	}
109 | 	else
110 | 	{
111 | 		expect(name).toMatch(/(?:Tokenizer|Optimizer)$/)
112 | 	}
113 | }
114 | 
115 | describe(`submod index`, () =>
116 | {
117 | 	// Every export of ../lib/submod must pass the same _check, using its export name as the module name.
118 | 	Object.entries(SubmodList)
119 | 		.forEach(([name, mod]) => {
120 | 
121 | 			test(name, () => {
122 | 				_check(mod as any, name);
123 | 			})
124 | 
125 | 		})
126 | 	;
127 | 
128 | })
129 | /** Assert that `mod` carries the `type` tag and the entry-point method implied by the suffix of `name`. */
130 | function _checkApi(mod: ISubOptimizer | ISubTokenizer, name: string)
131 | {
132 | 	// Dispatch on the name suffix; any other name falls through to the matcher that fails and reports it.
133 | 	const suffix = /(?:Optimizer|Tokenizer)$/.exec(name)?.[0];
134 | 
135 | 	switch (suffix)
136 | 	{
137 | 		case 'Optimizer':
138 | 
139 | 			expect(mod).toHaveProperty('type', 'optimizer');
140 | 			expect(typeof (mod as ISubOptimizer).doOptimize).toStrictEqual('function');
141 | 
142 | 			break;
143 | 
144 | 		case 'Tokenizer':
145 | 
146 | 			expect(mod).toHaveProperty('type', 'tokenizer');
147 | 			expect(typeof (mod as ISubTokenizer).split).toStrictEqual('function');
148 | 
149 | 			break;
150 | 
151 | 		default:
152 | 
153 | 			expect(name).toMatch(/(?:Tokenizer|Optimizer)$/)
154 | 	}
155 | }
156 | 


--------------------------------------------------------------------------------
/test/temp/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | !/cache/**/*
3 | !/cache
4 | !/cache.db.info.json
5 | !/stringify.sorted.txt
6 | /stringify.txt
7 | /cache.common.synonym.db.info.json
8 | /cache.db.info.json
9 | 


--------------------------------------------------------------------------------
/test/temp/cache/0/eng.txt:
--------------------------------------------------------------------------------
1 | 3G|0x100000|100
2 | App|0x100010|0
3 | T恤|0x100010|0
4 | VR|0x100010|0
5 | 
6 | 


--------------------------------------------------------------------------------
/test/temp/cache/0/other.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 


--------------------------------------------------------------------------------
/test/temp/cache/i.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 


--------------------------------------------------------------------------------
/test/temp/cache/u.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 


--------------------------------------------------------------------------------
/test/temp/cache/v.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 


--------------------------------------------------------------------------------
/test/test_segment.test.ts:
--------------------------------------------------------------------------------
 1 | import { createSegment } from './lib';
 2 | import { IOptionsDoSegment, Segment } from '../lib/Segment';
 3 | import { mochaSetup, toStringArray } from './lib/util';
 4 | import { ENUM_SUBMODS } from '../lib/mod/index';
 5 | import { tests_old } from './res/default';
 6 | import { console } from 'debug-color2';
 7 | import { chai, relative, expect, path, assert, util, mochaAsync } from './_local-dev';
 8 | 
 9 | describe(relative(__filename), function ()
10 | {
11 | 	// Shared Segment instance; assigned once in the before() hook.
12 | 	let segment: Segment = null;
13 | 
14 | 	before(function ()
15 | 	{
16 | 		mochaSetup(this);
17 | 
18 | 		segment = createSegment(false, {
19 | 			disableModules: [
20 | 				//ENUM_SUBMODS.ZhtSynonymOptimizer,
21 | 			]
22 | 		});
23 | 	});
24 | 	// Segments `a` with convertSynonym off and ZhtSynonymOptimizer disabled; `options` is spread last, so callers can override both.
25 | 	function doSegment(a: string, options?: IOptionsDoSegment)
26 | 	{
27 | 		return segment.doSegment(a, {
28 | 			convertSynonym: false,
29 | 			disableModules: [
30 | 				ENUM_SUBMODS.ZhtSynonymOptimizer,
31 | 			],
32 | 			...options,
33 | 		})
34 | 	}
35 | 
36 | 	it('init', function ()
37 | 	{
38 | 		// No body: this test has no assertions of its own.
39 | 	});
40 | 
41 | 	describe('default test', function ()
42 | 	{
43 | 		// Segment `a`, log the tokens joined by '/', and require the token strings to deep-equal `b`.
44 | 		let equal = function (a, b)
45 | 		{
46 | 			//console.info(a);
47 | 
48 | 			let c = toStringArray(doSegment(a));
49 | 			console.debug(c.join('/'));
50 | 			//assert.equal(c.toString('\t'), b.toString('\t'));
51 | 
52 | 			expect(c).to.deep.equal(b)
53 | 		};
54 | 
55 | 		//console.info('分词测试');
56 | 
57 | 		tests_old.forEach(function (args)
58 | 		{
59 | 			it(args[0], function ()
60 | 			{
61 | 				equal(...args);
62 | 			});
63 | 		});
64 | 
65 | 	});
66 | 	// The option tests below all pass simple: true and inspect the joined result.
67 | 	it('options: simple=true', function ()
68 | 	{
69 | 		assert.equal(doSegment('永和服装饰品有限公司', { simple: true }).join('\t'),
70 | 			['永和', '服装', '饰品', '有限公司'].join('\t'),
71 | 		);
72 | 	});
73 | 	// With punctuation stripped, '丰' and '李' must end up adjacent in the joined output.
74 | 	it('options: stripPunctuation=true', function ()
75 | 	{
76 | 		assert.equal(doSegment('王五和张三丰、李强是谁', { simple: true, stripPunctuation: true }).join('').includes('丰李'),
77 | 			true,
78 | 		);
79 | 	});
80 | 
81 | 	/*
82 | 	it('options: convertSynonym=true', function ()
83 | 	{
84 | 		assert.equal(doSegment('何时入睡', { simple: true, convertSynonym: true }).join('\t'),
85 | 			['什么时候', '入眠'].join('\t'),
86 | 		);
87 | 	});
88 | 	*/
89 | 	// The expected tokens omit '因为' and '了' from the input.
90 | 	it('options: stripStopword=true', function ()
91 | 	{
92 | 		assert.equal(doSegment('因为李三买了一张三角桌子', { simple: true, stripStopword: true }).join('\t'),
93 | 			['李三', '买', '一张', '三角', '桌子'].join('\t'),
94 | 		);
95 | 	});
96 | 
97 | });
98 | 


--------------------------------------------------------------------------------
/test/version.spec.ts:
--------------------------------------------------------------------------------
 1 | import _m0 = require('../version');
 2 | import _m1 from '../version';
 3 | import { version } from '../package.json';
 4 | 
 5 | test(`export version check`, () =>
 6 | {
 7 | 	// _m0 is the `import = require()` namespace and _m1 the default import; both must agree with package.json.
 8 | 	expect(_m0.version).toStrictEqual(_m1);
 9 | 	expect(_m0.version).toStrictEqual(version);
10 | 
11 | });
12 | 


--------------------------------------------------------------------------------
/test/versions.spec.ts:
--------------------------------------------------------------------------------
 1 | import _m1, { versions } from '../version';
 2 | import { version } from '../package.json';
 3 | 
 4 | test(`export version check`, () =>
 5 | {
 6 | 	// The default export must equal both versions['novel-segment'] and the version in package.json.
 7 | 	expect(_m1).toStrictEqual(versions['novel-segment']);
 8 | 	expect(_m1).toStrictEqual(version);
 9 | 
10 | });
11 | 
12 | test(`export versions check 2`, () =>
13 | {
14 | 	// Only the presence of a string entry per package is checked, not the version values themselves.
15 | 	expect(versions).toMatchObject({
16 | 		'novel-segment': expect.any(String),
17 | 		'segment-dict': expect.any(String),
18 | 		'regexp-cjk': expect.any(String),
19 | 		'cjk-conv': expect.any(String),
20 | 	});
21 | 
22 | });
23 | 


--------------------------------------------------------------------------------
/test/z.0010.test.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by User on 2019/6/12.
 3 |  */
 4 | 
 5 | /// <reference types="mocha" />
 6 | /// <reference types="benchmark" />
 7 | /// <reference types="chai" />
 8 | /// <reference types="node" />
 9 | 
10 | import { chai, relative, expect, path, assert, util, mochaAsync } from './_local-dev';
11 | import { mochaSetup, toStringArray } from './lib/util';
12 | import { createSegment } from './lib';
13 | import { Segment } from '../lib';
14 | import { console } from 'debug-color2';
15 | import fixedGC from './res/gc.data';
16 | import { IOptionsDoSegment } from '../lib/Segment';
17 | 
18 | console.setOptions({
19 | 	label: true,
20 | });
21 | 
22 | // @ts-ignore
23 | describe(relative(__filename), () =>
24 | {
25 | 	let currentTest: Mocha.Test;
26 | 
27 | 	let segment: Segment = null;
28 | 
29 | 	// @ts-ignore
30 | 	before(function ()
31 | 	{
32 | 		// @ts-ignore
33 | 		this.timeout(60000);
34 | 		// Build the shared Segment instance once for the whole file, with no modules disabled.
35 | 		segment = createSegment(true, {
36 | 			disableModules: [
37 | 				//ENUM_SUBMODS.ZhtSynonymOptimizer,
38 | 			]
39 | 		});
40 | 	});
41 | 
42 | 	// @ts-ignore
43 | 	beforeEach(function ()
44 | 	{
45 | 		// @ts-ignore
46 | 		currentTest = this.currentTest;
47 | 		// Only the commented-out logging below reads currentTest.
48 | 		//console.log('it:before', currentTest.title);
49 | 		//console.log('it:before', currentTest.fullTitle());
50 | 	});
51 | 
52 | 	// @ts-ignore
53 | 	describe(`干支`, function ()
54 | 	{
55 | 
56 | 		`甲子|乙丑|丙寅|丁卯|戊辰|己巳|庚午|辛未|壬申|癸酉|甲戌|乙亥|丙子|丁丑|戊寅|己卯|庚辰|辛巳|壬午|癸未|甲申|乙酉|丙戌|丁亥|戊子|己丑|庚寅|辛卯|壬辰|癸巳|甲午|乙未|丙申|丁酉|戊戌|己亥|庚子|辛丑|壬寅|癸卯|甲辰|乙巳|丙午|丁未|戊申|己酉|庚戌|辛亥|壬子|癸丑|甲寅|乙卯|丙辰|丁巳|戊午|己未|庚申|辛酉|壬戌|癸亥|寅月|丙寅月|戊寅月|庚寅月|壬寅月|甲寅月|卯月|丁卯月|己卯月|辛卯月|癸卯月|乙卯月|辰月|戊辰月|庚辰月|壬辰月|甲辰月|丙辰月|巳月|己巳月|辛巳月|癸巳月|乙巳月|丁巳月|午月|庚午月|壬午月|甲午月|丙午月|戊午月|未月|辛未月|癸未月|乙未月|丁未月|己未月|申月|壬申月|甲申月|丙申月|戊申月|庚申月|酉月|癸酉月|乙酉月|丁酉月|己酉月|辛酉月|戌月|甲戌月|丙戌月|戊戌月|庚戌月|壬戌月|亥月|乙亥月|丁亥月|己亥月|辛亥月|癸亥月|子月|丙子月|戊子月|庚子月|壬子月|甲子月|丑月|丁丑月|己丑月|辛丑月|癸丑月|乙丑月`.split('|')
57 | 			.forEach(text => {
58 | 			// Each term must segment into one token, or into two tokens where the second is exactly '月'.
59 | 			// @ts-ignore
60 | 			it(text, function ()
61 | 			{
62 | 				let actual = toStringArray(doSegment(text));
63 | 				// Between one and two tokens are allowed.
64 | 				expect(actual).length.gt(0).lte(2);
65 | 
66 | 				if (actual.length === 2)
67 | 				{
68 | 					expect(actual).to.have.deep
69 | 						.property('1', '月')
70 | 					;
71 | 					// A one-character first token is rejected: once this branch is entered the assertion below always fails.
72 | 					if (actual[0].length === 1)
73 | 					{
74 | 						expect(actual[0]).length.gt(1)
75 | 					}
76 | 				}
77 | 			});
78 | 
79 | 		})
80 | 
81 | 	});
82 | 	/** Segment `a` with the shared instance, passing a shallow copy of `options`. */
83 | 	function doSegment(a: string, options?: IOptionsDoSegment)
84 | 	{
85 | 		const merged = { ...options };
86 | 
87 | 		return segment.doSegment(a, merged)
88 | 	}
89 | });
90 | 


--------------------------------------------------------------------------------
/test/z.gc.not.test.ts:
--------------------------------------------------------------------------------
 1 | import { createSegment } from './lib';
 2 | import FastGlob from '@bluelovers/fast-glob/bluebird';
 3 | import { join } from 'path';
 4 | import { readFileSync } from 'fs';
 5 | 
 6 | describe(`check not gc`, () =>
 7 | {
 8 | 	const segment = createSegment(true, {
 9 | 		nodeNovelMode: true,
10 | 	});
11 | 	// Fixture texts live under res/gc.not next to this file.
12 | 	const __res = join(__dirname, 'res/gc.not');
13 | 	// Register one test per .txt file found at any depth in that directory.
14 | 	FastGlob
15 | 		.sync([
16 | 		'**/*.txt',
17 | 	], {
18 | 		cwd: __res
19 | 	})
20 | 		.forEach(file => {
21 | 			// No assertions: the test only times doSegment on the file contents, so it fails only if the call throws or the runner times out.
22 | 			it(file, () =>
23 | 			{
24 | 				console.time(file)
25 | 				const text = readFileSync(join(__res, file)) // no encoding given, so this is a Buffer
26 | 				let actual = segment.doSegment(text);
27 | 				console.timeEnd(file)
28 | 			});
29 | 
30 | 		})
31 | 	;
32 | 
33 | })
34 | 


--------------------------------------------------------------------------------
/test/z.gc.test.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by User on 2019/6/12.
 3 |  */
 4 | 
 5 | /// <reference types="mocha" />
 6 | /// <reference types="benchmark" />
 7 | /// <reference types="chai" />
 8 | /// <reference types="node" />
 9 | 
10 | import { chai, relative, expect, path, assert, util, mochaAsync } from './_local-dev';
11 | import { mochaSetup, toStringArray } from './lib/util';
12 | import { createSegment } from './lib';
13 | import { Segment } from '../lib';
14 | import { console } from 'debug-color2';
15 | import fixedGC from './res/gc.data';
16 | import { IOptionsDoSegment } from '../lib/Segment';
17 | 
18 | console.setOptions({
19 | 	label: true,
20 | });
21 | 
22 | // @ts-ignore
23 | describe(relative(__filename), () =>
24 | {
25 | 	// @ts-ignore
26 | 	let currentTest: Mocha.Test;
27 | 	// Shared Segment instance; assigned once in the before() hook.
28 | 	let segment: Segment = null;
29 | 
30 | 	// @ts-ignore
31 | 	before(function ()
32 | 	{
33 | 		this.timeout(60000);
34 | 		// Build the shared instance with no modules disabled.
35 | 		segment = createSegment(true, {
36 | 			disableModules: [
37 | 				//ENUM_SUBMODS.ZhtSynonymOptimizer,
38 | 			]
39 | 		});
40 | 	});
41 | 
42 | 	// @ts-ignore
43 | 	beforeEach(function ()
44 | 	{
45 | 		// @ts-ignore
46 | 		currentTest = this.currentTest;
47 | 		// Only the commented-out logging below reads currentTest.
48 | 		//console.log('it:before', currentTest.title);
49 | 		//console.log('it:before', currentTest.fullTitle());
50 | 	});
51 | 
52 | 	// @ts-ignore
53 | 	describe(`suite`, function ()
54 | 	{
55 | 		// One test per entry of the default export of ./res/gc.data.
56 | 		fixedGC.forEach(text => {
57 | 
58 | 			// @ts-ignore
59 | 			it(text, function ()
60 | 			{
61 | 				// @ts-ignore
62 | 				this.timeout(60000);
63 | 				// No assertions: segmentation only has to return within the timeout; the tokens are logged for inspection.
64 | 				let actual = toStringArray(doSegment(text));
65 | 
66 | 				console.debug(actual.join('/'));
67 | 
68 | 			});
69 | 
70 | 		})
71 | 
72 | 	});
73 | 	// Thin wrapper around the shared instance; passes a shallow copy of `options`.
74 | 	function doSegment(a: string, options?: IOptionsDoSegment)
75 | 	{
76 | 		return segment.doSegment(a, {
77 | 			...options,
78 | 		})
79 | 	}
80 | });
81 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "@bluelovers/tsconfig/esm/mapfile.json",
3 |   "compilerOptions": {
4 |     "importHelpers": true,
5 |     "noPropertyAccessFromIndexSignature": false
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/typedoc.config.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2018/4/21/021.
 3 |  */
 4 | 
 5 | const path = require('path');
 6 | // Absolute path of the `theme` directory located next to the resolved typedoc-themes-color entry file.
 7 | let p = path.resolve(path.join(path.dirname(require.resolve('typedoc-themes-color')), 'theme'));
 8 | // Debug output: the theme path, absolute and relative to the current working directory.
 9 | console.log(p);
10 | console.log(path.relative(process.cwd(), p));
11 | // Options object exported for TypeDoc.
12 | module.exports = {
13 | 	src : '.',
14 | 	out: './docs',
15 | 	//theme: './my-theme',
16 | //	theme: path.relative(process.cwd(), p),
17 | 	theme: p,
18 | 	ignoreCompilerErrors: true,
19 | 	excludeExternals: true,
20 | 
21 | 	externalPattern: "**/node_modules/**",
22 | 	// Several spellings of the same two directories (test and node_modules) are listed.
23 | 	exclude: [
24 | 		"test",
25 | 		"node_modules",
26 | 		"test/",
27 | 		"node_modules/",
28 | 		"**/test",
29 | 		"**/node_modules",
30 | 		"**/test/**/*",
31 | 		"**/node_modules/**/*",
32 | 	],
33 | };
34 | // Debug output: dump the final options.
35 | console.log(module.exports);
36 | 


--------------------------------------------------------------------------------
/version.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2020/6/2.
 3 |  */
 4 | export declare let version: string;
 5 | export default version;
 6 | export declare let version_dict: string;
 7 | export declare let versions: {
 8 |     'novel-segment': string;
 9 |     'segment-dict': string;
10 |     'regexp-cjk': string;
11 |     'cjk-conv': string;
12 | };
13 | 


--------------------------------------------------------------------------------
/version.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | /**
 3 |  * Created by user on 2020/6/2.
 4 |  */
 5 | Object.defineProperty(exports, "__esModule", { value: true });
 6 | exports.versions = exports.version_dict = exports.version = void 0;
 7 | exports.default = exports.version;
 8 | Object.defineProperty(exports, "version", {
 9 |     get() {
10 |         return require('./package.json').version;
11 |     }
12 | });
13 | Object.defineProperty(exports, "version_dict", {
14 |     get() {
15 |         return require('segment-dict/version').version;
16 |     }
17 | });
18 | Object.defineProperty(exports, "versions", {
19 |     get() {
20 |         return {
21 |             'novel-segment': exports.version,
22 |             'segment-dict': exports.version_dict,
23 |             'regexp-cjk': require('regexp-cjk/version').version,
24 |             'cjk-conv': require('cjk-conv/version').version,
25 |         };
26 |     }
27 | });
28 | Object.defineProperty(exports, "default", {
29 |     get() {
30 |         return exports.version;
31 |     }
32 | });
33 | //# sourceMappingURL=version.js.map


--------------------------------------------------------------------------------
/version.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Created by user on 2020/6/2.
 3 |  */
 4 | // Typed export declarations only: the real values are supplied by the getters defined on `exports` below.
 5 | // @ts-ignore
 6 | export let version: string;
 7 | export default version
 8 | // @ts-ignore
 9 | export let version_dict: string;
10 | // Versions of this package and of the three dependencies queried below, keyed by package name.
11 | // @ts-ignore
12 | export let versions: {
13 | 	'novel-segment': string;
14 | 	'segment-dict': string;
15 | 	'regexp-cjk': string;
16 | 	'cjk-conv': string;
17 | }
18 | // version: read from ./package.json each time the property is accessed.
19 | Object.defineProperty(exports, "version", {
20 | 	get()
21 | 	{
22 | 		return require('./package.json').version
23 | 	}
24 | });
25 | // version_dict: the `version` export of segment-dict/version.
26 | Object.defineProperty(exports, "version_dict", {
27 | 	get()
28 | 	{
29 | 		return require('segment-dict/version').version
30 | 	}
31 | });
32 | // versions: a fresh object on every access, combining the two values above with the regexp-cjk and cjk-conv versions.
33 | Object.defineProperty(exports, "versions", {
34 | 	get()
35 | 	{
36 | 		return {
37 | 			'novel-segment': version,
38 | 			'segment-dict': version_dict,
39 | 			'regexp-cjk': require('regexp-cjk/version').version,
40 | 			'cjk-conv': require('cjk-conv/version').version,
41 | 		}
42 | 	}
43 | });
44 | // default: forwards to `version`, replacing the value captured by `export default version` above.
45 | Object.defineProperty(exports, "default", {
46 | 	get()
47 | 	{
48 | 		return version
49 | 	}
50 | });
51 | 
52 | 


--------------------------------------------------------------------------------