├── .gitmodules
├── LICENSE
├── README-en.md
├── README.md
├── docs
├── Increasing_the_Efficiency_of_Text_Input_in_the_8pen_Method.pdf
├── design
│ ├── cross-mode.md
│ ├── cross-mode
│ │ ├── 00-latin-init.png
│ │ ├── 10-8pen-input.png
│ │ ├── 10-8vim-active-upper-case.png
│ │ ├── 10-8vim-input-demo.mov
│ │ ├── 20-latin-input-active-zone.png
│ │ ├── 20-latin-input-another.png
│ │ ├── 20-latin-input-expand-zone.png
│ │ ├── 20-latin-input-lower-b.png
│ │ ├── 20-latin-input-steps.png
│ │ ├── 20-latin-input-upper-b.png
│ │ ├── 30-latin-trace-shape-horizontal.png
│ │ ├── 30-latin-trace-shape-vertical.png
│ │ ├── 40-full-screen-input.png
│ │ ├── 40-input-trace-to-vector-horizontal.png
│ │ ├── 40-input-trace-to-vector-vertical.png
│ │ ├── 50-latin-blind-hit.png
│ │ ├── 60-select-latin-completion.png
│ │ ├── 60-switch-latin-to-pinyin.png
│ │ ├── 70-pinyin-input-step-1.png
│ │ ├── 70-pinyin-input-step-2.png
│ │ ├── 75-pinyin-input-active-zone-level-2.png
│ │ ├── 75-pinyin-input-active-zone.png
│ │ ├── 75-pinyin-input-end.png
│ │ ├── 75-pinyin-input-expand-zone-level-1.png
│ │ ├── 75-pinyin-input-expand-zone-level-2.png
│ │ ├── 75-pinyin-input-expand-zone-level-3.png
│ │ └── Kuzi_IME_Cross_Mode_Prototype.drawio
│ └── x-mode
│ │ ├── 10-latin-input-steps.png
│ │ └── Kuaizi_IME_X_Mode_Prototype.drawio
├── donate
│ ├── alipay.jpg
│ ├── index.md
│ └── wechat.png
├── image
│ ├── kuaizi_ime_candidates_and_emojis.png
│ ├── kuaizi_ime_editor_editing.png
│ ├── kuaizi_ime_has_many_emojis.png
│ ├── kuaizi_ime_latin_case_input.png
│ ├── kuaizi_ime_main.png
│ ├── kuaizi_ime_math_expr_input.png
│ ├── kuaizi_ime_pair_symbols.png
│ ├── kuaizi_ime_pinyin_slipping_input.png
│ └── kuaizi_ime_x_pad_input.png
└── video
│ ├── kuaizi_ime_auto_completion.mov
│ ├── kuaizi_ime_editor_editing.mov
│ ├── kuaizi_ime_emoji_keyword_matching.mov
│ ├── kuaizi_ime_math_expr_input.mov
│ ├── kuaizi_ime_pair_symbol_input.mov
│ ├── kuaizi_ime_pinyin_comitting_options.mov
│ ├── kuaizi_ime_pinyin_slipping_input.mov
│ └── kuaizi_ime_x_pad_input.mov
├── logo.svg
├── thirdparty
├── hanzi-level-1.txt
├── hanzi-level-2.txt
├── hanzi-level-3.txt
├── hanzi-traditional-to-simple.txt
├── hanzi-weight.ciyu.txt
├── hanzi-weight.txt
├── 古代汉语语料库字频表.xls
├── 现代汉语常用字表.xls
├── 现代汉语语料库分词类词频表.xls
├── 现代汉语语料库字频表.xls
├── 现代汉语语料库词频表.xls
├── 现代汉语通用字表.xls
└── 通用规范汉字表.xls
└── tools
├── analyze
├── LICENSE.txt
├── README.md
├── char-links.html
├── char-links.js
├── char-tree.html
├── char-tree.js
├── files
│ ├── char-links.json
│ ├── char-tree.json
│ └── pinyin.txt
├── img
│ ├── pinyin-char-links.png
│ ├── pinyin-char-tree.png
│ └── pinyin-key-layout.png
├── playground.css
├── playground.html
├── playground.js
├── runtime.js
├── simulate.css
├── simulate.html
├── simulate.js
└── tree.css
└── pinyin-dict
├── .gitignore
├── README.md
├── data
├── emojis.json
├── pinyin-dict-data-phrase.zip
├── pinyin-dict-data-word.zip
├── pinyin-dict-db.zip
└── sample.json
├── docs
└── img
│ └── donate-cngwzj.png
├── package.json
├── src
├── app
│ ├── shell.mjs
│ └── sqlite.mjs
├── generate
│ ├── emoji
│ │ ├── emoji.mjs
│ │ └── index.mjs
│ ├── phrase
│ │ ├── index.mjs
│ │ └── phrase.mjs
│ ├── raw
│ │ ├── index.mjs
│ │ └── raw.mjs
│ ├── sqlite
│ │ ├── ime
│ │ │ ├── ime.mjs
│ │ │ └── index.mjs
│ │ ├── phrase
│ │ │ └── hmm
│ │ │ │ ├── index.mjs
│ │ │ │ ├── sqlite.mjs
│ │ │ │ ├── trans
│ │ │ │ ├── index.mjs
│ │ │ │ └── trans.mjs
│ │ │ │ ├── trans_kewen
│ │ │ │ ├── index.mjs
│ │ │ │ └── trans.mjs
│ │ │ │ └── utils.mjs
│ │ └── word
│ │ │ ├── diff.mjs
│ │ │ ├── index.mjs
│ │ │ ├── patch.mjs
│ │ │ └── sqlite.mjs
│ └── test.mjs
└── utils
│ ├── sqlite.mjs
│ ├── utils.mjs
│ └── zdic.mjs
└── yarn.lock
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "android"]
2 | path = android
3 | url = git@github.com:crazydan-studio/kuaizi-ime-android.git
4 | [submodule "thirdparty/OpenCC"]
5 | path = thirdparty/OpenCC
6 | url = git@github.com:crazydan-studio/OpenCC.git
7 | [submodule "thirdparty/pinyin-data"]
8 | path = thirdparty/pinyin-data
9 | url = git@github.com:crazydan-studio/pinyin-data.git
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README-en.md:
--------------------------------------------------------------------------------
1 | 筷字输入法 (Kuaizi IME)
2 | =======================================
3 |
4 | [中文版](./README.md) | English
5 |
6 |
7 |
8 | > To download the latest version for Android,
9 | > please go to https://github.com/crazydan-studio/kuaizi-ime-android/releases/latest
10 |
11 | [Get it on F-Droid](https://f-droid.org/packages/org.crazydan.studio.app.ime.kuaizi)
14 |
15 | > **Warning**: 筷字输入法 (Kuaizi IME) is currently exclusively released
16 | > on [F-Droid](https://f-droid.org) and has not been published on other application platforms.
17 | > Please be cautious and aware of related risks. If you need to download and use it,
18 | > please use the link above.
19 |
20 | > If you find any errors or have improvement suggestions,
21 | > please visit [Issues](https://github.com/crazydan-studio/kuaizi-ime/issues)
22 |
23 | **筷字输入法** (Kuaizi IME) is a system based on the combinatorial features of Chinese Pinyin.
24 | It is an efficient and fast input method editor, suitable for electronic touchscreen devices.
25 |
26 | **筷字输入法** (Kuaizi IME) abandons the traditional method of tapping on virtual keyboard keys one by one.
27 | Instead, it offers a swipe input method for Pinyin, combined with the combinatorial features of Pinyin letters.
28 | The key layout is designed to make Pinyin input more accurate and convenient.
29 |
30 | **筷字输入法** (Kuaizi IME) provides editing support such as cursor movement
31 | and text selection for the target editor, thereby eliminating the frustration
32 | of being unable to accurately position inputs and text due to the small screen size of mobile devices.
33 | This further enhances text input and editing efficiency on small-screen devices.
34 |
35 | User data generated by **筷字输入法** (Kuaizi IME) is stored locally.
36 | It does not connect to the internet and will not collect or analyze user data or behavior habits.
37 | Furthermore, it does not support or provide predictive input or fuzzy matching mechanisms.
38 | To some extent, this input method editor aims to enhance the users' ability to recognize
39 | and memorize Chinese characters and Pinyin, ensuring that the reliance on digital tools
40 | does not lead to the gradual forgetting and eventual abandonment of
41 | 「汉字」 (Hànzì, Chinese characters), a treasure of Chinese civilization.
42 |
43 | ## About the icon
44 |
45 | The icon of **筷字输入法** (Kuaizi IME) is composed of the Chinese character 「字」 (zì, character)
46 | and the Chinese-style utensil 「筷子」 (Kuàizi, chopsticks).
47 | This combination closely links Chinese civilization with the unique culinary culture of its people,
48 | showcasing the long history of Chinese civilization.
49 | It also helps to spread traditional Chinese culture to the world,
50 | deepening the understanding and appreciation of Chinese culture among friends around the world.
51 | At the same time, it allows the Chinese people to have a stronger confidence in their national culture.
52 |
53 | ## How to clone the repository
54 |
55 | ```bash
56 | git clone git@github.com:crazydan-studio/kuaizi-ime.git
57 |
58 | cd kuaizi-ime
59 | git submodule update --init android
60 | ```
61 |
62 | > For detailed build instructions, please refer to the README documentation of each module.
63 |
64 | ## Project structure
65 |
66 | - [android/](https://github.com/crazydan-studio/kuaizi-ime-android):
67 | The complete project code for the 筷字输入法 (Kuaizi IME) Android client.
68 | Note: Use the command `git submodule update --init android` to initialize this git submodule.
69 | - [docs/](./docs/): The documentation directory of this project.
70 | - [Cross-shaped input interaction design](./docs/design/cross-mode.md):
71 | Optimization and improvement plan for cross-shaped input.
72 | - [tools/analyze/](./tools/analyze/):
73 | Pinyin key layout online analysis tool, used to analyze the letter combination structure of Pinyin
74 | and validate the layout scheme of the regular hexagonal keys.
75 | - [tools/pinyin-dict/](./tools/pinyin-dict/):
76 | Pinyin dictionary data collection and correction program,
77 | which collects Chinese character data and generates an SQLite database.
78 | The character/dictionary database for the 筷字输入法 (Kuaizi IME) client is also generated by this tool.
79 | - [thirdparty/](./thirdparty/):
80 | Third-party character and word data used in this project. **Note:** Use the command
81 | `git submodule update --init thirdparty/OpenCC thirdparty/pinyin-data`
82 | to initialize the git-submodules within this directory.
83 |
84 | ## License
85 |
86 | [Apache 2.0](./LICENSE)
87 |
88 | ## Donations
89 |
90 | **Note**: Please add the remark `筷字输入法` when donating.
91 | For a complete list of donations, please check the [Donation list](./docs/donate/index.md).
92 |
93 | | 支付宝 (Alipay) | 微信支付 (WeChat Pay) |
94 | | -- | -- |
95 | | ![Alipay](./docs/donate/alipay.jpg) | ![WeChat Pay](./docs/donate/wechat.png) |
96 |
97 | ## Feature Highlights
98 |
99 | - Input Pinyin via a swiping motion, with a maximum of only two swipes required for a single input.
100 | - Built-in
101 | [8VIM](https://github.com/8VIM/8VIM)/[8pen](./docs/Increasing_the_Efficiency_of_Text_Input_in_the_8pen_Method.pdf)
102 | input mode, allowing continuous input of Pinyin, English, and numbers by drawing circles,
103 | providing a smooth input experience.
104 | - All candidate characters are displayed with their complete Pinyin,
105 | making it easy to identify the accurate pronunciation of each character.
106 | - Input data is stored locally; no collection or analysis of user data.
107 | - Utilizes Hidden Markov Models (HMM) and the Viterbi algorithm for Pinyin input prediction.
108 | - Supports only precise Pinyin input matching; does not support Pinyin predictive input or fuzzy matching mechanisms.
109 | - Provides editing functions such as cursor movement, text selection, copy, paste, and cut.
110 | Additionally, supports undoing inputs submitted to the target editor for easy corrections.
111 | - Supports left-handed and right-handed mode switching to accommodate different user habits.
112 | - Supports basic mathematical operations (addition, subtraction, multiplication, and division),
113 | allowing calculations while typing.
114 | - Supports input of punctuation, emojis, and other text symbols, offering rich forms of expression.
115 | - Supports direct input of paired punctuation marks (e.g., brackets, quotes)
116 | and automatically wraps selected content within them.
117 | - Automatically adds spaces between Latin characters and Chinese characters,
118 | as well as between operands and operators, to standardize input formatting.
119 |
120 | https://github.com/user-attachments/assets/c5a3c769-0e6d-42e1-bc2f-babe85607bfb
121 |
122 | > Note: The audio and subtitles were generated by [GitPodcast](https://www.gitpodcast.com/),
123 | > and the final video was produced using [Veed](https://www.veed.io).
124 |
125 |
126 |
127 | ## Feature demonstration
128 |
129 | > The latest version's key layout may differ from the demonstration videos.
130 | > Please refer to the latest version.
131 |
132 | ### Pinyin Swipe Input
133 |
134 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/eb2f74f9-f64e-4d02-ad80-98e3ecb9d61b
135 |
136 | ### Arithmetic Input
137 |
138 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/f461b564-0ac4-4257-82ad-11afcd3e1d6c
139 |
140 | ### Content Editing
141 |
142 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/992a0a5e-7e1e-4b93-a1ac-c893d0e3ff2e
143 |
144 | ### Emoji Matching
145 |
146 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/7fff6ddf-9e10-408f-b160-3b3b8e2ab215
147 |
148 | ### Paired Symbol Input
149 |
150 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/97b0f99b-92e1-4b28-a5b6-d45150c4bada
151 |
152 | ### Pinyin Input Submission Options
153 |
154 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/274d41dd-e858-4b71-a041-31df3dd24f7d
155 |
156 | ### 8VIM/8pen-style Input
157 |
158 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/534fa61e-34dc-4e81-a7d1-5eb7cc3b291f
159 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 筷字(Kuaizi)输入法
2 | =======================================
3 |
4 | 中文版 | [English](./README-en.md)
5 |
6 |
7 |
8 | > 下载 Android 版最新版本,请前往 https://github.com/crazydan-studio/kuaizi-ime-android/releases/latest
9 |
10 | [Get it on F-Droid](https://f-droid.org/packages/org.crazydan.studio.app.ime.kuaizi)
13 |
14 | > **Warning**:筷字输入法目前仅发布在 [F-Droid](https://f-droid.org) 上,并未发布在其他应用平台,
15 | > 请注意识别并警惕相关风险,若需下载使用,请直接点击以上链接。
16 |
17 | > 若有缺陷反馈和改进意见,请移步至 [Issues](https://github.com/crazydan-studio/kuaizi-ime/issues)
18 | > 页面。
19 |
20 | **筷字输入法** 是一款根据汉语拼音的组合特征而专门设计的、
21 | 高效且快速的、适用于电子触屏设备的拼音输入法。
22 |
23 | **筷字输入法** 摒弃传统的逐个点击虚拟键盘按键的录入方式,
24 | 改为以滑屏方式录入拼音,并结合拼音字母的有限组合特征,精心设计按键布局,
25 | 让拼音输入更加准确且便捷。
26 |
27 | **筷字输入法** 同时提供对目标编辑器的光标移动和文本选择等编辑支持,
28 | 从而彻底摆脱因移动设备屏幕太小而无法准确定位输入和文本位置的烦恼,
29 | 进一步提高在小屏设备上的文本输入和编辑效率。
30 |
31 | **筷字输入法** 的用户数据仅留存在本地,
32 | 其不连接互联网,不会收集和分析用户数据和行为习惯。
33 | 其也不支持和提供联想输入和模糊匹配机制,在某种程度上,
34 | 该输入法是希望加强用户对汉字和拼音的识别和记忆能力,
35 | 不要因为对数字工具的依赖而逐渐遗忘并最终丢弃「汉字」这一中华文明的瑰宝。
36 |
37 | ## 关于图标
38 |
39 | **筷字输入法** 的图标由汉字「字」与中国特色餐具「筷子」组合而成,
40 | 该组合将中华文明与其人民独有的饮食文化紧密相连,既展现了中华文明悠久的历史,
41 | 也有助于向世界传播中华的传统文化,加深世界友人对中华文化的认识和了解,
42 | 同时,也让中华儿女能够对其民族文化拥有更加坚定的自信。
43 |
44 | ## 仓库克隆
45 |
46 | ```bash
47 | git clone git@github.com:crazydan-studio/kuaizi-ime.git
48 |
49 | cd kuaizi-ime
50 | git submodule update --init android
51 | ```
52 |
53 | > 具体的构建说明,请详见各模块的 README 文档。
54 |
55 | ## 工程结构
56 |
57 | - [android/](https://github.com/crazydan-studio/kuaizi-ime-android):
58 | 筷字输入法 Android 客户端的完整工程代码。注:使用命令 `git submodule update --init android`
59 | 初始化该 git 子模块
60 |
61 | - [docs/](./docs/): 本项目的文档目录
62 | - [十字型输入的交互设计](./docs/design/cross-mode.md): 针对 X 型输入的优化改进方案
63 | - [tools/analyze/](./tools/analyze/):
64 | 拼音按键布局在线分析工具,用于分析拼音的字母组合结构,并验证正六边形按键的布局方案
65 | - [tools/pinyin-dict/](./tools/pinyin-dict/):
66 | 拼音字典数据采集和校正程序,采集汉字数据并生成 SQLite 数据库。筷字输入法客户端的字/词典数据库也由该工具生成
67 | - [thirdparty/](./thirdparty/):
68 | 本项目所使用的第三方字词数据。注:使用命令
69 | `git submodule update --init thirdparty/OpenCC thirdparty/pinyin-data`
70 | 初始化该目录内的 git 子模块
71 |
72 | ## License
73 |
74 | [Apache 2.0](./LICENSE)
75 |
76 | ## 友情赞助
77 |
78 | **注**:赞助时请添加备注信息 `筷字输入法`。
79 |
80 | 详细的赞助清单请查看[《友情赞助清单》](./docs/donate/index.md)。
81 |
82 | | 支付宝 | 微信支付 |
83 | | -- | -- |
84 | | ![支付宝](./docs/donate/alipay.jpg) | ![微信支付](./docs/donate/wechat.png) |
85 |
86 | ## 功能特性
87 |
88 | - 以连续滑屏方式录入拼音,且单次录入最多仅需滑屏两次
89 | - 内置 [8VIM](https://github.com/8VIM/8VIM)/[8pen](./docs/Increasing_the_Efficiency_of_Text_Input_in_the_8pen_Method.pdf)
90 | 输入模式,以画圈方式进行拼音、英文和数字的连续输入,从而提供顺滑的输入体验
91 | - 所有候选字均附带显示完整的拼音,可清晰识别各个字的准确读音
92 | - 输入数据本地存储,不收集、不分析用户数据
93 | - 采用隐马尔科夫模型(Hidden Markov Models)和维特比(Viterbi)算法实现拼音输入预测
94 | - 仅支持精确的拼音输入匹配,**不支持**拼音联想输入和模糊匹配机制
95 | - 提供光标移动、文本选择、复制、粘贴、剪切等编辑功能,
96 | 同时,支持撤回已提交至目标编辑器的输入,以便于对输入进行修正
97 | - 支持左右手模式切换,以适应不同的用户使用习惯
98 | - 支持简单的数学四则运算,可以边输入边计算
99 | - 支持录入标点、表情等文本符号,提供丰富的内容表达形式
100 | - 支持直接输入括号、引号等配对的标点符号,并自动将选中内容包裹在配对符号中
101 | - 在拉丁字符与汉字、操作数与运算符之间自动添加空格,以规范输入格式
102 |
103 | https://github.com/user-attachments/assets/c5a3c769-0e6d-42e1-bc2f-babe85607bfb
104 |
105 | > Note: 以上音频和字幕由 [GitPodcast](https://www.gitpodcast.com/)
106 | > 生成,最终的视频则是通过 [Veed](https://www.veed.io) 制作而成。
107 |
108 |
109 |
110 | ## 功能演示
111 |
112 | > 最新版本的按键布局可能会与演示视频有差异,请以最新版本的为准。
113 |
114 | ### 拼音滑屏输入
115 |
116 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/eb2f74f9-f64e-4d02-ad80-98e3ecb9d61b
117 |
118 | ### 算术输入
119 |
120 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/f461b564-0ac4-4257-82ad-11afcd3e1d6c
121 |
122 | ### 内容编辑
123 |
124 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/992a0a5e-7e1e-4b93-a1ac-c893d0e3ff2e
125 |
126 | ### 表情符号匹配
127 |
128 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/7fff6ddf-9e10-408f-b160-3b3b8e2ab215
129 |
130 | ### 配对符号输入
131 |
132 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/97b0f99b-92e1-4b28-a5b6-d45150c4bada
133 |
134 | ### 拼音输入提交选项
135 |
136 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/274d41dd-e858-4b71-a041-31df3dd24f7d
137 |
138 | ### 类 8VIM/8pen 型输入
139 |
140 | https://github.com/crazydan-studio/kuaizi-ime/assets/1321315/534fa61e-34dc-4e81-a7d1-5eb7cc3b291f
141 |
--------------------------------------------------------------------------------
/docs/Increasing_the_Efficiency_of_Text_Input_in_the_8pen_Method.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/Increasing_the_Efficiency_of_Text_Input_in_the_8pen_Method.pdf
--------------------------------------------------------------------------------
/docs/design/cross-mode/00-latin-init.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/00-latin-init.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/10-8pen-input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/10-8pen-input.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/10-8vim-active-upper-case.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/10-8vim-active-upper-case.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/10-8vim-input-demo.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/10-8vim-input-demo.mov
--------------------------------------------------------------------------------
/docs/design/cross-mode/20-latin-input-active-zone.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/20-latin-input-active-zone.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/20-latin-input-another.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/20-latin-input-another.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/20-latin-input-expand-zone.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/20-latin-input-expand-zone.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/20-latin-input-lower-b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/20-latin-input-lower-b.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/20-latin-input-steps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/20-latin-input-steps.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/20-latin-input-upper-b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/20-latin-input-upper-b.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/30-latin-trace-shape-horizontal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/30-latin-trace-shape-horizontal.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/30-latin-trace-shape-vertical.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/30-latin-trace-shape-vertical.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/40-full-screen-input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/40-full-screen-input.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/40-input-trace-to-vector-horizontal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/40-input-trace-to-vector-horizontal.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/40-input-trace-to-vector-vertical.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/40-input-trace-to-vector-vertical.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/50-latin-blind-hit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/50-latin-blind-hit.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/60-select-latin-completion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/60-select-latin-completion.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/60-switch-latin-to-pinyin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/60-switch-latin-to-pinyin.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/70-pinyin-input-step-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/70-pinyin-input-step-1.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/70-pinyin-input-step-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/70-pinyin-input-step-2.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/75-pinyin-input-active-zone-level-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/75-pinyin-input-active-zone-level-2.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/75-pinyin-input-active-zone.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/75-pinyin-input-active-zone.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/75-pinyin-input-end.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/75-pinyin-input-end.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/75-pinyin-input-expand-zone-level-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/75-pinyin-input-expand-zone-level-1.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/75-pinyin-input-expand-zone-level-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/75-pinyin-input-expand-zone-level-2.png
--------------------------------------------------------------------------------
/docs/design/cross-mode/75-pinyin-input-expand-zone-level-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/cross-mode/75-pinyin-input-expand-zone-level-3.png
--------------------------------------------------------------------------------
/docs/design/x-mode/10-latin-input-steps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/design/x-mode/10-latin-input-steps.png
--------------------------------------------------------------------------------
/docs/donate/alipay.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/donate/alipay.jpg
--------------------------------------------------------------------------------
/docs/donate/index.md:
--------------------------------------------------------------------------------
1 | 友情赞助清单
2 | ================================
3 |
4 | | 赞助人 | 赞助日期 | 赞助途径 | 赞助金额 | 备注 |
5 | | -- | -- | -- | -- | -- |
6 | | \*头 | 2025-01-22 19:17:23 | 微信支付 (收款单号: \*577504) | 5 RMB | |
7 | | \*翔 | 2025-01-24 21:30:35 | 支付宝 (收款单号: \*782440) | 10 RMB | |
8 | | S\*t | 2025-01-29 16:36:04 | 微信支付 (收款单号: \*139532) | 20 RMB | |
9 | | \*生 | 2025-02-03 18:51:25 | 微信支付 (收款单号: \*380626) | 0.01 RMB | 囊中羞涩,但还是支持一下 😄 |
10 | | J\*3 | 2025-02-05 12:54:04 | 微信支付 (收款单号: \*395589) | 6.66 RMB | |
11 | | \*夜 | 2025-02-06 16:46:39 | 微信支付 (收款单号: \*953513) | 5 RMB | 希望筷子输入法增加剪切板功能 |
12 | | \*巴 | 2025-02-07 10:49:53 | 微信支付 (收款单号: \*959595) | 8.8 RMB | 要是有剪贴板就好了 |
13 | | \*\*漪 | 2025-02-09 01:40:09 | 支付宝 (收款单号: \*579449) | 11.45 RMB | 很特别的小玩意,支持一下 |
14 | | h\*p | 2025-02-14 12:27:48 | 微信支付 (收款单号: \*570557) | 10 RMB | 很有意思的输入法,支持一下 |
15 |
--------------------------------------------------------------------------------
/docs/donate/wechat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/donate/wechat.png
--------------------------------------------------------------------------------
/docs/image/kuaizi_ime_candidates_and_emojis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/image/kuaizi_ime_candidates_and_emojis.png
--------------------------------------------------------------------------------
/docs/image/kuaizi_ime_editor_editing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/image/kuaizi_ime_editor_editing.png
--------------------------------------------------------------------------------
/docs/image/kuaizi_ime_has_many_emojis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/image/kuaizi_ime_has_many_emojis.png
--------------------------------------------------------------------------------
/docs/image/kuaizi_ime_latin_case_input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/image/kuaizi_ime_latin_case_input.png
--------------------------------------------------------------------------------
/docs/image/kuaizi_ime_main.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/image/kuaizi_ime_main.png
--------------------------------------------------------------------------------
/docs/image/kuaizi_ime_math_expr_input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/image/kuaizi_ime_math_expr_input.png
--------------------------------------------------------------------------------
/docs/image/kuaizi_ime_pair_symbols.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/image/kuaizi_ime_pair_symbols.png
--------------------------------------------------------------------------------
/docs/image/kuaizi_ime_pinyin_slipping_input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/image/kuaizi_ime_pinyin_slipping_input.png
--------------------------------------------------------------------------------
/docs/image/kuaizi_ime_x_pad_input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/image/kuaizi_ime_x_pad_input.png
--------------------------------------------------------------------------------
/docs/video/kuaizi_ime_auto_completion.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/video/kuaizi_ime_auto_completion.mov
--------------------------------------------------------------------------------
/docs/video/kuaizi_ime_editor_editing.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/video/kuaizi_ime_editor_editing.mov
--------------------------------------------------------------------------------
/docs/video/kuaizi_ime_emoji_keyword_matching.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/video/kuaizi_ime_emoji_keyword_matching.mov
--------------------------------------------------------------------------------
/docs/video/kuaizi_ime_math_expr_input.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/video/kuaizi_ime_math_expr_input.mov
--------------------------------------------------------------------------------
/docs/video/kuaizi_ime_pair_symbol_input.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/video/kuaizi_ime_pair_symbol_input.mov
--------------------------------------------------------------------------------
/docs/video/kuaizi_ime_pinyin_comitting_options.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/video/kuaizi_ime_pinyin_comitting_options.mov
--------------------------------------------------------------------------------
/docs/video/kuaizi_ime_pinyin_slipping_input.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/video/kuaizi_ime_pinyin_slipping_input.mov
--------------------------------------------------------------------------------
/docs/video/kuaizi_ime_x_pad_input.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/docs/video/kuaizi_ime_x_pad_input.mov
--------------------------------------------------------------------------------
/logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
84 |
--------------------------------------------------------------------------------
/thirdparty/hanzi-level-3.txt:
--------------------------------------------------------------------------------
1 | # http://corpus.zhonghuayuwen.org/resources.aspx
2 | 亍
3 | 尢
4 | 彳
5 | 卬
6 | 殳
7 | 𠙶
8 | 毌
9 | 邘
10 | 戋
11 | 圢
12 | 氕
13 | 伋
14 | 仝
15 | 冮
16 | 氿
17 | 汈
18 | 氾
19 | 忉
20 | 宄
21 | 讱
22 | 扞
23 | 圲
24 | 圫
25 | 芏
26 | 芃
27 | 朳
28 | 朸
29 | 𨙸
30 | 邨
31 | 吒
32 | 吖
33 | 屼
34 | 屾
35 | 辿
36 | 钆
37 | 仳
38 | 伣
39 | 伈
40 | 癿
41 | 甪
42 | 邠
43 | 犴
44 | 冱
45 | 邡
46 | 闫
47 | 汋
48 | 䜣
49 | 讻
50 | 孖
51 | 纩
52 | 玒
53 | 玓
54 | 玘
55 | 玚
56 | 刬
57 | 坜
58 | 坉
59 | 扽
60 | 坋
61 | 扺
62 | 㧑
63 | 毐
64 | 芰
65 | 芣
66 | 苊
67 | 苉
68 | 芘
69 | 芴
70 | 芠
71 | 芤
72 | 杕
73 | 杙
74 | 杄
75 | 杧
76 | 杩
77 | 尪
78 | 尨
79 | 轪
80 | 坒
81 | 芈
82 | 旴
83 | 旵
84 | 呙
85 | 㕮
86 | 岍
87 | 岠
88 | 岜
89 | 呇
90 | 冏
91 | 觃
92 | 岙
93 | 伾
94 | 㑇
95 | 伭
96 | 佖
97 | 伲
98 | 佁
99 | 飏
100 | 狃
101 | 闶
102 | 汧
103 | 汫
104 | 𣲘
105 | 𣲗
106 | 沄
107 | 沘
108 | 汭
109 | 㳇
110 | 沇
111 | 忮
112 | 忳
113 | 忺
114 | 祃
115 | 诇
116 | 邲
117 | 诎
118 | 诐
119 | 屃
120 | 岊
121 | 阽
122 | 䢺
123 | 阼
124 | 妧
125 | 妘
126 | 𨚕
127 | 纮
128 | 驲
129 | 纻
130 | 纼
131 | 玤
132 | 玞
133 | 玱
134 | 玟
135 | 邽
136 | 邿
137 | 坥
138 | 坰
139 | 坬
140 | 坽
141 | 弆
142 | 耵
143 | 䢼
144 | 𦭜
145 | 茋
146 | 苧
147 | 苾
148 | 苠
149 | 枅
150 | 㭎
151 | 枘
152 | 枍
153 | 矼
154 | 矻
155 | 匼
156 | 旿
157 | 昇
158 | 昄
159 | 昒
160 | 昈
161 | 咉
162 | 咇
163 | 咍
164 | 岵
165 | 岽
166 | 岨
167 | 岞
168 | 峂
169 | 㟃
170 | 囷
171 | 钐
172 | 钔
173 | 钖
174 | 牥
175 | 佴
176 | 垈
177 | 侁
178 | 侹
179 | 佸
180 | 佺
181 | 隹
182 | 㑊
183 | 侂
184 | 佽
185 | 侘
186 | 郈
187 | 舠
188 | 郐
189 | 郃
190 | 攽
191 | 肭
192 | 肸
193 | 肷
194 | 狉
195 | 狝
196 | 饳
197 | 忞
198 | 於
199 | 炌
200 | 炆
201 | 泙
202 | 沺
203 | 泂
204 | 泜
205 | 泃
206 | 泇
207 | 怊
208 | 峃
209 | 穸
210 | 祋
211 | 祊
212 | 鸤
213 | 弢
214 | 弨
215 | 陑
216 | 陎
217 | 卺
218 | 乸
219 | 妭
220 | 姈
221 | 迳
222 | 叕
223 | 驵
224 | 䌹
225 | 驺
226 | 绋
227 | 绐
228 | 砉
229 | 耔
230 | 㛃
231 | 玶
232 | 珇
233 | 珅
234 | 珋
235 | 玹
236 | 珌
237 | 玿
238 | 韨
239 | 垚
240 | 垯
241 | 垙
242 | 垲
243 | 埏
244 | 垍
245 | 耇
246 | 垎
247 | 垴
248 | 垟
249 | 垞
250 | 挓
251 | 垵
252 | 垏
253 | 拶
254 | 荖
255 | 荁
256 | 荙
257 | 荛
258 | 茈
259 | 茽
260 | 荄
261 | 茺
262 | 荓
263 | 茳
264 | 𦰡
265 | 茛
266 | 荭
267 | 㭕
268 | 柷
269 | 柃
270 | 柊
271 | 枹
272 | 栐
273 | 柖
274 | 郚
275 | 剅
276 | 䴓
277 | 迺
278 | 厖
279 | 砆
280 | 砑
281 | 砄
282 | 耏
283 | 奓
284 | 䶮
285 | 轵
286 | 轷
287 | 轹
288 | 轺
289 | 昺
290 | 昽
291 | 盷
292 | 咡
293 | 咺
294 | 昳
295 | 昣
296 | 哒
297 | 昤
298 | 昫
299 | 昡
300 | 咥
301 | 昪
302 | 虷
303 | 虸
304 | 哃
305 | 峘
306 | 耑
307 | 峛
308 | 峗
309 | 峧
310 | 帡
311 | 钘
312 | 钜
313 | 钪
314 | 钬
315 | 钭
316 | 矧
317 | 秬
318 | 俫
319 | 舁
320 | 俜
321 | 俙
322 | 俍
323 | 垕
324 | 衎
325 | 舣
326 | 弇
327 | 侴
328 | 鸧
329 | 䏡
330 | 胠
331 | 𦙶
332 | 胈
333 | 胩
334 | 胣
335 | 朏
336 | 飐
337 | 訄
338 | 饻
339 | 庤
340 | 疢
341 | 炣
342 | 炟
343 | 㶲
344 | 洭
345 | 洘
346 | 洓
347 | 洿
348 | 㳚
349 | 泚
350 | 浈
351 | 浉
352 | 洸
353 | 洑
354 | 洢
355 | 洈
356 | 洚
357 | 洺
358 | 洨
359 | 浐
360 | 㳘
361 | 洴
362 | 洣
363 | 恔
364 | 宬
365 | 窀
366 | 扂
367 | 袆
368 | 祏
369 | 祐
370 | 祕
371 | 叚
372 | 陧
373 | 陞
374 | 娀
375 | 姞
376 | 姱
377 | 姤
378 | 姶
379 | 姽
380 | 枲
381 | 绖
382 | 骃
383 | 彖
384 | 骉
385 | 恝
386 | 珪
387 | 珛
388 | 珹
389 | 琊
390 | 玼
391 | 珖
392 | 珽
393 | 珦
394 | 珫
395 | 珒
396 | 珢
397 | 珕
398 | 珝
399 | 埗
400 | 垾
401 | 垺
402 | 埆
403 | 垿
404 | 埌
405 | 埇
406 | 莰
407 | 茝
408 | 鄀
409 | 莶
410 | 莝
411 | 䓖
412 | 莙
413 | 栻
414 | 桠
415 | 桄
416 | 梠
417 | 栴
418 | 梴
419 | 栒
420 | 酎
421 | 酏
422 | 砵
423 | 砠
424 | 砫
425 | 砬
426 | 硁
427 | 恧
428 | 翃
429 | 郪
430 | 𨐈
431 | 辀
432 | 辁
433 | 剕
434 | 赀
435 | 哢
436 | 晅
437 | 晊
438 | 唝
439 | 哳
440 | 哱
441 | 冔
442 | 晔
443 | 晐
444 | 晖
445 | 畖
446 | 蚄
447 | 蚆
448 | 帱
449 | 崁
450 | 峿
451 | 崄
452 | 帨
453 | 崀
454 | 赆
455 | 钷
456 | 眚
457 | 甡
458 | 笫
459 | 倻
460 | 倴
461 | 脩
462 | 倮
463 | 倕
464 | 倞
465 | 倓
466 | 倧
467 | 衃
468 | 虒
469 | 舭
470 | 舯
471 | 舥
472 | 瓞
473 | 鬯
474 | 鸰
475 | 脎
476 | 朓
477 | 胲
478 | 虓
479 | 鱽
480 | 狴
481 | 峱
482 | 狻
483 | 眢
484 | 勍
485 | 痄
486 | 疰
487 | 痃
488 | 竘
489 | 羖
490 | 羓
491 | 桊
492 | 敉
493 | 烠
494 | 烔
495 | 烶
496 | 烻
497 | 涍
498 | 浡
499 | 浭
500 | 浬
501 | 涄
502 | 涢
503 | 涐
504 | 浰
505 | 浟
506 | 浛
507 | 浼
508 | 浲
509 | 涘
510 | 悈
511 | 悃
512 | 悢
513 | 宧
514 | 窅
515 | 窊
516 | 窎
517 | 扅
518 | 扆
519 | 袪
520 | 袗
521 | 袯
522 | 祧
523 | 隺
524 | 堲
525 | 疍
526 | 𨺙
527 | 陴
528 | 烝
529 | 砮
530 | 㛚
531 | 哿
532 | 翀
533 | 翂
534 | 剟
535 | 绤
536 | 骍
537 | 䂮
538 | 琎
539 | 珸
540 | 珵
541 | 琄
542 | 琈
543 | 琀
544 | 珺
545 | 掭
546 | 堎
547 | 堐
548 | 埼
549 | 掎
550 | 埫
551 | 堌
552 | 晢
553 | 掞
554 | 埪
555 | 壸
556 | 㙍
557 | 聍
558 | 菝
559 | 萚
560 | 菥
561 | 莿
562 | 䓫
563 | 勚
564 | 䓬
565 | 萆
566 | 菂
567 | 菍
568 | 菼
569 | 萣
570 | 䓨
571 | 菉
572 | 䓛
573 | 梼
574 | 梽
575 | 桲
576 | 梾
577 | 桯
578 | 梣
579 | 梌
580 | 桹
581 | 敔
582 | 厣
583 | 硔
584 | 硙
585 | 硚
586 | 硊
587 | 硍
588 | 勔
589 | 䴕
590 | 龁
591 | 逴
592 | 唪
593 | 啫
594 | 翈
595 | 㫰
596 | 晙
597 | 畤
598 | 趼
599 | 跂
600 | 蛃
601 | 蚲
602 | 蚺
603 | 啴
604 | 䎃
605 | 崧
606 | 崟
607 | 崞
608 | 崒
609 | 崌
610 | 崡
611 | 铏
612 | 铕
613 | 铖
614 | 铘
615 | 铚
616 | 铞
617 | 铥
618 | 铴
619 | 牻
620 | 牿
621 | 稆
622 | 笱
623 | 笯
624 | 偰
625 | 偡
626 | 鸺
627 | 偭
628 | 偲
629 | 偁
630 | 㿠
631 | 鄅
632 | 偓
633 | 徛
634 | 衒
635 | 舳
636 | 舲
637 | 鸼
638 | 悆
639 | 鄃
640 | 瓻
641 | 䝙
642 | 脶
643 | 脞
644 | 脟
645 | 䏲
646 | 鱾
647 | 猇
648 | 猊
649 | 猄
650 | 觖
651 | 𠅤
652 | 庱
653 | 庼
654 | 庳
655 | 痓
656 | 䴔
657 | 竫
658 | 堃
659 | 阌
660 | 羝
661 | 羕
662 | 焆
663 | 烺
664 | 焌
665 | 淏
666 | 淟
667 | 淜
668 | 淴
669 | 淯
670 | 湴
671 | 涴
672 | 㥄
673 | 惛
674 | 惔
675 | 悰
676 | 惙
677 | 寁
678 | 逭
679 | 袼
680 | 裈
681 | 祲
682 | 谞
683 | 艴
684 | 弸
685 | 弶
686 | 隃
687 | 婞
688 | 娵
689 | 婼
690 | 媖
691 | 婳
692 | 婍
693 | 婌
694 | 婫
695 | 婤
696 | 婘
697 | 婠
698 | 绹
699 | 骕
700 | 絜
701 | 珷
702 | 琲
703 | 琡
704 | 琟
705 | 琔
706 | 琭
707 | 堾
708 | 堼
709 | 揕
710 | 㙘
711 | 堧
712 | 喆
713 | 堨
714 | 塅
715 | 堠
716 | 絷
717 | 𡎚
718 | 葜
719 | 惎
720 | 萳
721 | 葙
722 | 靬
723 | 葴
724 | 蒇
725 | 蒈
726 | 鄚
727 | 蒉
728 | 蓇
729 | 萩
730 | 蒐
731 | 葰
732 | 葎
733 | 鄑
734 | 蒎
735 | 葖
736 | 蒄
737 | 萹
738 | 棤
739 | 棽
740 | 棫
741 | 椓
742 | 椑
743 | 鹀
744 | 椆
745 | 棓
746 | 棬
747 | 棪
748 | 椀
749 | 楗
750 | 甦
751 | 酦
752 | 觌
753 | 奡
754 | 皕
755 | 硪
756 | 欹
757 | 詟
758 | 辌
759 | 棐
760 | 龂
761 | 黹
762 | 牚
763 | 睎
764 | 晫
765 | 晪
766 | 晱
767 | 𧿹
768 | 蛑
769 | 畯
770 | 斝
771 | 喤
772 | 崶
773 | 嵁
774 | 崾
775 | 嵅
776 | 崿
777 | 嵚
778 | 翙
779 | 圌
780 | 圐
781 | 赑
782 | 淼
783 | 赒
784 | 铹
785 | 铽
786 | 𨱇
787 | 锊
788 | 锍
789 | 锎
790 | 锓
791 | 犇
792 | 颋
793 | 稌
794 | 筀
795 | 筘
796 | 筜
797 | 筥
798 | 筅
799 | 傃
800 | 傉
801 | 翛
802 | 傒
803 | 傕
804 | 舾
805 | 畬
806 | 脿
807 | 腘
808 | 䐃
809 | 腙
810 | 腒
811 | 鲃
812 | 猰
813 | 猯
814 | 㺄
815 | 馉
816 | 凓
817 | 鄗
818 | 廋
819 | 廆
820 | 鄌
821 | 粢
822 | 遆
823 | 旐
824 | 焞
825 | 欻
826 | 𣸣
827 | 溚
828 | 溁
829 | 湝
830 | 渰
831 | 湓
832 | 㴔
833 | 渟
834 | 溠
835 | 渼
836 | 溇
837 | 湣
838 | 湑
839 | 溞
840 | 愐
841 | 愃
842 | 敩
843 | 甯
844 | 棨
845 | 扊
846 | 裣
847 | 祼
848 | 婻
849 | 媆
850 | 媞
851 | 㛹
852 | 媓
853 | 媂
854 | 媄
855 | 毵
856 | 矞
857 | 缊
858 | 缐
859 | 骙
860 | 瑃
861 | 瑓
862 | 瑅
863 | 瑆
864 | 䴖
865 | 瑖
866 | 瑝
867 | 瑔
868 | 瑀
869 | 𤧛
870 | 瑳
871 | 瑂
872 | 嶅
873 | 瑑
874 | 遘
875 | 髢
876 | 塥
877 | 堽
878 | 赪
879 | 摛
880 | 塝
881 | 搒
882 | 搌
883 | 蒱
884 | 蒨
885 | 蓏
886 | 蔀
887 | 蓢
888 | 蓂
889 | 蒻
890 | 蓣
891 | 椹
892 | 楪
893 | 榃
894 | 榅
895 | 楒
896 | 楞
897 | 楩
898 | 榇
899 | 椸
900 | 楙
901 | 歅
902 | 碃
903 | 碏
904 | 碈
905 | 䃅
906 | 硿
907 | 鄠
908 | 辒
909 | 龆
910 | 觜
911 | 䣘
912 | 暕
913 | 鹍
914 | 㬊
915 | 暅
916 | 跱
917 | 蜐
918 | 蜎
919 | 嵲
920 | 赗
921 | 骱
922 | 锖
923 | 锘
924 | 锳
925 | 锧
926 | 锪
927 | 锫
928 | 锬
929 | 稑
930 | 稙
931 | 䅟
932 | 筻
933 | 筼
934 | 筶
935 | 筦
936 | 筤
937 | 傺
938 | 鹎
939 | 僇
940 | 艅
941 | 艉
942 | 谼
943 | 貆
944 | 腽
945 | 腨
946 | 腯
947 | 鲉
948 | 鲊
949 | 鲌
950 | 䲟
951 | 鲏
952 | 雊
953 | 猺
954 | 飔
955 | 觟
956 | 𦝼
957 | 馌
958 | 裛
959 | 廒
960 | 瘀
961 | 瘅
962 | 鄘
963 | 鹒
964 | 鄜
965 | 麀
966 | 鄣
967 | 阘
968 | 煁
969 | 煃
970 | 煴
971 | 煋
972 | 煟
973 | 煓
974 | 滠
975 | 溍
976 | 溹
977 | 滆
978 | 滉
979 | 溦
980 | 溵
981 | 漷
982 | 滧
983 | 滘
984 | 滍
985 | 愭
986 | 慥
987 | 慆
988 | 塱
989 | 裼
990 | 禋
991 | 禔
992 | 禘
993 | 禒
994 | 谫
995 | 鹔
996 | 愍
997 | 嫄
998 | 媱
999 | 戤
1000 | 勠
1001 | 戣
1002 | 缞
1003 | 耤
1004 | 瑧
1005 | 瑨
1006 | 瑱
1007 | 瑷
1008 | 瑢
1009 | 斠
1010 | 摏
1011 | 墕
1012 | 墈
1013 | 墐
1014 | 墘
1015 | 摴
1016 | 銎
1017 | 𡐓
1018 | 墚
1019 | 撖
1020 | 靽
1021 | 鞁
1022 | 蔌
1023 | 蔈
1024 | 蓰
1025 | 蔹
1026 | 蔊
1027 | 嘏
1028 | 榰
1029 | 榑
1030 | 槚
1031 | 𣗋
1032 | 槜
1033 | 榍
1034 | 疐
1035 | 酺
1036 | 酾
1037 | 酲
1038 | 酴
1039 | 碶
1040 | 䃎
1041 | 碨
1042 | 𥔲
1043 | 碹
1044 | 碥
1045 | 劂
1046 | 䴗
1047 | 夥
1048 | 瞍
1049 | 鹖
1050 | 㬎
1051 | 跽
1052 | 蜾
1053 | 幖
1054 | 嶍
1055 | 圙
1056 | 𨱏
1057 | 锺
1058 | 锼
1059 | 锽
1060 | 锾
1061 | 锿
1062 | 镃
1063 | 镄
1064 | 镅
1065 | 馝
1066 | 鹙
1067 | 箨
1068 | 箖
1069 | 劄
1070 | 僬
1071 | 僦
1072 | 僔
1073 | 僎
1074 | 槃
1075 | 㙦
1076 | 鲒
1077 | 鲕
1078 | 鲖
1079 | 鲗
1080 | 鲘
1081 | 鲙
1082 | 𩽾
1083 | 夐
1084 | 獍
1085 | 飗
1086 | 凘
1087 | 廑
1088 | 廙
1089 | 瘗
1090 | 瘥
1091 | 瘕
1092 | 鲝
1093 | 鄫
1094 | 熇
1095 | 漹
1096 | 漖
1097 | 潆
1098 | 漤
1099 | 潩
1100 | 漼
1101 | 漴
1102 | 㽏
1103 | 漈
1104 | 漋
1105 | 漻
1106 | 慬
1107 | 窬
1108 | 窭
1109 | 㮾
1110 | 褕
1111 | 禛
1112 | 禚
1113 | 隩
1114 | 嫕
1115 | 嫭
1116 | 嫜
1117 | 嫪
1118 | 㻬
1119 | 麹
1120 | 璆
1121 | 漦
1122 | 叇
1123 | 墣
1124 | 墦
1125 | 墡
1126 | 劐
1127 | 薁
1128 | 蕰
1129 | 蔃
1130 | 鼒
1131 | 槱
1132 | 鹝
1133 | 磏
1134 | 磉
1135 | 殣
1136 | 慭
1137 | 霅
1138 | 暵
1139 | 暲
1140 | 暶
1141 | 踦
1142 | 踣
1143 | 䗖
1144 | 蝘
1145 | 蝲
1146 | 蝤
1147 | 噇
1148 | 噂
1149 | 噀
1150 | 罶
1151 | 嶲
1152 | 嶓
1153 | 㠇
1154 | 嶟
1155 | 嶒
1156 | 镆
1157 | 镈
1158 | 镋
1159 | 镎
1160 | 镕
1161 | 稹
1162 | 儇
1163 | 皞
1164 | 皛
1165 | 䴘
1166 | 艎
1167 | 艏
1168 | 鹟
1169 | 𩾃
1170 | 鲦
1171 | 鲪
1172 | 鲬
1173 | 橥
1174 | 觭
1175 | 鹠
1176 | 鹡
1177 | 糇
1178 | 糈
1179 | 翦
1180 | 鹢
1181 | 鹣
1182 | 熛
1183 | 潖
1184 | 潵
1185 | 㵐
1186 | 澂
1187 | 澛
1188 | 瑬
1189 | 潽
1190 | 潾
1191 | 潏
1192 | 憭
1193 | 憕
1194 | 戭
1195 | 褯
1196 | 禤
1197 | 嫽
1198 | 遹
1199 | 璥
1200 | 璲
1201 | 璒
1202 | 憙
1203 | 擐
1204 | 鄹
1205 | 薳
1206 | 鞔
1207 | 黇
1208 | 蕗
1209 | 薢
1210 | 蕹
1211 | 橞
1212 | 橑
1213 | 橦
1214 | 醑
1215 | 觱
1216 | 磡
1217 | 𥕢
1218 | 磜
1219 | 豮
1220 | 鹾
1221 | 虤
1222 | 暿
1223 | 曌
1224 | 曈
1225 | 㬚
1226 | 蹅
1227 | 踶
1228 | 䗛
1229 | 螗
1230 | 疁
1231 | 㠓
1232 | 幪
1233 | 嶦
1234 | 𨱑
1235 | 馞
1236 | 穄
1237 | 篚
1238 | 篯
1239 | 簉
1240 | 鼽
1241 | 衠
1242 | 盦
1243 | 螣
1244 | 縢
1245 | 鲭
1246 | 鲯
1247 | 鲰
1248 | 鲺
1249 | 鲹
1250 | 亸
1251 | 癀
1252 | 瘭
1253 | 羱
1254 | 糒
1255 | 燋
1256 | 熻
1257 | 燊
1258 | 燚
1259 | 燏
1260 | 濩
1261 | 濋
1262 | 澪
1263 | 澽
1264 | 澴
1265 | 澭
1266 | 澼
1267 | 憷
1268 | 憺
1269 | 懔
1270 | 黉
1271 | 嬛
1272 | 鹨
1273 | 翯
1274 | 璱
1275 | 𤩽
1276 | 璬
1277 | 璮
1278 | 髽
1279 | 擿
1280 | 薿
1281 | 薸
1282 | 檑
1283 | 櫆
1284 | 檞
1285 | 醨
1286 | 繄
1287 | 磹
1288 | 磻
1289 | 瞫
1290 | 瞵
1291 | 蹐
1292 | 蟏
1293 | 㘎
1294 | 镤
1295 | 镥
1296 | 镨
1297 | 𨱔
1298 | 矰
1299 | 穙
1300 | 穜
1301 | 穟
1302 | 簕
1303 | 簃
1304 | 簏
1305 | 儦
1306 | 魋
1307 | 斶
1308 | 艚
1309 | 谿
1310 | 䲠
1311 | 鲾
1312 | 鲿
1313 | 鳁
1314 | 鳂
1315 | 鳈
1316 | 鳉
1317 | 獯
1318 | 䗪
1319 | 馘
1320 | 襕
1321 | 襚
1322 | 螱
1323 | 甓
1324 | 嬬
1325 | 嬥
1326 | 𦈡
1327 | 瓀
1328 | 釐
1329 | 鬶
1330 | 爇
1331 | 鞳
1332 | 鞮
1333 | 藟
1334 | 藦
1335 | 藨
1336 | 鹲
1337 | 檫
1338 | 黡
1339 | 礞
1340 | 礌
1341 | 𥖨
1342 | 蹢
1343 | 蹜
1344 | 蟫
1345 | 䗴
1346 | 嚚
1347 | 髃
1348 | 镮
1349 | 镱
1350 | 酂
1351 | 馧
1352 | 簠
1353 | 簝
1354 | 簰
1355 | 鼫
1356 | 鼩
1357 | 皦
1358 | 臑
1359 | 䲢
1360 | 鳑
1361 | 鳒
1362 | 鹱
1363 | 鹯
1364 | 癗
1365 | 𦒍
1366 | 旞
1367 | 翷
1368 | 冁
1369 | 䎖
1370 | 瀔
1371 | 瀍
1372 | 瀌
1373 | 襜
1374 | 䴙
1375 | 嚭
1376 | 㰀
1377 | 鬷
1378 | 醭
1379 | 蹯
1380 | 蠋
1381 | 翾
1382 | 鳘
1383 | 儳
1384 | 儴
1385 | 鼗
1386 | 𩾌
1387 | 鳚
1388 | 鳛
1389 | 麑
1390 | 麖
1391 | 蠃
1392 | 彟
1393 | 嬿
1394 | 鬒
1395 | 蘘
1396 | 欂
1397 | 醵
1398 | 颥
1399 | 甗
1400 | 𨟠
1401 | 巇
1402 | 酅
1403 | 髎
1404 | 犨
1405 | 𨭉
1406 | 㸌
1407 | 爔
1408 | 瀱
1409 | 瀹
1410 | 瀼
1411 | 瀵
1412 | 襫
1413 | 孅
1414 | 骦
1415 | 耰
1416 | 𤫉
1417 | 瓖
1418 | 鬘
1419 | 趯
1420 | 罍
1421 | 鼱
1422 | 鳠
1423 | 鳡
1424 | 鳣
1425 | 爟
1426 | 爚
1427 | 灈
1428 | 韂
1429 | 糵
1430 | 蘼
1431 | 礵
1432 | 鹴
1433 | 躔
1434 | 皭
1435 | 龢
1436 | 鳤
1437 | 亹
1438 | 籥
1439 | 鼷
1440 | 玃
1441 | 醾
1442 | 齇
1443 | 觿
1444 | 蠼
--------------------------------------------------------------------------------
/thirdparty/古代汉语语料库字频表.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/thirdparty/古代汉语语料库字频表.xls
--------------------------------------------------------------------------------
/thirdparty/现代汉语常用字表.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/thirdparty/现代汉语常用字表.xls
--------------------------------------------------------------------------------
/thirdparty/现代汉语语料库分词类词频表.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/thirdparty/现代汉语语料库分词类词频表.xls
--------------------------------------------------------------------------------
/thirdparty/现代汉语语料库字频表.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/thirdparty/现代汉语语料库字频表.xls
--------------------------------------------------------------------------------
/thirdparty/现代汉语语料库词频表.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/thirdparty/现代汉语语料库词频表.xls
--------------------------------------------------------------------------------
/thirdparty/现代汉语通用字表.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/thirdparty/现代汉语通用字表.xls
--------------------------------------------------------------------------------
/thirdparty/通用规范汉字表.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crazydan-studio/kuaizi-ime/3eaa40866034455c8938a645c9c1e08047f1a253/thirdparty/通用规范汉字表.xls
--------------------------------------------------------------------------------
/tools/analyze/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2019–2020 Observable, Inc.
2 |
3 | Permission to use, copy, modify, and/or distribute this software for any
4 | purpose with or without fee is hereby granted, provided that the above
5 | copyright notice and this permission notice appear in all copies.
6 |
7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14 |
--------------------------------------------------------------------------------
/tools/analyze/README.md:
--------------------------------------------------------------------------------
1 | 拼音按键布局在线分析工具
2 | ==============================
3 |
4 | > 本代码改造自
5 | > [Force-Directed Tree](https://observablehq.com/@d3/force-directed-tree@183)
6 | > 和 [Collapsible tree](https://observablehq.com/@d3/collapsible-tree)。
7 |
8 | 在当前目录中执行命令以启动静态页面服务:
9 |
10 | ```sh
11 | npx http-server
12 | ```
13 |
14 | > 需先安装 [NodeJS](https://nodejs.org/)。
15 |
16 | ## 汉语拼音字母后继树
17 |
18 | 访问地址 http://127.0.0.1:8080/char-tree.html
19 | 以查看拼音的后继字母的树形结构,从而规划出适合滑屏输入的拼音字母的按键布局。
20 |
21 | > 代码为 [char-tree.js](./char-tree.js)。
22 |
23 | 
24 |
25 | ## 汉语拼音字母组合树
26 |
27 | 访问地址 http://127.0.0.1:8080/char-links.html
28 | 以查看拼音字母的组合关系。
29 |
30 | > 代码为 [char-links.js](./char-links.js)。
31 |
32 | 
33 |
34 | ## 汉语拼音划词模拟
35 |
36 | 访问地址 http://127.0.0.1:8080/simulate.html
37 | 以查看规划的按键布局是否符合要求。
38 |
39 | > 代码为 [simulate.js](./simulate.js)。
40 |
41 | 
42 |
43 | ## 外部资料
44 |
45 | ### 拼音字母统计频率
46 |
47 | > - 数据来自于[@軒轅羽](https://www.zhihu.com/question/23111438/answer/559582999)
48 | > - 脚本:`echo $data | sort -r -n -k 2`
49 |
50 | | 声母 | 频率 |
51 | | -- | -- |
52 | | d | 10.29% |
53 | | y | 9.69% |
54 | | sh | 8.04% |
55 | | j | 6.86% |
56 | | zh | 6.52% |
57 | | x | 5.86% |
58 | | g | 5.64% |
59 | | l | 4.61% |
60 | | b | 4.49% |
61 | | h | 4.10% |
62 | | z | 3.65% |
63 | | w | 3.26% |
64 | | q | 3.23% |
65 | | ch | 3.02% |
66 | | f | 2.97% |
67 | | m | 2.90% |
68 | | t | 2.79% |
69 | | r | 2.76% |
70 | | n | 2.18% |
71 | | k | 1.85% |
72 | | s | 1.47% |
73 | | p | 1.40% |
74 | | c | 1.27% |
75 |
76 | | 单字母韵母 | 频率 |
77 | | -- | -- |
78 | | i | 15.81% |
79 | | e | 10.48% |
80 | | u | 9.08% |
81 | | a | 2.95% |
82 | | o | 0.51% |
83 | | ü | 0.40% |
84 |
85 | | 多字母韵母 | 频率 |
86 | | -- | -- |
87 | | ao | 4.04% |
88 | | ai | 3.91% |
89 | | an | 3.78% |
90 | | ang | 3.45% |
91 | | en | 4.02% |
92 | | eng | 3.26% |
93 | | ei | 2.83% |
94 | | er | 0.57% |
95 | | ian | 3.68% |
96 | | ing | 3.68% |
97 | | in | 2.07% |
98 | | iang | 1.57% |
99 | | iao | 1.83% |
100 | | ie | 1.24% |
101 | | ia | 1.18% |
102 | | iu | 1.09% |
103 | | iong | 0.02% |
104 | | ong | 3.83% |
105 | | ou | 3.35% |
106 | | uo | 3.41% |
107 | | uan | 2.73% |
108 | | ui | 2.14% |
109 | | ue | 1.29% |
110 | | un | 0.96% |
111 | | uang | 0.41% |
112 | | ua | 0.35% |
113 | | uai | 0.10% |
114 | | üe | 0.03% |
115 |
116 | ### 英文字母统计频率
117 |
118 | > - 数据来自于[英语单词中首字母的频率](https://zh.wikipedia.org/zh-cn/%E5%AD%97%E6%AF%8D%E9%A2%91%E7%8E%87#.E8.8B.B1.E8.AF.AD.E5.8D.95.E8.AF.8D.E4.B8.AD.E9.A6.96.E5.AD.97.E6.AF.8D.E7.9A.84.E9.A2.91.E7.8E.87)
119 | > - 脚本:`echo $data | sort -r -n -k 2`
120 |
121 | | 首字母 | 频率 |
122 | | -- | -- |
123 | | t | 16.671% |
124 | | a | 11.602% |
125 | | s | 7.755% |
126 | | h | 7.232% |
127 | | w | 6.753% |
128 | | i | 6.286% |
129 | | o | 6.264% |
130 | | b | 4.702% |
131 | | m | 4.374% |
132 | | f | 3.779% |
133 | | c | 3.511% |
134 | | l | 2.705% |
135 | | d | 2.670% |
136 | | p | 2.545% |
137 | | n | 2.365% |
138 | | e | 2.007% |
139 | | g | 1.950% |
140 | | r | 1.653% |
141 | | y | 1.620% |
142 | | u | 1.487% |
143 | | v | 0.649% |
144 | | j | 0.597% |
145 | | k | 0.590% |
146 | | q | 0.173% |
147 | | x | 0.037% |
148 | | z | 0.034% |
149 |
--------------------------------------------------------------------------------
/tools/analyze/char-links.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
/** Articles whose title contains any of these keywords are skipped. */
const IGNORED_TITLE_KEYWORDS = [
  '生字表',
  '写字表',
  '识字表',
  '练习版',
  '唐诗',
  '诗词',
  '诗歌',
  '词四首',
  '课文版',
  '世说新语',
  '短诗',
  '庄子',
  '老子',
  '离骚',
  '一年级',
  '二年级'
];

/** Articles whose subtitle contains any of these keywords are skipped. */
const IGNORED_SUBTITLE_KEYWORDS = ['一年级', '二年级'];

/**
 * Collect the clauses of every article that is not filtered out by its
 * title or subtitle.
 *
 * The title, the subtitle and each paragraph are all handed to
 * readClausesFromPargraph, in that order, and the clauses it returns are
 * appended to a single flat result list.
 *
 * @param articles Array shaped like
 *   [{title: [...], subtitle: [...], pargraphs: [[...], ...]}, {...}, ...];
 *   every title / subtitle / paragraph is an array of objects that carry
 *   a `zi` property
 * @param words Passed through unchanged to readClausesFromPargraph
 * @param symbols Passed through unchanged to readClausesFromPargraph
 * @returns Flat array of the clauses produced for all analysed articles
 */
function readClausesFromArticles(articles, words, symbols) {
  const clauses = [];

  articles.forEach(({ title, subtitle, pargraphs }) => {
    const titleText = title.map((w) => w.zi).join('');
    const subtitleText = subtitle.map((w) => w.zi).join('');

    const ignored =
      IGNORED_TITLE_KEYWORDS.some((keyword) => titleText.includes(keyword)) ||
      IGNORED_SUBTITLE_KEYWORDS.some((keyword) =>
        subtitleText.includes(keyword)
      );

    if (ignored) {
      console.log(` - 忽略文章: ${titleText}`);
      return;
    }
    console.log(` - 分析文章: ${titleText}`);

    // Append in place: re-assigning `clauses.concat(...)` would copy the
    // whole accumulated list again for every single paragraph.
    for (const p of [title, subtitle].concat(pargraphs)) {
      for (const clause of readClausesFromPargraph(p, words, symbols)) {
        clauses.push(clause);
      }
    }
  });

  return clauses;
}
/**
 * Split one paragraph into clauses of `字:pinyin` tokens.
 *
 * Entries that carry a pinyin are appended to the current clause when the
 * (corrected) character has that (corrected) pinyin in `words`; otherwise
 * they are reported on stderr and left out. Entries without a pinyin are
 * tallied in `symbols`, and those accepted by isClauseEnd close the
 * current clause.
 *
 * @param pargraph Array shaped like [{zi: '字', py: 'zì'}, {zi: ','}, {...}, ...]
 * @param words Object mapping a character to the array of its pinyins
 * @param symbols Object used as a counter of the entries without pinyin;
 *   it is updated in place
 * @returns Array of non-empty clauses, each an array of `字:pinyin` strings
 */
function readClausesFromPargraph(pargraph, words, symbols) {
  const clauses = [];
  let clause = [];
  // Store the clause collected so far (if any) and start a new one
  const flush = () => {
    if (clause.length > 0) {
      clauses.push(clause);
    }
    clause = [];
  };

  let prev = {};
  for (const curr of pargraph) {
    const zi = getCorrectWord(curr);
    const py = getCorrectPinyin(curr, prev);
    prev = curr;

    if (!py) {
      symbols[zi] = (symbols[zi] || 0) + 1;

      // End of the clause
      if (isClauseEnd(zi)) {
        flush();
      }
      continue;
    }

    if (/\w+/.test(zi)) {
      console.error(` - 非汉字:${curr.zi}:${curr.py}`);
    } else if ((words[zi] || []).includes(py)) {
      clause.push(`${zi}:${py}`);
    } else {
      console.error(` - 不存在拼音字: ${curr.zi}:${curr.py}`);
    }
  }
  flush();

  return clauses;
}

/** Punctuation marks that terminate a clause. */
const CLAUSE_END_MARKS = new Set([',', '。', ';', ':', '?', '!', '∶', '…']);

/** @returns true when `zi` is one of the clause-terminating punctuation marks */
function isClauseEnd(zi) {
  return CLAUSE_END_MARKS.has(zi);
}

/**
 * Characters to substitute, keyed by `original character:pinyin in the data`.
 */
const WORD_CORRECTIONS = new Map(
  Object.entries({
    '轮:lūn': '抡',
    // https://www.cngwzj.com/pygushi/SongDai/72474/
    '纤:lián': '廉',
    // https://www.cngwzj.com/pygushi/SongDai/61484/
    '沉:shěn': '沈',
    // https://www.cngwzj.com/pygushi/SongDai/57152/
    // https://baike.baidu.com/item/%E5%BA%86%E5%AE%AB%E6%98%A5%C2%B7%E5%8F%8C%E6%A1%A8%E8%8E%BC%E6%B3%A2/9918314
    '挡:dāng': '珰'
  })
);

/**
 * @returns the character to use for the entry: the substitute registered
 *   for its `zi:py` pair, or `zi` itself when there is none
 */
function getCorrectWord({ zi, py }) {
  return WORD_CORRECTIONS.get(`${zi}:${py}`) ?? zi;
}

/**
 * Reduplicated kinship terms: when the character repeats the previous
 * one, its pinyin is reduced to the tone-less letters.
 */
const REDUPLICATED_KINSHIP_WORDS = new Set([
  '爸',
  '妈',
  '哥',
  '弟',
  '姐',
  '妹',
  '爷',
  '奶',
  '婶',
  '叔'
]);

/** Characters after which '儿' is given the pinyin 'ér'. */
const ER_PRECEDING_WORDS = new Set(['墩', '褂', '势', '猴', '点', '劲']);

/**
 * Pinyin corrections. A bare character key replaces whatever pinyin the
 * entry carries; a `character:pinyin` key replaces only that exact pair.
 * Built once instead of on every call.
 */
const PINYIN_REPLACEMENTS = new Map(
  Object.entries({
    其: 'qí',
    实: 'shí',
    他: 'tā',
    朴: 'pǔ',
    笼: 'lóng',
    牛: 'niú',
    妞: 'niū',
    剔: 'tī',
    菇: 'gū',
    活: 'huó',
    笛: 'dí',
    杵: 'chǔ',
    釭: 'gāng',
    墩: 'dūn',
    褂: 'guà',
    势: 'shì',
    猴: 'hóu',
    点: 'diǎn',
    //
    '景:ijǐng': 'jǐng',
    '温:yùn': 'wēn',
    '篷:peng': 'péng',
    '蓬:peng': 'péng',
    '晨:chen': 'chén',
    '袋:dai': 'dài',
    '来:lai': 'lái',
    '枉:wang': 'wǎng',
    '蟆:ma': 'má',
    '铛:dang': 'dāng',
    '闷:men': 'mèn',
    '粱:liang': 'liáng',
    '里:li': 'lǐ',
    '角:gǔ': 'jiǎo',
    '那:nàr': 'nà',
    '时:shi': 'shí',
    '焚:fèn': 'fén',
    '亮:liang': 'liàng',
    '道:dao': 'dào',
    '家:gū': 'jiā',
    '司:si': 'sī',
    '上:shang': 'shàng',
    '是:shi': 'shì',
    '不:bu': 'bù',
    '芦:lu': 'lú',
    '莫:mo': 'mò',
    '夫:fu': 'fū',
    '么:mò': 'me',
    '少:shāo': 'shǎo',
    '搁:ge': 'gē',
    '地:di': 'dì',
    '呵:ā': 'a',
    '劲:jìnr': 'jìn',
    '碌:lū': 'lù',
    '碌:lu': 'lù'
  })
);

/**
 * Work out the pinyin to use for an entry.
 *
 * @param entry The current `{zi, py}` entry
 * @param prev The entry right before it (an empty object for the first one)
 * @returns the corrected pinyin, or `py` unchanged when no rule applies
 */
function getCorrectPinyin({ zi, py }, prev) {
  if (zi === '看') {
    return prev.zi === zi ? 'kàn' : py;
  }
  if (REDUPLICATED_KINSHIP_WORDS.has(zi)) {
    return prev.zi === zi ? extractPinyinChars(py) : py;
  }
  if (zi === '儿') {
    return ER_PRECEDING_WORDS.has(prev.zi) ? 'ér' : py;
  }

  // Direct lookups replace the former scan over every key of the table
  if (PINYIN_REPLACEMENTS.has(zi)) {
    return PINYIN_REPLACEMENTS.get(zi);
  }
  const pair = `${zi}:${py}`;
  return PINYIN_REPLACEMENTS.has(pair) ? PINYIN_REPLACEMENTS.get(pair) : py;
}

// --------------------------------------------------------------------------------
// /tools/pinyin-dict/src/generate/sqlite/phrase/hmm/utils.mjs:
// --------------------------------------------------------------------------------
/**
 * Read the characters and their pinyins from the dictionary database.
 *
 * Runs a single query against the `pinyin_word` table and groups the
 * `spell_` values by `word_`, keeping the order in which rows come back.
 *
 * @param wordDictDB Database handle exposing an async `all(sql)` method
 * @returns Object shaped like {'字': ['zì'], ...}
 */
export async function readWordsFromDB(wordDictDB) {
  const rows = await wordDictDB.all(`select word_, spell_ from pinyin_word`);

  const words = {};
  for (const { word_, spell_ } of rows) {
    (words[word_] ||= []).push(spell_);
  }

  return words;
}
/**
 * Count the transitions between characters (states) inside each clause.
 *
 * For every clause the sequence BOS -> token 1 -> ... -> token n -> EOS is
 * walked. The result is keyed by the current token; each value maps the
 * preceding token to the number of times it was seen, and additionally
 * keeps the sum of those counts under `__total__`.
 *
 * @param clauses Array shaped like [['字:zì', ...], [...], ...]
 * @param existTransProb Optional counts from an earlier run; they are
 *   updated in place and returned
 * @returns The transition counts
 */
export function countTrans(clauses, existTransProb) {
  const transProb = existTransProb || {};

  clauses.forEach((clause) => {
    let prev = 'BOS';

    for (const curr of [...clause, 'EOS']) {
      const prob = transProb[curr] || (transProb[curr] = {});

      prob[prev] = (prob[prev] || 0) + 1;
      // Transition probability: math.log(count of the preceding token / total)
      prob.__total__ = (prob.__total__ || 0) + 1;

      prev = curr;
    }
  });

  return transProb;
}
/**
 * Count how many times each token occurs in the clauses.
 *
 * @param clauses Array shaped like [['字:zì', ...], [...], ...]
 * @param existWordProp Optional counts from an earlier run; they are
 *   updated in place and returned
 * @returns Object mapping every token to its number of occurrences
 */
export function countWords(clauses, existWordProp) {
  const wordProb = existWordProp || {};

  clauses.forEach((clause) => {
    for (const curr of clause) {
      wordProb[curr] = (wordProb[curr] || 0) + 1;
    }
  });

  return wordProb;
}

// --------------------------------------------------------------------------------
// /tools/pinyin-dict/src/generate/sqlite/word/diff.mjs:
// --------------------------------------------------------------------------------
/* Compare the data of two versions of the SQLite dictionary database */
import { fromRootPath, asyncForEach } from '#utils/utils.mjs';

import { openDB, closeDB } from '#utils/sqlite.mjs';

const oldDictDataSQLiteFile = fromRootPath(
  'data',
  'pinyin-word-dict.v2.sqlite'
);
const dictDataSQLiteFile = fromRootPath('data', 'pinyin-word-dict.sqlite');

let oldDb;
let newDb;

try {
  // Opened inside the try so that a failure on the second database still
  // closes the first one
  oldDb = await openDB(oldDictDataSQLiteFile);
  newDb = await openDB(dictDataSQLiteFile);

  console.log();
  console.log('对比元数据的差异 ...');
  await diffMetaData(oldDb, newDb);

  console.log();
  console.log('对比字数据的差异 ...');
  await diffWordData(oldDb, newDb);
} finally {
  if (oldDb) {
    await closeDB(oldDb);
  }
  if (newDb) {
    await closeDB(newDb);
  }
}

/** Load a meta table as an object mapping `value_` to `{ id_ }`. */
async function loadMetaIdsByValue(db, table) {
  const data = {};

  (await db.all(`select * from ${table}`)).forEach((row) => {
    data[row.value_] = { id_: row.id_ };
  });

  return data;
}

/** Load a table as an object mapping `id_` to the complete row. */
async function loadRowsById(db, table) {
  const data = {};

  (await db.all(`select * from ${table}`)).forEach((row) => {
    data[row.id_] = row;
  });

  return data;
}

/**
 * Print the differences of the meta tables: values that are new, values
 * whose id changed and values that were removed.
 */
async function diffMetaData(oldDb, newDb) {
  await asyncForEach(
    [
      'meta_pinyin',
      'meta_pinyin_chars',
      'meta_zhuyin',
      'meta_zhuyin_chars',
      'meta_word'
    ],
    async (table) => {
      const oldData = await loadMetaIdsByValue(oldDb, table);
      const newData = await loadMetaIdsByValue(newDb, table);

      Object.keys(newData).forEach((value) => {
        if (!oldData[value]) {
          console.log(`- ${table} => 元数据 ${value} 为新增`);
          return;
        }

        const oldId = oldData[value].id_;
        const newId = newData[value].id_;

        if (oldId != newId) {
          console.log(
            `- ${table} => 元数据 ${value} 的 id 不同: ${oldId} -> ${newId}`
          );
        }
      });

      Object.keys(oldData).forEach((value) => {
        if (!newData[value]) {
          console.log(`- ${table} => 元数据 ${value} 已被删除`);
        }
      });
    }
  );
}

/**
 * Print the differences of the word tables: rows that are new, rows whose
 * id combination changed and rows that were removed. Rows are matched by
 * their `id_`.
 */
async function diffWordData(oldDb, newDb) {
  // Combination of the referenced ids that identifies the content of a row
  const genCode = (row) => {
    return `${row.word_id_ || row.source_id_}:${
      row.spell_id_ || row.target_id_
    }:${row.spell_chars_id_ || row.target_chars_id_}`;
  };

  await asyncForEach(['pinyin_word' /*, 'zhuyin_word'*/], async (table) => {
    const oldData = await loadRowsById(oldDb, table);
    const newData = await loadRowsById(newDb, table);

    Object.keys(newData).forEach((id) => {
      const oldRow = oldData[id];
      const newRow = newData[id];

      if (!oldRow) {
        console.log(
          `- ${table} => 字数据 ${id}:${newRow.word_}:${newRow.spell_} 为新增`
        );
        return;
      }

      const oldCode = genCode(oldRow);
      const newCode = genCode(newRow);

      if (oldCode != newCode) {
        console.log(
          `- ${table} => 字数据 ${id}:${newRow.word_}:${newRow.spell_} 的组合不同: ${oldCode} -> ${newCode}`
        );
      }
    });

    Object.keys(oldData).forEach((id) => {
      const oldRow = oldData[id];

      if (!newData[id]) {
        console.log(
          `- ${table} => 字数据 ${id}:${oldRow.word_}:${oldRow.spell_} 已被删除`
        );
      }
    });
  });
}
-------------------------------------------------------------------------------- /tools/pinyin-dict/src/generate/sqlite/word/index.mjs: -------------------------------------------------------------------------------- 1 | /* SQLite 字典库 */ 2 | import { fromRootPath, readLineFromFile } from '#utils/utils.mjs'; 3 | import { patch } from './patch.mjs'; 4 | import * as sqlite from './sqlite.mjs'; 5 | 6 | // 收集数据 7 | const wordDataValidFile = fromRootPath('data', 'pinyin-dict.valid.txt'); 8 | const emojiDataFile = fromRootPath('data', 'emojis.json'); 9 | // 分析数据 10 | const pinyinCharsFile = fromRootPath('..', 'analyze/files/pinyin.txt'); 11 | const pinyinCharLinksFile = fromRootPath('..', 'analyze/files/char-links.json'); 12 | const pinyinCharTreeFile = fromRootPath('..', 'analyze/files/char-tree.json'); 13 | 14 | // SQLite 字典库 15 | const wordDictDataSQLiteFile = fromRootPath('data', 'pinyin-word-dict.sqlite'); 16 | 17 | console.log(); 18 | console.log('读取已收集的有效字信息 ...'); 19 | const wordMetas = []; 20 | await readLineFromFile(wordDataValidFile, (line) => { 21 | if (!line || !line.trim()) { 22 | return; 23 | } 24 | 25 | const metas = JSON.parse(line); 26 | metas.forEach((meta) => { 27 | wordMetas.push(meta); 28 | 29 | patch(meta); 30 | }); 31 | }); 32 | console.log('- 有效字信息总数:' + wordMetas.length); 33 | console.log(); 34 | 35 | console.log(); 36 | console.log('写入字信息到 SQLite ...'); 37 | let db1 = await sqlite.open(wordDictDataSQLiteFile); 38 | 39 | try { 40 | await sqlite.saveSpells(db1, wordMetas); 41 | console.log('- 已保存字读音信息'); 42 | 43 | await sqlite.saveWords(db1, wordMetas); 44 | console.log('- 已保存字信息'); 45 | 46 | await sqlite.savePhrases(db1, wordMetas); 47 | console.log('- 已保存词组信息'); 48 | } catch (e) { 49 | throw e; 50 | } finally { 51 | await sqlite.close(db1); 52 | } 53 | 54 | console.log(); 55 | 56 | console.log(); 57 | console.log('读取已收集的表情符号 ...'); 58 | const groupEmojiMetas = {}; 59 | await readLineFromFile(emojiDataFile, (line) => { 60 | if (!line || 
!line.trim()) { 61 | return; 62 | } 63 | 64 | const groups = JSON.parse(line); 65 | groups.forEach((group) => { 66 | let groupName = group.name.zh; 67 | switch (groupName) { 68 | case '表情与情感': 69 | groupName = '表情'; 70 | break; 71 | case '人物与身体': 72 | groupName = '人物'; 73 | break; 74 | case '动物与自然': 75 | groupName = '动植物'; 76 | break; 77 | case '食物与饮料': 78 | groupName = '饮食'; 79 | break; 80 | case '旅行与地理': 81 | groupName = '旅行'; 82 | break; 83 | case '符号标志': 84 | groupName = '符号'; 85 | break; 86 | } 87 | 88 | groupEmojiMetas[groupName] = group.emojis; 89 | }); 90 | }); 91 | console.log( 92 | '- 表情符号总数:' + 93 | Object.values(groupEmojiMetas).reduce( 94 | (acc, emojis) => acc + emojis.length, 95 | 0 96 | ) 97 | ); 98 | console.log(); 99 | 100 | console.log(); 101 | console.log('写入表情符号到 SQLite ...'); 102 | let db2 = await sqlite.open(wordDictDataSQLiteFile); 103 | try { 104 | await sqlite.saveEmojis(db2, groupEmojiMetas); 105 | console.log('- 已保存表情符号数据'); 106 | } catch (e) { 107 | throw e; 108 | } finally { 109 | await sqlite.close(db2); 110 | } 111 | console.log(); 112 | 113 | console.log(); 114 | console.log('通过 SQLite 生成分析数据 ...'); 115 | let db3 = await sqlite.open(wordDictDataSQLiteFile); 116 | try { 117 | await sqlite.generatePinyinChars(db3, pinyinCharsFile); 118 | console.log('- 已保存拼音字母组合数据'); 119 | 120 | await sqlite.generatePinyinCharLinks(db3, pinyinCharLinksFile); 121 | console.log('- 已保存拼音字母关联数据'); 122 | 123 | await sqlite.generatePinyinCharTree(db3, pinyinCharTreeFile); 124 | console.log('- 已保存拼音字母后继数据'); 125 | } catch (e) { 126 | throw e; 127 | } finally { 128 | await sqlite.close(db3); 129 | } 130 | console.log(); 131 | -------------------------------------------------------------------------------- /tools/pinyin-dict/src/generate/sqlite/word/patch.mjs: -------------------------------------------------------------------------------- 1 | import { extractPinyinChars } from '#utils/utils.mjs'; 2 | 3 | /** 修正输入数据 */ 4 | export function patch(meta) { 5 | 
const deleted = [ 6 | '虾:hā' // -> 虾:há 7 | ]; 8 | 9 | const added = [ 10 | // “一”和“不”变调有规律:https://www.chinanews.com.cn/hwjy/news/2010/04-15/2228742.shtml 11 | '不:bú', 12 | '一:yì', 13 | '一:yí', 14 | '子:zi', 15 | // 便宜:pián yi 16 | '宜:yi', 17 | '噷:hm', 18 | '吒:zhà', 19 | '虎:hu', 20 | '枸:gōu', 21 | '焘:tāo', 22 | '喇:lā', 23 | '喇:lá', 24 | '蕃:bō', 25 | '蕃:fān', 26 | '脯:pú', 27 | '蕻:hóng', 28 | '朵:duo', 29 | '鏜:táng', 30 | '咔:kā', 31 | '蹬:dèng', 32 | '爸:ba', 33 | '叔:shu', 34 | '喝:he', 35 | // 《定风波·自春来》 - 无那。恨薄情一去,音书无个 36 | // https://www.cngwzj.com/pygushi/SongDai/48900/ 37 | '那:nuó', 38 | // 《桂枝香·金陵怀古》 - 谩嗟荣辱 39 | // https://www.cngwzj.com/pygushi/SongDai/49417/ 40 | '谩:màn', 41 | // 《贺新郎·春情》 - 殢酒厌厌病 42 | // https://www.cngwzj.com/pygushi/SongDai/61645/ 43 | '厌:yǎn', 44 | // 《贺新郎·春情》 - 断鸿难倩 45 | // https://www.cngwzj.com/pygushi/SongDai/61645/ 46 | '倩:qìng', 47 | // 《八声甘州·记玉关踏雪事清游》 - 长河饮马 48 | // https://www.cngwzj.com/pygushi/SongDai/61043/ 49 | '饮:yìn', 50 | // 王维《青溪》 - 趣途无百里 51 | // https://www.cngwzj.com/pygushi/TangDai/10982/ 52 | '趣:qū', 53 | // 李白《关山月》 - 戍客望边色 54 | // https://www.cngwzj.com/pygushi/TangDai/12860/ 55 | '色:yì', 56 | // 《听董大弹胡笳声兼寄语弄房给事》 - 四郊秋叶惊摵摵 57 | // https://www.cngwzj.com/pygushi/TangDai/11474/ 58 | '摵:shè', 59 | // 白居易《琵琶行》 - 自言本是京城女,家在虾蟆陵下住 60 | // https://www.cngwzj.com/pygushi/TangDai/25273/ 61 | '虾:há', 62 | // 李白《将进酒》 63 | // https://www.cngwzj.com/pygushi/TangDai/12843/ 64 | '将:qiāng', 65 | // 《行经华阴》- 借问路傍名利客 66 | // https://www.cngwzj.com/pygushi/TangDai/11353/ 67 | '傍:páng', 68 | // 王维《鹿柴》 69 | // https://www.cngwzj.com/pygushi/TangDai/11206/ 70 | '柴:zhài', 71 | // 礼记《虽有嘉肴》- 学学半 72 | // https://www.cngwzj.com/pygushi/LiangHan/76970/ 73 | '学:xiào', 74 | // 屈原《离骚》- 肇锡余以嘉名 75 | // https://www.cngwzj.com/pygushi/XianQin/87343/ 76 | '锡:cì', 77 | // - 来吾道夫先路 78 | '道:dǎo', 79 | // 论语《不义而富且贵,于我如浮云》- 久要不忘平生之言 80 | // https://www.cngwzj.com/pygushi/XianQin/88550/ 81 | '要:yuē', 82 | // 论语《己所不欲,勿施于人》- 举皋陶 83 | // 
https://www.cngwzj.com/pygushi/XianQin/88549/ 84 | '陶:yáo', 85 | // - 乡也 86 | '乡:xiàng', 87 | // 论语《好仁不好学,其蔽也愚》- 陈亢问 88 | // https://www.cngwzj.com/pygushi/XianQin/88551/ 89 | '亢:gāng', 90 | // 荀子《劝学》- 君子生非异也 91 | // https://www.cngwzj.com/pygushi/XianQin/86629/ 92 | '生:xìng', 93 | // 司马迁《陈涉世家》- 发闾左適戍渔阳 94 | // https://www.cngwzj.com/pygushi/LiangHan/88083/ 95 | '適:zhé', 96 | '夏:jiǎ', 97 | '苦:hù', 98 | // 列子《杞人忧天》- 舍然大喜 99 | // https://www.cngwzj.com/pygushi/KeWen/87901/ 100 | '舍:shì' 101 | ]; 102 | 103 | // 先增改, 104 | extraWords(added).forEach(({ value, pinyin, chars }) => { 105 | if ( 106 | meta.value == value && 107 | meta.pinyins.filter(({ value }) => value == pinyin).length == 0 108 | ) { 109 | meta.pinyins.push({ value: pinyin, chars }); 110 | } 111 | }); 112 | // 再删除,以避免自增 id 发生较大变动 113 | extraWords(deleted).forEach(({ value, pinyin }) => { 114 | if (meta.value == value) { 115 | meta.pinyins = meta.pinyins.filter((py) => py.value !== pinyin); 116 | } 117 | }); 118 | } 119 | 120 | function extraWords(words) { 121 | return words 122 | .map((w) => w.split(':')) 123 | .map((s) => ({ 124 | value: s[0], 125 | pinyin: s[1], 126 | chars: extractPinyinChars(s[1]) 127 | })); 128 | } 129 | -------------------------------------------------------------------------------- /tools/pinyin-dict/src/generate/test.mjs: -------------------------------------------------------------------------------- 1 | import { hasGlyphFontForCodePoint } from '#utils/utils.mjs'; 2 | import { fetchWordMetas } from '#utils/zdic.mjs'; 3 | 4 | const unicodes = [ 5 | 'U+20C43' /* 𠱃 */, 6 | 'U+20C53' /* 𠱓 */, 7 | 'U+20C65' /* 𠱥 */, 8 | 'U+20C8D' /* 𠲍 */, 9 | 'U+20C96' /* 𠲖 */, 10 | 'U+20C9C' /* 𠲜 */, 11 | 'U+20CB5' /* 𠲵 */, 12 | 'U+20CD0' /* 𠳐 */, 13 | 'U+20CED' /* 𠳭 */ 14 | ]; 15 | for (let i = 0; i < unicodes.length; i++) { 16 | const unicode = unicodes[i]; 17 | const codePoint = parseInt(unicode.replaceAll(/^U\+/g, '0x'), 16); 18 | const char = String.fromCharCode(codePoint); 19 | const exist = 
hasGlyphFontForCodePoint(unicode); 20 | 21 | console.log(unicode + ' - ' + char + ': ' + exist); 22 | } 23 | 24 | // const words = ['㑵', '𥁞', '尽', '国', '𣴘']; 25 | // const wordMetas = await fetchWordMetas(words); 26 | // console.log(JSON.stringify(wordMetas)); 27 | -------------------------------------------------------------------------------- /tools/pinyin-dict/src/utils/sqlite.mjs: -------------------------------------------------------------------------------- 1 | // https://www.sqlitetutorial.net/sqlite-nodejs/connect/ 2 | // https://github.com/TryGhost/node-sqlite3/wiki/API 3 | import sqlite3 from 'sqlite3'; 4 | // https://www.npmjs.com/package/sqlite 5 | import * as sqlite from 'sqlite'; 6 | 7 | import { splitChars, extractPinyinChars, asyncForEach } from './utils.mjs'; 8 | 9 | export async function openDB(file, readonly) { 10 | const db = await sqlite.open({ 11 | filename: file, 12 | mode: readonly 13 | ? sqlite3.OPEN_READONLY 14 | : sqlite3.OPEN_CREATE | sqlite3.OPEN_READWRITE, 15 | driver: sqlite3.Database 16 | }); 17 | 18 | // 提升批量写入性能: https://avi.im/blag/2021/fast-sqlite-inserts/ 19 | await execSQL( 20 | db, 21 | ` 22 | pragma journal_mode = off; 23 | pragma synchronous = 0; 24 | pragma cache_size = 1000000; 25 | pragma locking_mode = exclusive; 26 | pragma temp_store = memory; 27 | ` 28 | ); 29 | 30 | return db; 31 | } 32 | 33 | export async function attachDB(db, sources) { 34 | // 附加数据库(连接期内有效): https://www.sqlite.org/lang_attach.html 35 | await execSQL( 36 | db, 37 | Object.keys(sources) 38 | .map((name) => `attach database '${sources[name]}' as ${name}`) 39 | .join(';') 40 | ); 41 | 42 | return db; 43 | } 44 | 45 | export async function closeDB(db, skipClean) { 46 | try { 47 | if (db.config.mode != sqlite3.OPEN_READONLY && !skipClean) { 48 | // 数据库无用空间回收 49 | await execSQL(db, 'vacuum'); 50 | } 51 | 52 | await db.close(); 53 | } catch (e) { 54 | console.error(e); 55 | } 56 | } 57 | 58 | /** 新增或更新数据 */ 59 | export async function saveToDB( 60 | db, 
61 | table, 62 | dataMap, 63 | disableSorting, 64 | primaryKeys 65 | ) { 66 | const dataArray = mapToArray(dataMap, disableSorting); 67 | if (dataArray.length === 0) { 68 | return; 69 | } 70 | 71 | primaryKeys = primaryKeys || ['id_']; 72 | const hasOnlyIdKey = primaryKeys.length == 1 && primaryKeys[0] == 'id_'; 73 | 74 | const columnsWithPrimaryKey = Object.keys(dataArray[0]).filter( 75 | (k) => !k.startsWith('__') 76 | ); 77 | const columns = columnsWithPrimaryKey.filter((k) => !primaryKeys.includes(k)); 78 | 79 | const insertWithIdSql = `insert into ${table} (${columnsWithPrimaryKey.join( 80 | ', ' 81 | )}) values (${columnsWithPrimaryKey.map(() => '?').join(', ')}) 82 | `; 83 | const insertWithIdStatement = await db.prepare(insertWithIdSql); 84 | const insertStatement = hasOnlyIdKey 85 | ? await db.prepare( 86 | `insert into ${table} (${columns.join(', ')}) values (${columns 87 | .map(() => '?') 88 | .join(', ')}) 89 | ` 90 | ) 91 | : await db.prepare(insertWithIdSql); 92 | const updateStatement = 93 | columns.length > 0 94 | ? await db.prepare( 95 | `update ${table} set ${columns 96 | .map((c) => c + ' = ?') 97 | .join(', ')} where ${primaryKeys 98 | .map((key) => key + ' = ?') 99 | .join(' and ')} 100 | ` 101 | ) 102 | : // 所有的列都为主键,则不需要更新 103 | null; 104 | 105 | const getId = (d) => primaryKeys.map((k) => d[k]).join(''); 106 | await asyncForEach(dataArray, async (data) => { 107 | if (getId(data)) { 108 | const needToUpdate = 109 | data.__exist__ && 110 | columns.reduce((r, c) => r || data[c] !== data.__exist__[c], false); 111 | 112 | if (needToUpdate) { 113 | await updateStatement.run( 114 | ...columns.concat(primaryKeys).map((c) => data[c]) 115 | ); 116 | } 117 | // 新增包含 id 的数据 118 | else if (!data.__exist__) { 119 | await insertWithIdStatement.run( 120 | ...columnsWithPrimaryKey.map((c) => data[c]) 121 | ); 122 | } 123 | } else { 124 | const params = (hasOnlyIdKey ? 
columns : columnsWithPrimaryKey).map( 125 | (c) => data[c] 126 | ); 127 | await insertStatement.run(...params); 128 | } 129 | }); 130 | 131 | await insertStatement.finalize(); 132 | await insertWithIdStatement.finalize(); 133 | updateStatement && (await updateStatement.finalize()); 134 | } 135 | 136 | /** 删除数据 */ 137 | export async function removeFromDB(db, table, data, primaryKeys) { 138 | if (data.length === 0) { 139 | return; 140 | } 141 | 142 | primaryKeys = primaryKeys || ['id_']; 143 | 144 | const deleteStatement = await db.prepare( 145 | `delete from ${table} where ${primaryKeys 146 | .map((key) => key + ' = ?') 147 | .join(' and ')} 148 | ` 149 | ); 150 | 151 | await asyncForEach(data, async (d) => { 152 | const params = typeof d == 'object' ? primaryKeys.map((c) => d[c]) : [d]; 153 | await deleteStatement.run(...params); 154 | }); 155 | 156 | await deleteStatement.finalize(); 157 | } 158 | 159 | export async function hasTable(db, table) { 160 | const result = await db.get( 161 | `select count(*) as total from sqlite_master where type='table' and name='${table}'` 162 | ); 163 | return result.total == 1; 164 | } 165 | 166 | export async function execSQL(db, sqls) { 167 | await asyncForEach(sqls.split(/;/g), async (sql) => { 168 | await db.exec(sql); 169 | }); 170 | } 171 | 172 | function mapToArray(obj, disableSorting) { 173 | if (disableSorting === true) { 174 | return Object.keys(obj).map((k) => obj[k]); 175 | } 176 | 177 | const charSpecials = { 178 | a: ['ā', 'á', 'ǎ', 'à'], 179 | o: ['ō', 'ó', 'ǒ', 'ò'], 180 | e: ['ē', 'é', 'ě', 'è', 'ê', 'ê̄', 'ế', 'ê̌', 'ề'], 181 | i: ['ī', 'í', 'ǐ', 'ì'], 182 | u: ['ū', 'ú', 'ǔ', 'ù'], 183 | ü: ['ǖ', 'ǘ', 'ǚ', 'ǜ'], 184 | n: ['ń', 'ň', 'ǹ'], 185 | m: ['m̄', 'ḿ', 'm̀'] 186 | }; 187 | const charWeights = { ˉ: 10001, ˊ: 10002, ˇ: 10003, ˋ: 10004 }; 188 | for (let i = 97, j = 1; i <= 122; i++, j++) { 189 | const ch = String.fromCharCode(i); 190 | const weight = j * 15; 191 | charWeights[ch] = weight; 192 | 193 | const 
specials = charSpecials[ch]; 194 | if (specials) { 195 | for (let k = 0; k < specials.length; k++) { 196 | const special = specials[k]; 197 | 198 | charWeights[special] = weight + (k + 1); 199 | } 200 | } 201 | } 202 | const getCharCode = (ch) => { 203 | let sum = 0; 204 | for (let i = 0; i < ch.length; i++) { 205 | sum += ch.charCodeAt(i); 206 | } 207 | return sum; 208 | }; 209 | 210 | // Note: 主要排序带音调的拼音(注音规则暂时不清楚,故不处理),其余的按字符顺序排序 211 | const keys = Object.keys(obj).sort((a, b) => { 212 | const a_without_special = extractPinyinChars(a).replaceAll(/[ˊˇˋˉ]$/g, ''); 213 | const b_without_special = extractPinyinChars(b).replaceAll(/[ˊˇˋˉ]$/g, ''); 214 | 215 | if (a_without_special === b_without_special) { 216 | const a_weight = splitChars(a) 217 | .map((ch) => charWeights[ch] || getCharCode(ch)) 218 | .reduce((acc, w) => acc + w, 0); 219 | const b_weight = splitChars(b) 220 | .map((ch) => charWeights[ch] || getCharCode(ch)) 221 | .reduce((acc, w) => acc + w, 0); 222 | 223 | return a_weight - b_weight; 224 | } 225 | 226 | return a_without_special > b_without_special 227 | ? 1 228 | : a_without_special < b_without_special 229 | ? 
-1 230 | : 0; 231 | }); 232 | 233 | return keys.map((k) => obj[k]); 234 | } 235 | -------------------------------------------------------------------------------- /tools/pinyin-dict/src/utils/utils.mjs: -------------------------------------------------------------------------------- 1 | import * as fs from 'fs'; 2 | import * as path from 'path'; 3 | import * as crypto from 'crypto'; 4 | import { fileURLToPath } from 'url'; 5 | import * as events from 'events'; 6 | import * as readline from 'readline'; 7 | 8 | import * as fontkit from 'fontkit'; 9 | import getSystemFonts from 'get-system-fonts'; 10 | import GraphemeSplitter from 'grapheme-splitter'; 11 | 12 | import { pinyin as parsePinyin, addDict } from 'pinyin-pro'; 13 | // https://pinyin-pro.cn/use/addDict.html 14 | import CompleteDict from '@pinyin-pro/data/complete'; 15 | 16 | addDict(CompleteDict); 17 | 18 | const systemFonts = await prepareSystemFonts(); 19 | const graphemeSplitter = new GraphemeSplitter(); 20 | 21 | // https://codingbeautydev.com/blog/javascript-dirname-is-not-defined-in-es-module-scope/ 22 | const __filename = fileURLToPath(import.meta.url); 23 | const __dirname = path.dirname(__filename); 24 | 25 | export async function asyncForEach(array, cb) { 26 | for (const e of array) { 27 | await cb(e); 28 | } 29 | } 30 | 31 | export function fromRootPath(...paths) { 32 | return path.join(__dirname, '../..', ...paths); 33 | } 34 | 35 | export function sleep(ms) { 36 | return new Promise((resolve) => setTimeout(() => resolve(), ms)); 37 | } 38 | 39 | export function fileSHA256(filepath) { 40 | // https://gist.github.com/GuillermoPena/9233069#gistcomment-3149231-permalink 41 | const file = fs.readFileSync(filepath); 42 | const hash = crypto.createHash('sha256'); 43 | hash.update(file); 44 | 45 | return hash.digest('hex'); 46 | } 47 | 48 | export function existFile(filepath) { 49 | return fs.existsSync(filepath); 50 | } 51 | 52 | export function copyFile(source, target, override) { 53 | if 
(existFile(target) && override !== true) { 54 | return; 55 | } 56 | 57 | fs.copyFileSync(source, target); 58 | } 59 | 60 | export function readJSONFromFile(filepath, defaultValue = {}) { 61 | if (!existFile(filepath)) { 62 | return defaultValue; 63 | } 64 | 65 | return JSON.parse(readFile(filepath)); 66 | } 67 | 68 | export function readFile(filepath) { 69 | return fs.readFileSync(filepath, 'utf8'); 70 | } 71 | 72 | export function readAllFiles(dir) { 73 | return getAllFiles(dir).map((file) => readFile(file)); 74 | } 75 | 76 | export function getAllFiles(dir) { 77 | if (Array.isArray(dir)) { 78 | return dir.map(getAllFiles).reduce((acc, files) => acc.concat(files), []); 79 | } 80 | 81 | if (fs.lstatSync(dir).isFile()) { 82 | return [dir]; 83 | } 84 | 85 | let files = []; 86 | fs.readdirSync(dir).forEach((file) => { 87 | const filepath = path.join(dir, file); 88 | 89 | if (fs.lstatSync(filepath).isDirectory()) { 90 | files = files.concat(getAllFiles(filepath)); 91 | } else { 92 | files.push(filepath); 93 | } 94 | }); 95 | 96 | return files; 97 | } 98 | 99 | export async function readLineFromFile(filepath, consumer) { 100 | if (!existFile(filepath)) { 101 | return []; 102 | } 103 | 104 | const rl = readline.createInterface({ 105 | input: fs.createReadStream(filepath), 106 | crlfDelay: Infinity 107 | }); 108 | 109 | const results = []; 110 | rl.on('line', (line) => { 111 | const result = consumer(line); 112 | if (typeof result !== 'undefined') { 113 | results.push(result); 114 | } 115 | }); 116 | 117 | await events.once(rl, 'close'); 118 | 119 | return results; 120 | } 121 | 122 | export function appendLineToFile(filepath, line, doEmpty) { 123 | const dirpath = path.dirname(filepath); 124 | 125 | if (!fs.existsSync(dirpath)) { 126 | fs.mkdirSync(dirpath); 127 | } 128 | 129 | if (!fs.existsSync(filepath) || doEmpty) { 130 | fs.writeFileSync(filepath, ''); 131 | } 132 | 133 | let fd; 134 | try { 135 | fd = fs.openSync(filepath, 'a'); 136 | fs.appendFileSync(fd, line + 
'\n', 'utf8'); 137 | } finally { 138 | fd && fs.closeSync(fd); 139 | } 140 | } 141 | 142 | export function naiveHTMLNodeInnerText(node) { 143 | // https://github.com/jsdom/jsdom/issues/1245#issuecomment-1243809196 144 | // We need Node(DOM's Node) for the constants, 145 | // but Node doesn't exist in the nodejs global space, 146 | // and any Node instance references the constants 147 | // through the prototype chain 148 | const Node = node; 149 | 150 | return node && node.childNodes 151 | ? [...node.childNodes] 152 | .map((node) => { 153 | switch (node.nodeType) { 154 | case Node.TEXT_NODE: 155 | return node.textContent; 156 | case Node.ELEMENT_NODE: 157 | return naiveHTMLNodeInnerText(node); 158 | default: 159 | return ''; 160 | } 161 | }) 162 | .join(' ') 163 | : ''; 164 | } 165 | 166 | async function prepareSystemFonts() { 167 | // https://www.npmjs.com/package/get-system-fonts 168 | const fontFiles = await getSystemFonts(); 169 | const fonts = []; 170 | 171 | // https://github.com/foliojs/fontkit#fonthasglyphforcodepointcodepoint 172 | fontFiles.forEach((file) => { 173 | try { 174 | const font = fontkit.openSync(file); 175 | if (!font.hasGlyphForCodePoint) { 176 | return; 177 | } 178 | 179 | //console.info('Read font file: ' + file); 180 | fonts.push(font); 181 | } catch (e) { 182 | //console.warn('Failed to read font file: ' + file, e); 183 | } 184 | }); 185 | 186 | return fonts; 187 | } 188 | 189 | /** 判断系统字体中是否存在指定编码的字形,若不存在,则表示该编码的字不可读 */ 190 | export function hasGlyphFontForCodePoint(unicode) { 191 | const codePoint = parseInt('0x' + unicode.replaceAll(/^U\+/g, ''), 16); 192 | 193 | for (let i = 0; i < systemFonts.length; i++) { 194 | const font = systemFonts[i]; 195 | 196 | if (font.hasGlyphForCodePoint(codePoint)) { 197 | return true; 198 | } 199 | } 200 | return false; 201 | } 202 | 203 | /** 部分中文和表情符号等占用字节数大于 2,比如: 𫫇,需单独处理 */ 204 | export function splitChars(str) { 205 | // https://github.com/orling/grapheme-splitter 206 | return 
/**
 * Get the pinyin of every character of `str`.
 *
 * See https://pinyin-pro.cn/use/pinyin.html
 *
 * @param {string} str
 * @returns {string[]} e.g. `['nǐ', 'hǎo', 'ma']`
 */
export function getPinyin(str) {
  return parsePinyin(str, {
    // Return an array instead of a string
    type: 'array',
    // Put the tone as a diacritic on the pinyin letter
    toneType: 'symbol',
    // Recognize a surname at the beginning of the string
    surname: 'head',
    // Apply tone sandhi to 一 and 不, e.g. 不 (bù) is read with the rising
    // tone in front of a falling-tone syllable (不会, 不是):
    // - http://www.moe.gov.cn/jyb_hygq/hygq_zczx/moe_1346/moe_1364/tnull_42118.html
    // - https://www.chinanews.com.cn/hwjy/news/2010/04-15/2228742.shtml
    toneSandhi: true
  });
}

/**
 * Repair look-alike characters found in scraped pinyin.
 *
 * @param {string} str - Raw pinyin.
 * @returns {string} Pinyin using the standard precomposed letters.
 */
export function correctPinyin(str) {
  // Applied in order. NOTE(review): in the rendered source the macron and
  // grave pairs show identical glyphs on both sides; presumably the search
  // side is the decomposed form (letter + combining mark) - confirm against
  // the original bytes.
  const fixes = [
    ['a\u0304', '\u0101'], // a + combining macron -> ā
    ['\u0103', '\u01CE'], // ă (breve) -> ǎ (caron)
    ['a\u0300', '\u00E0'], // a + combining grave -> à
    ['\u0251', 'a'], // ɑ (Latin alpha) -> a
    ['o\u0304', '\u014D'], // o + combining macron -> ō
    ['\u014F', '\u01D2'], // ŏ (breve) -> ǒ (caron)
    ['i\u0304', '\u012B'], // i + combining macron -> ī
    ['\u012D', '\u01D0'], // ĭ (breve) -> ǐ (caron)
    ['\u016D', '\u01D4'], // ŭ (breve) -> ǔ (caron)
    ['\u0261', 'g'], // ɡ (script g) -> g
    ['e\u0304', '\u0113'], // e + combining macron -> ē
    ['\u00B7', ''] // middle dot is dropped
  ];

  return fixes.reduce(
    (text, [wrong, right]) => text.replaceAll(wrong, right),
    str
  );
}

/**
 * Repair scraped zhuyin: `π` stands in for `ㄫ`, and the dot-above mark
 * (˙, U+02D9) is removed.
 *
 * @param {string} str - Raw zhuyin.
 * @returns {string}
 */
export function correctZhuyin(str) {
  return str.replaceAll('\u03C0', '\u312B').replaceAll('\u02D9', '');
}

/**
 * Strip the tone from a pinyin syllable and return its plain letters.
 *
 * `ü` is kept as is, while `ê` (with or without a tone) becomes `e`.
 * Working on the decomposed form removes tone marks wherever they occur,
 * so toned `m` / `ê` inside a longer syllable (e.g. `hm̀`) and decomposed
 * input are handled as well.
 *
 * @param {string} pinyin - e.g. `'nǐ'`, `'lǜ'`, `'ḿ'`
 * @returns {string} e.g. `'ni'`, `'lü'`, `'m'`
 */
export function extractPinyinChars(pinyin) {
  return (
    pinyin
      .normalize('NFD')
      // Tone marks: grave, acute, macron, caron
      .replace(/[\u0300\u0301\u0304\u030C]/g, '')
      // ê -> e; the diaeresis of ü (U+0308) is deliberately left alone
      .replace(/e\u0302/g, 'e')
      .normalize('NFC')
  );
}
/**
 * Get the tone number of a pinyin syllable.
 *
 * The syllable is decomposed first, so the tone is found no matter which
 * letter carries it (a, o, e, ê, i, u, ü, n, m) and whether the letter is
 * precomposed or followed by a combining mark. In particular `ê̄` and `ê̌`
 * yield 1 and 3 instead of being mistaken for a toneless `ê`.
 *
 * @param {string} pinyin - A single syllable, e.g. `'nǐ'`. For a string
 *   with several tone marks the first one in string order wins.
 * @returns {number} 1 (macron), 2 (acute), 3 (caron), 4 (grave), or 0 when
 *   the syllable carries no tone mark.
 */
export function getPinyinTone(pinyin) {
  const toneOfMark = new Map([
    ['\u0304', 1], // combining macron
    ['\u0301', 2], // combining acute
    ['\u030C', 3], // combining caron
    ['\u0300', 4] // combining grave
  ]);

  for (const ch of pinyin.normalize('NFD')) {
    const tone = toneOfMark.get(ch);
    if (tone !== undefined) {
      return tone;
    }
  }

  return 0;
}

/**
 * Strip the tone marks (ˊ ˇ ˋ ˉ ˙) from zhuyin and return the remaining
 * symbols.
 *
 * @param {string} zhuyin
 * @returns {string}
 */
export function extractZhuyinChars(zhuyin) {
  return zhuyin.replaceAll(/[\u02CA\u02C7\u02CB\u02C9\u02D9]/g, '');
}

/**
 * Similarity of two stroke sequences, based on the Levenshtein distance
 * (no Damerau transposition is applied):
 * - [Sort an array by the "Levenshtein Distance" with best performance in Javascript](https://stackoverflow.com/a/11958496)
 * - [Levenshtein Distance](https://blog.csdn.net/asty9000/article/details/81384650)
 * - [Damerau–Levenshtein distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance)
 *
 * @param {string} s - First stroke sequence.
 * @param {string} t - Second stroke sequence.
 * @returns {number} `1 - distance / max(s.length, t.length)`: 1 for equal
 *   sequences, 0 when nothing matches. Also 0 when either sequence is empty.
 */
export function calculateStrokeSimilarity(s, t) {
  const n = s.length;
  const m = t.length;

  if (n === 0 || m === 0) {
    return 0;
  }

  // Only two rows of the distance matrix are needed at any time:
  // prev[j] is the distance between s[0, i - 1) and t[0, j).
  let prev = Array.from({ length: m + 1 }, (_, j) => j);

  for (let i = 1; i <= n; i++) {
    const sChar = s.charAt(i - 1);
    const curr = [i];

    for (let j = 1; j <= m; j++) {
      const cost = sChar === t.charAt(j - 1) ? 0 : 1;

      curr[j] = Math.min(
        prev[j] + 1, // deletion
        curr[j - 1] + 1, // insertion
        prev[j - 1] + cost // substitution
      );
    }

    prev = curr;
  }

  return 1 - prev[m] / Math.max(n, m);
}
// The detailed data of a character is scraped from zdic.net
const baseUrl = 'https://www.zdic.net/hans/';
const gotOptions = { timeout: { connect: 50000 } };

/**
 * Fetch the metadata of several characters concurrently.
 *
 * @param {string[]} words - The characters to look up.
 * @returns {Promise<object[]>} One metadata object per character, in the
 *   same order (see {@link fetchWordMeta}).
 */
export async function fetchWordMetas(words) {
  return Promise.all(words.map((word) => fetchWordMeta(word)));
}

/**
 * Download the zdic.net page of one character and extract its metadata:
 * glyph images, pinyin/zhuyin readings, radical and stroke data,
 * simplified/traditional/variant forms, input-method codes, and the phrases
 * listed on the page (each resolved through `fetchPhraseMeta`).
 *
 * @param {string} word - A single character.
 * @returns {Promise<object>} The metadata; only `{ value }` when the page
 *   could not be turned into a document.
 */
async function fetchWordMeta(word) {
  const srcUrl = baseUrl + word;
  const html = await got(srcUrl, gotOptions).text();
  const $dom = new JSDOM(html);
  const $doc = $dom?.window?.document;
  if (!$doc) {
    return { value: word };
  }

  const wordMeta = {
    value: word,
    unicode: '',
    src_url: srcUrl,
    glyph_svg_url: '',
    glyph_gif_url: '',
    glyph_struct: '',
    glyph_font_exists: true,
    // Differences between zhuyin and pinyin, and their history:
    // https://sspai.com/post/75248
    pinyins: [],
    zhuyins: [],
    radical: '',
    stroke_order: '',
    total_stroke_count: 0,
    radical_stroke_count: 0,
    traditional: false,
    simple_words: [],
    variant_words: [],
    traditional_words: [],
    wubi_codes: [],
    cangjie_codes: [],
    zhengma_codes: [],
    sijiao_codes: [],
    phrases: []
  };

  const textOf = ($el) => naiveHTMLNodeInnerText($el).trim();
  // For a "<label> <value>" row: drop everything up to the last whitespace
  const valueAfterLabel = (text) => text.replaceAll(/^.+\s+/g, '').trim();
  // Readings and their audio; the page's URLs get the `https:` scheme prefixed
  const collectReadings = (selector) =>
    Array.from($doc.querySelectorAll(selector), ($el) => {
      const audio =
        $el.querySelector('a[data-src-mp3]')?.getAttribute('data-src-mp3') ||
        '';

      return { value: textOf($el), audio_url: audio ? 'https:' + audio : '' };
    });

  // Glyph image and stroke-order animation
  const $img = $doc.querySelector('.ziif .zipic img');
  if ($img) {
    const src = $img.getAttribute('src');
    const gif = $img.getAttribute('data-gif');

    if (src) {
      wordMeta.glyph_svg_url = 'https:' + src;
    }
    if (gif) {
      wordMeta.glyph_gif_url = 'https:' + gif;
    }
  }

  // Pinyin
  wordMeta.pinyins = collectReadings('.ziif .dsk .z_py .z_d');
  // Zhuyin, listed in the same order as the pinyin
  wordMeta.zhuyins = collectReadings('.ziif .dsk .z_zy .z_d');

  // Total stroke count
  const $totalStrokeCount = $doc.querySelector('.ziif .dsk .z_bs2 .z_ts3');
  if ($totalStrokeCount) {
    const total = Number.parseInt(
      valueAfterLabel(naiveHTMLNodeInnerText($totalStrokeCount.parentElement)),
      10
    );

    // Keep the default 0 rather than storing NaN
    if (!Number.isNaN(total)) {
      wordMeta.total_stroke_count = total;
    }
  }

  // Radical (部首) and the number of strokes outside of it (部外)
  $doc.querySelectorAll('.ziif .dsk .z_bs2 .z_ts2').forEach(($el) => {
    const text = naiveHTMLNodeInnerText($el.parentElement);
    const value = valueAfterLabel(text);

    if (text.includes('部首')) {
      wordMeta.radical = value;
    } else if (text.includes('部外')) {
      const outside = Number.parseInt(value, 10);

      if (!Number.isNaN(outside)) {
        // radical strokes = total strokes - strokes outside the radical
        wordMeta.radical_stroke_count = Math.max(
          0,
          wordMeta.total_stroke_count - outside
        );
      }
    }
  });

  // Simplified / traditional forms
  const addForms = (target, forms) => {
    forms.forEach((form) => {
      if (form && !target.includes(form)) {
        target.push(form);
      }
    });
  };
  $doc.querySelectorAll('.ziif .dsk .z_jfz > p > a').forEach(($el) => {
    // Forms that are only available as an image cannot be used
    if ($el.querySelector('img')) {
      return;
    }

    const parentText = naiveHTMLNodeInnerText($el.parentElement);
    const forms = textOf($el).split(/\s+/g);

    // Accumulate across links: a character may have several counterparts,
    // and assigning per link would keep only the last one.
    if (parentText.includes('繁体')) {
      // Traditional forms are listed, so this character is simplified
      wordMeta.traditional = false;
      addForms(wordMeta.traditional_words, forms);
    } else if (parentText.includes('简体')) {
      // Simplified forms are listed, so this character is traditional
      wordMeta.traditional = true;
      addForms(wordMeta.simple_words, forms);
    }
  });

  // Variant forms
  $doc.querySelectorAll('.ziif .dsk .z_ytz2 > a').forEach(($el) => {
    if ($el.querySelector('img')) {
      return;
    }

    wordMeta.variant_words.push(textOf($el));
  });

  // Stroke order
  const $strokeOrder = $doc.querySelector('.ziif .dsk .z_bis2');
  if ($strokeOrder) {
    wordMeta.stroke_order = textOf($strokeOrder);
  }

  // Encodings: titles and values are matched by position
  const codeTitles = Array.from(
    $doc.querySelectorAll('.ziif .dsk .dsk_2_1 > p > span'),
    ($el) => textOf($el)
  );
  const codes = Array.from(
    $doc.querySelectorAll('.ziif .dsk .dsk_2_1'),
    ($el) => textOf($el)
  ).filter((value) => !codeTitles.includes(value));

  codeTitles.forEach((title, i) => {
    const value = codes[i];
    // A title without a value would otherwise crash the whole lookup
    if (value === undefined) {
      return;
    }

    switch (title) {
      case '统一码':
        wordMeta.unicode = value.replaceAll(/^.+(U\+.+)\s*/g, '$1');
        break;
      case '字形分析':
        wordMeta.glyph_struct = value;
        break;
      case '五笔':
        wordMeta.wubi_codes = value.split(/\|/g);
        break;
      case '仓颉':
        wordMeta.cangjie_codes = value.split(/\|/g);
        break;
      case '郑码':
        wordMeta.zhengma_codes = value.split(/\|/g);
        break;
      case '四角':
        wordMeta.sijiao_codes = value.split(/\|/g);
        break;
      default:
        break;
    }
  });

  wordMeta.glyph_font_exists = hasGlyphFontForCodePoint(wordMeta.unicode);

  // Phrases
  const phrases = Array.from($doc.querySelectorAll('.crefe'), ($el) =>
    textOf($el)
  );

  // Phrase pages are fetched in small batches with a pause after each one
  const batchSize = 10;
  for (let i = 0; i < phrases.length; i += batchSize) {
    const phraseMetas = await Promise.all(
      phrases.slice(i, i + batchSize).map((phrase) => fetchPhraseMeta(phrase))
    );

    phraseMetas.forEach((metas) => {
      wordMeta.phrases.push(...metas);
    });

    await sleep(1500);
  }

  return wordMeta;
}
/**
 * Download the zdic.net page of a phrase and extract the readings of each
 * of its segments. A phrase may consist of several segments separated by
 * punctuation, e.g. https://www.zdic.net/hans/不塞不流，不止不行
 *
 * @param {string} phrase - The phrase as listed on a character page.
 * @returns {Promise<object[]>} One entry per segment longer than one
 *   character: `{ value: string[], src_url, pinyins, zhuyins }`, where
 *   `pinyins` / `zhuyins` hold at most one `{ value: string[] }` reading
 *   (empty when the page lists none for that position). `[]` when the
 *   phrase has no such segment or the page shows no pinyin at all.
 */
async function fetchPhraseMeta(phrase) {
  // Split before downloading: nothing has to be fetched when no segment
  // qualifies. NOTE(review): the separator classes accept both the
  // full-width and the ASCII comma/semicolon - confirm against the original
  // source, whose rendering does not tell them apart.
  const phraseCharsArray = phrase
    .split(/[\uFF0C,\u3001\uFF1B;]/g)
    .map((segment) => splitChars(segment))
    .filter((chars) => chars.length > 1);
  if (phraseCharsArray.length === 0) {
    return [];
  }

  const srcUrl = baseUrl + phrase;
  const html = await got(srcUrl, gotOptions).text();
  const $dom = new JSDOM(html);
  const $doc = $dom?.window?.document;
  if (!$doc) {
    return phraseCharsArray.map((chars) => ({
      value: chars,
      pinyins: [],
      zhuyins: []
    }));
  }

  // Pinyin and zhuyin
  const pinyinsArray = [];
  const zhuyinsArray = [];
  $doc.querySelectorAll('.ciif p .z_ts2').forEach(($label) => {
    const label = naiveHTMLNodeInnerText($label).trim();
    let readings;
    if (label === '拼音') {
      readings = pinyinsArray;
    } else if (label === '注音') {
      readings = zhuyinsArray;
    } else {
      return;
    }

    $label.parentElement.querySelectorAll('.dicpy').forEach(($reading) => {
      naiveHTMLNodeInnerText($reading)
        .split(/[\uFF0C,\uFF1B;]/g)
        .forEach((part) => {
          const syllables = part
            // Drop middle dots
            .replaceAll(/[\u00B7]/g, '')
            // Re-attach a zhuyin tone mark (ˊ ˇ ˋ ˉ) to its syllable
            .replaceAll(/\s+([\u02CA\u02C7\u02CB\u02C9])/g, '$1')
            // The text after a separator starts with a blank, which would
            // otherwise produce an empty first syllable
            .trim()
            .split(/\s+/g)
            .filter((syllable) => syllable !== '');

          readings.push({ value: syllables });
        });
    });
  });

  if (pinyinsArray.length === 0) {
    return [];
  }

  // Segments and readings are matched by position
  return phraseCharsArray.map((chars, i) => ({
    value: chars,
    src_url: srcUrl,
    pinyins: pinyinsArray[i] ? [pinyinsArray[i]] : [],
    zhuyins: zhuyinsArray[i] ? [zhuyinsArray[i]] : []
  }));
}