├── .travis.yml ├── LICENSE.txt ├── README.md ├── config └── ik │ ├── IKAnalyzer.cfg.xml │ ├── custom │ ├── ext_stopword.dic │ ├── mydict.dic │ ├── single_word.dic │ ├── single_word_full.dic │ ├── single_word_low_freq.dic │ └── sougou.dic │ ├── main.dic │ ├── preposition.dic │ ├── quantifier.dic │ ├── stopword.dic │ ├── suffix.dic │ └── surname.dic ├── libs ├── opennlp-tools-1.6.0.jar └── stanford-corenlp-3.6.0.jar ├── pom.xml ├── src └── main │ ├── assemblies │ └── plugin.xml │ ├── java │ ├── ir │ │ └── areka │ │ │ └── analyzer │ │ │ └── lucene │ │ │ ├── FarsiAnalyzer.java │ │ │ ├── FarsiNormilizerFilter.java │ │ │ ├── FarsiStemFilter.java │ │ │ ├── FarsiStemmer.java │ │ │ ├── FarsiStopWordTokenFilter.java │ │ │ └── perstem.pl │ └── org │ │ └── elasticsearch │ │ ├── index │ │ └── analysis │ │ │ ├── FarsiAnalysisBinderProcessor.java │ │ │ └── FarsiAnalyzerProvider.java │ │ ├── indices │ │ └── analysis │ │ │ ├── FarsiIndicesAnalysis.java │ │ │ └── FarsiIndicesAnalysisModule.java │ │ └── plugin │ │ └── analysis │ │ └── farsi │ │ └── AnalysisFarsiPlugin.java │ ├── resources │ └── plugin-descriptor.properties │ └── uml │ └── IKAnalysisBinderProcessor.uml └── testSrc └── org └── elasticsearch └── analysis └── test └── TestStemmer.java /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | script: mvn clean package 3 | deploy: 4 | provider: releases 5 | api_key: 6 | secure: llxJZlRYBIWINl5XI42RpEe+jTxlmSP6MX+oTNZa4oFjEeN9Kdd1G8+S3HSIhCc31RoF/2zeNsM9OehRi1O6bweNSQ9vjlKZQPD8FYcHaHpYW0U7h/OMbEeC794fAghm9ZsmOTNymdvbAXL14nJTrwOW9W8VqoZT9Jx7Ejad63Y= 7 | file: target/releases/elasticsearch-analysis-ik-*.zip 8 | file_glob: true 9 | on: 10 | repo: medcl/elasticsearch-analysis-ik 11 | tags: true 12 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, 
January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Persian Analysis for Elasticsearch 2 | ============================= 3 | 4 | The Persian Analysis plugin integrates the Lucene Farsi analyzer into Elasticsearch. 5 | 6 | Analyzer: `farsi_analyzer` 7 | 8 | Install 9 | ------- 10 | 11 | 1.change the `elasticsearch.version` property in the `pom.xml` file to your Elasticsearch version. 
by default it's 2.2.1 12 | 13 | 2.compile 14 | 15 | `mvn package` 16 | 17 | copy and unzip `target/releases/elasticsearch-analysis-farsi-{version}.zip` to `your-es-root/plugins/farsi` 18 | 19 | 3.restart elasticsearch 20 | 21 | 22 | Quick Example 23 | ------- 24 | 1.create an index 25 | 26 | curl -XPUT http://localhost:9200/index 27 | 28 | 2.create a mapping 29 | 30 | `curl -XPOST http://localhost:9200/index/fulltext/_mapping -d' 31 | { 32 | "fulltext": { 33 | "_all": { 34 | "analyzer": "farsi_analyzer", 35 | "search_analyzer": "farsi_analyzer", 36 | "term_vector": "no", 37 | "store": "false" 38 | }, 39 | "properties": { 40 | "content": { 41 | "type": "string", 42 | "store": "no", 43 | "term_vector": "with_positions_offsets", 44 | "analyzer": "farsi_analyzer", 45 | "search_analyzer": "farsi_analyzer", 46 | "include_in_all": "true", 47 | "boost": 8 48 | } 49 | } 50 | } 51 | }'` 52 | 53 | 54 | 3.index some docs 55 | 56 | `curl -XPOST http://localhost:9200/index/fulltext/1 -d' 57 | {"content":"سلام امروز هوا خیلی خوبه"} 58 | '` 59 | -------------------------------------------------------------------------------- /config/ik/IKAnalyzer.cfg.xml: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | IK Analyzer 扩展配置 5 | 6 | custom/mydict.dic;custom/single_word_low_freq.dic 7 | 8 | custom/ext_stopword.dic 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /config/ik/custom/ext_stopword.dic: -------------------------------------------------------------------------------- 1 | 也 2 | 了 3 | 仍 4 | 从 5 | 以 6 | 使 7 | 则 8 | 却 9 | 又 10 | 及 11 | 对 12 | 就 13 | 并 14 | 很 15 | 或 16 | 把 17 | 是 18 | 的 19 | 着 20 | 给 21 | 而 22 | 被 23 | 让 24 | 在 25 | 还 26 | 比 27 | 等 28 | 当 29 | 与 30 | 于 31 | 但 -------------------------------------------------------------------------------- /config/ik/custom/mydict.dic: -------------------------------------------------------------------------------- 1 | medcl 
2 | elastic 3 | elasticsearch 4 | kogstash 5 | kibana 6 | marvel 7 | shield 8 | watcher 9 | beats 10 | packetbeat 11 | filebeat 12 | topbeat 13 | metrixbeat 14 | kimchy -------------------------------------------------------------------------------- /config/ik/custom/single_word_low_freq.dic: -------------------------------------------------------------------------------- 1 | 踧 2 | 覢 3 | 觓 4 | 覛 5 | 覅 6 | 覟 7 | 覗 8 | 覣 9 | 覭 10 | 覂 11 | 觡 12 | 覝 13 | 觟 14 | 褱 15 | 褰 16 | 襒 17 | 覞 18 | 袨 19 | 觏 20 | 赒 21 | 觇 22 | 謍 23 | 讙 24 | 襦 25 | 袤 26 | 誸 27 | 诮 28 | 衩 29 | 茷 30 | 趒 31 | 襌 32 | 诰 33 | 譠 34 | 袄 35 | 聱 36 | 豸 37 | 蠓 38 | 讵 39 | 袅 40 | 诂 41 | 裞 42 | 訄 43 | 荺 44 | 褂 45 | 蠡 46 | 裐 47 | 諴 48 | 芫 49 | 赧 50 | 触 51 | 跫 52 | 褫 53 | 赝 54 | 褡 55 | 衪 56 | 裎 57 | 豜 58 | 褶 59 | 裟 60 | 跏 61 | 袪 62 | 袈 63 | 觐 64 | 跄 65 | 坏 66 | 肱 67 | 裾 68 | 考 69 | 豝 70 | 踰 71 | 覃 72 | 蹓 73 | 黾 74 | 褴 75 | 轲 76 | 裨 77 | 蜇 78 | 鮆 79 | 褥 80 | 誊 81 | 貉 82 | 褊 83 | 蜉 84 | 衔 85 | 詄 86 | 豋 87 | 胼 88 | 荞 89 | 踫 90 | 谗 91 | 耦 92 | 誏 93 | 衮 94 | 胝 95 | 幔 96 | 轭 97 | 赈 98 | 贲 99 | 蓼 100 | 褛 101 | 迵 102 | 觊 103 | 蚜 104 | 讫 105 | 颢 106 | 葄 107 | 觎 108 | 诎 109 | 謢 110 | 蹧 111 | 邬 112 | 芊 113 | 赣 114 | 囱 115 | 蝎 116 | 夆 117 | 蠋 118 | 蠕 119 | 蹼 120 | 臊 121 | 蛭 122 | 颚 123 | 讴 124 | 踽 125 | 菫 126 | 臾 127 | 薮 128 | 蹒 129 | 谀 130 | 菀 131 | 佶 132 | 摀 133 | 佚 134 | 邸 135 | 跺 136 | 豊 137 | 荔 138 | 锌 139 | 诿 140 | 蕤 141 | 诳 142 | 芩 143 | 蹴 144 | 褉 145 | 觔 146 | 舴 147 | 腋 148 | 颍 149 | 膊 150 | 脯 151 | 荪 152 | 郢 153 | 坛 154 | 轫 155 | 醺 156 | 捺 157 | 姝 158 | 胭 159 | 饷 160 | 谪 161 | 驮 162 | 僮 163 | 踯 164 | 忪 165 | 驷 166 | 躅 167 | 忑 168 | 彧 169 | 衲 170 | 唠 171 | 跚 172 | 吃 173 | 诩 174 | 褓 175 | 诤 176 | 豨 177 | 诋 178 | 菈 179 | 逖 180 | 荟 181 | 裆 182 | 喋 183 | 忖 184 | 闾 185 | 诌 186 | 啻 187 | 铀 188 | 菡 189 | 胱 190 | 蹬 191 | 隹 192 | 鹬 193 | 诒 194 | 轧 195 | 萏 196 | 舶 197 | 鳅 198 | 药 199 | 酯 200 | 夯 201 | 偬 202 | 酝 203 | 跻 204 | 咤 205 | 掬 206 | 呆 207 | 蹶 208 | 踞 209 | 蝌 210 | 咋 211 | 谧 212 | 舫 213 | 啐 214 | 茸 215 | 谟 216 | 嵌 217 
| 蜿 218 | 魇 219 | 帷 220 | 觑 221 | 鳍 222 | 谏 223 | 哽 224 | 乓 225 | 蚌 226 | 嗙 227 | 巿 228 | 刽 229 | 踱 230 | 腆 231 | 薏 232 | 蜃 233 | 谑 234 | 躄 235 | 鸾 236 | 齁 237 | 腼 238 | 呷 239 | 吆 240 | 荀 241 | 裱 242 | 辇 243 | 睫 244 | 伎 245 | 妲 246 | 菠 247 | 鼐 248 | 麾 249 | 芮 250 | 鲑 251 | 辉 252 | 啜 253 | 苞 254 | 踼 255 | 荃 256 | 杞 257 | 浣 258 | 沬 259 | 胤 260 | 恿 261 | 驭 262 | 逵 263 | 钛 264 | 徕 265 | 贮 266 | 蔫 267 | 锚 268 | 衙 269 | 肄 270 | 豺 271 | 闸 272 | 隋 273 | 腑 274 | 脐 275 | 脓 276 | 叱 277 | 迥 278 | 踝 279 | 馥 280 | 佣 281 | 喳 282 | 迩 283 | 贻 284 | 诙 285 | 椭 286 | 琬 287 | 赂 288 | 诧 289 | 苯 290 | 怂 291 | 蟆 292 | 龊 293 | 漳 294 | 迭 295 | 垛 296 | 铲 297 | 馊 298 | 娓 299 | 葆 300 | 赑 301 | 卍 302 | 遽 303 | 谯 304 | 賏 305 | 蛹 306 | 锤 307 | 粟 308 | 衿 309 | 渥 310 | 铳 311 | 刍 312 | 镳 313 | 匮 314 | 万 315 | 骁 316 | 酣 317 | 酉 318 | 骥 319 | 寨 320 | 蓁 321 | 诽 322 | 钡 323 | 浙 324 | 酗 325 | 跩 326 | 拗 327 | 坷 328 | 雱 329 | 闺 330 | 喈 331 | 晔 332 | 螳 333 | 谙 334 | 蹂 335 | 鞑 336 | 蔗 337 | 账 338 | 垚 339 | 瞩 340 | 谩 341 | 掳 342 | 媲 343 | 葾 344 | 鳗 345 | 钣 346 | 檀 347 | 阕 348 | 聿 349 | 蜍 350 | 仆 351 | 嗅 352 | 峥 353 | 蜈 354 | 垠 355 | 蚓 356 | 麓 357 | 殉 358 | 弩 359 | 朴 360 | 胥 361 | 瘴 362 | 篑 363 | 镍 364 | 鹂 365 | 暐 366 | 榷 367 | 咀 368 | 佯 369 | 蚣 370 | 荻 371 | 鬓 372 | 仝 373 | 裴 374 | 讷 375 | 孺 376 | 咨 377 | 俑 378 | 遴 379 | 吽 380 | 笋 381 | 耀 382 | 霾 383 | 绎 384 | 咿 385 | 骸 386 | 霭 387 | 昕 388 | 漩 389 | 浒 390 | 轼 391 | 婿 392 | 嗳 393 | 钙 394 | 谲 395 | 蛾 396 | 跛 397 | 惺 398 | 翎 399 | 炽 400 | 晒 401 | 钳 402 | 鞘 403 | 谚 404 | 钊 405 | 背 406 | 瀛 407 | 槌 408 | 臀 409 | 跋 410 | 窒 411 | 藤 412 | 噬 413 | 蓊 414 | 褐 415 | 蔺 416 | 鲍 417 | 鲨 418 | 舔 419 | 箔 420 | 萦 421 | 诏 422 | 褔 423 | 咄 424 | 俘 425 | 彪 426 | 饪 427 | 嘱 428 | 诬 429 | 踮 430 | 囝 431 | 佢 432 | 汶 433 | 讹 434 | 踅 435 | 咐 436 | 讼 437 | 玟 438 | 迂 439 | 亵 440 | 婵 441 | 馁 442 | 崭 443 | 惦 444 | 蠹 445 | 濒 446 | 匈 447 | 蟋 448 | 谕 449 | 酪 450 | 眛 451 | 煦 452 | 甭 453 | 谄 454 | 妾 455 | 梧 456 | 芜 457 | 蛎 458 | 颐 459 | 雌 460 | 褒 461 | 臼 462 | 圳 463 | 剔 464 | 噶 465 | 耨 466 | 嗈 467 
| 勋 468 | 冶 469 | 扑 470 | 膺 471 | 腺 472 | 荤 473 | 坞 474 | 羲 475 | 栾 476 | 傌 477 | 幌 478 | 噗 479 | 蛀 480 | 觞 481 | 塾 482 | 耙 483 | 枭 484 | 擞 485 | 缅 486 | 踌 487 | 蟀 488 | 侥 489 | 诣 490 | 姜 491 | 甸 492 | 俭 493 | 泠 494 | 躇 495 | 萌 496 | 虏 497 | 匕 498 | 藩 499 | 嗽 500 | 蜻 501 | 咛 502 | 艹 503 | 跎 504 | 蔬 505 | 鸠 506 | 跆 507 | 肋 508 | 巅 509 | 芯 510 | 荐 511 | 荼 512 | 慵 513 | 咸 514 | 杭 515 | 樟 516 | 夸 517 | 戮 518 | 吱 519 | 模 520 | 葔 521 | 迢 522 | 砰 523 | 须 524 | 蒜 525 | 骐 526 | 茱 527 | 痊 528 | 蛤 529 | 蜴 530 | 诟 531 | 俾 532 | 疮 533 | 悴 534 | 袒 535 | 蒹 536 | 镖 537 | 娥 538 | 鹉 539 | 婊 540 | 噫 541 | 矜 542 | 岳 543 | 鹦 544 | 葭 545 | 褚 546 | 嵩 547 | 丫 548 | 凛 549 | 峦 550 | 惚 551 | 懊 552 | 韶 553 | 憋 554 | 聋 555 | 讪 556 | 瘫 557 | 霓 558 | 哺 559 | 蝙 560 | 靥 561 | 堇 562 | 铺 563 | 趾 564 | 褪 565 | 缆 566 | 媛 567 | 胧 568 | 肛 569 | 珈 570 | 畴 571 | 驹 572 | 熔 573 | 臆 574 | 肘 575 | 豁 576 | 冕 577 | 吊 578 | 韧 579 | 炜 580 | 舱 581 | 恁 582 | 巳 583 | 舵 584 | 臻 585 | 戊 586 | 稽 587 | 诲 588 | 隽 589 | 铐 590 | 鲫 591 | 畸 592 | 饥 593 | 茉 594 | 蒲 595 | 矶 596 | 峨 597 | 蚵 598 | 蔼 599 | 诛 600 | 焰 601 | 偈 602 | 蚱 603 | 骯 604 | 盔 605 | 巩 606 | 折 607 | 偕 608 | 嗓 609 | 辙 610 | 鸶 611 | 酵 612 | 莘 613 | 耘 614 | 汹 615 | 楞 616 | 陡 617 | 裳 618 | 憎 619 | 讳 620 | 荆 621 | 笃 622 | 屉 623 | 霈 624 | 恬 625 | 蹦 626 | 扬 627 | 侃 628 | 艳 629 | 璇 630 | 韬 631 | 烬 632 | 傀 633 | 铮 634 | 曦 635 | 搂 636 | 蝠 637 | 霄 638 | 胺 639 | 遐 640 | 飨 641 | 郡 642 | 困 643 | 呎 644 | 墅 645 | 鞠 646 | 瘤 647 | 藻 648 | 咆 649 | 踹 650 | 狷 651 | 镀 652 | 桐 653 | 赘 654 | 揽 655 | 炬 656 | 氢 657 | 膛 658 | 搪 659 | 湿 660 | 唆 661 | 兑 662 | 暸 663 | 厮 664 | 懈 665 | 媳 666 | 塘 667 | 靡 668 | 鹭 669 | 祟 670 | 冀 671 | 豚 672 | 蹄 673 | 橙 674 | 阎 675 | 硫 676 | 埠 677 | 噱 678 | 妃 679 | 搓 680 | 啃 681 | 俞 682 | 龚 683 | 橄 684 | 嚎 685 | 椎 686 | 蓦 687 | 朔 688 | 痘 689 | 鳞 690 | 铠 691 | 叽 692 | 跤 693 | 裔 694 | 诃 695 | 岫 696 | 怯 697 | 讥 698 | 聂 699 | 垢 700 | 藐 701 | 濑 702 | 莒 703 | 淇 704 | 毯 705 | 礁 706 | 赃 707 | 庐 708 | 辕 709 | 瞌 710 | 锯 711 | 莓 712 | 涡 713 | 昼 714 | 捌 715 | 嗡 716 | 倌 717 
| 禹 718 | 蹋 719 | 卯 720 | 粪 721 | 耽 722 | 闰 723 | 曳 724 | 苔 725 | 诵 726 | 菇 727 | 斟 728 | 芥 729 | 莅 730 | 喀 731 | 麒 732 | 颊 733 | 扛 734 | 曜 735 | 咎 736 | 缮 737 | 诫 738 | 躁 739 | 茜 740 | 缤 741 | 暧 742 | 郄 743 | 酥 744 | 僻 745 | 躬 746 | 峙 747 | 驯 748 | 噎 749 | 厦 750 | 澜 751 | 杏 752 | 樽 753 | 勘 754 | 煤 755 | 茎 756 | 嚷 757 | 昆 758 | 铸 759 | 烘 760 | 邹 761 | 廓 762 | 拚 763 | 俐 764 | 裘 765 | 饵 766 | 恃 767 | 蔓 768 | 笙 769 | 茁 770 | 楷 771 | 嚼 772 | 锻 773 | 蕊 774 | 脖 775 | 茍 776 | 壤 777 | 琮 778 | 莽 779 | 塌 780 | 蚤 781 | 膳 782 | 磋 783 | 蓓 784 | 澈 785 | 萎 786 | 擒 787 | 禄 788 | 儡 789 | 懦 790 | 瞻 791 | 虔 792 | 粥 793 | 赦 794 | 畜 795 | 彷 796 | 寥 797 | 揣 798 | 嫖 799 | 朽 800 | 挂 801 | 啄 802 | 浇 803 | 崖 804 | 棠 805 | 禽 806 | 台 807 | 邂 808 | 矫 809 | 茅 810 | 惫 811 | 吠 812 | 苟 813 | 叩 814 | 徊 815 | 巍 816 | 舆 817 | 邵 818 | 彗 819 | 萃 820 | 拱 821 | 嘶 822 | 貂 823 | 趴 824 | 愿 825 | 脊 826 | 冗 827 | 杆 828 | 蕙 829 | 铎 830 | 囚 831 | 啼 832 | 谤 833 | 徘 834 | 芹 835 | 骆 836 | 夭 837 | 饺 838 | 馒 839 | 溺 840 | 咫 841 | 屐 842 | 绅 843 | 诅 844 | 缉 845 | 渣 846 | 敞 847 | 萱 848 | 丰 849 | 俏 850 | 螃 851 | 蜀 852 | 徽 853 | 逞 854 | 跪 855 | 虞 856 | 隙 857 | 匀 858 | 憧 859 | 辄 860 | 鸳 861 | 疵 862 | 跷 863 | 呱 864 | 穆 865 | 阑 866 | 搏 867 | 肾 868 | 靶 869 | 阱 870 | 囡 871 | 寰 872 | 庄 873 | 蟾 874 | 怠 875 | 腕 876 | 烟 877 | 巾 878 | 奢 879 | 垄 880 | 姨 881 | 躯 882 | 肺 883 | 钰 884 | 佰 885 | 阙 886 | 雏 887 | 溉 888 | 焚 889 | 丑 890 | 锥 891 | 诘 892 | 瞪 893 | 茹 894 | 绊 895 | 蚀 896 | 袱 897 | 煽 898 | 窕 899 | 掷 900 | 沮 901 | 钞 902 | 涕 903 | 浏 904 | 仄 905 | 孰 906 | 峻 907 | 皱 908 | 芦 909 | 膏 910 | 晰 911 | 衬 912 | 谍 913 | 丞 914 | 绽 915 | 蔽 916 | 呕 917 | 轿 918 | 隶 919 | 楠 920 | 匣 921 | 葵 922 | 沫 923 | 刃 924 | 禧 925 | 晦 926 | 哔 927 | 晖 928 | 绣 929 | 仟 930 | 窟 931 | 谛 932 | 瀚 933 | 黛 934 | 忿 935 | 姚 936 | 蜘 937 | 耸 938 | 捍 939 | 斐 940 | 卜 941 | 辗 942 | 刁 943 | 涅 944 | 泓 945 | 梵 946 | 扳 947 | 暇 948 | 袜 949 | 柠 950 | 傍 951 | 逮 952 | 呃 953 | 蜗 954 | 窍 955 | 琉 956 | 喃 957 | 溢 958 | 抉 959 | 旷 960 | 卅 961 | 亟 962 | 膝 963 | 伶 964 | 闇 965 | 莺 966 | 蔚 967 
| 醋 968 | 瑛 969 | 拭 970 | 绮 971 | 鑫 972 | 圭 973 | 脂 974 | 酿 975 | 诈 976 | 膨 977 | 隧 978 | 惭 979 | 庚 980 | 衅 981 | 哨 982 | 凋 983 | 里 984 | 祯 985 | 撼 986 | 谭 987 | 稻 988 | 迋 989 | 碌 990 | 罕 991 | 逾 992 | 嗜 993 | 蹲 994 | 檬 995 | 肖 996 | 辖 997 | 襟 998 | 扎 999 | 槟 1000 | 缔 1001 | 袂 1002 | 敷 1003 | 腥 1004 | 喘 1005 | 簿 1006 | 鳖 1007 | 出 1008 | 噢 1009 | 炫 1010 | 佑 1011 | 贷 1012 | 粮 1013 | 荳 1014 | 桦 1015 | 颉 1016 | 哑 1017 | 倪 1018 | 颤 1019 | 御 1020 | 芽 1021 | 朦 1022 | 裹 1023 | 贬 1024 | 蕉 1025 | 蝉 1026 | 赎 1027 | 崔 1028 | 滔 1029 | 茵 1030 | 径 1031 | 克 1032 | 啤 1033 | 拯 1034 | 坟 1035 | 葱 1036 | 芋 1037 | 瞒 1038 | 掠 1039 | 绳 1040 | 蛛 1041 | 匠 1042 | 凸 1043 | 苛 1044 | 押 1045 | 楣 1046 | 芙 1047 | 酌 1048 | 俺 1049 | 掏 1050 | 倡 1051 | 唾 1052 | 瞄 1053 | 磊 1054 | 吼 1055 | 搅 1056 | 溃 1057 | 聆 1058 | 沌 1059 | 蝇 1060 | 鸥 1061 | 妒 1062 | 焕 1063 | 拙 1064 | 夷 1065 | 迄 1066 | 绰 1067 | 锵 1068 | 耿 1069 | 祺 1070 | 吶 1071 | 惶 1072 | 廊 1073 | 兜 1074 | 倩 1075 | 杖 1076 | 窄 1077 | 僚 1078 | 竖 1079 | 芷 1080 | 咚 1081 | 鲢 1082 | 沛 1083 | 挪 1084 | 柄 1085 | 顷 1086 | 璞 1087 | 裸 1088 | 鵰 1089 | 郊 1090 | 屿 1091 | 仕 1092 | 艘 1093 | 铅 1094 | 铝 1095 | 饲 1096 | 黯 1097 | 疫 1098 | 栽 1099 | 喉 1100 | 逗 1101 | 祇 1102 | 阪 1103 | 侍 1104 | 抒 1105 | 弗 1106 | 尬 1107 | 浦 1108 | 鄙 1109 | 盏 1110 | 喽 1111 | 炳 1112 | 卵 1113 | 肌 1114 | 迦 1115 | 擅 1116 | 豹 1117 | 胏 1118 | 炼 1119 | 悸 1120 | 谴 1121 | 贾 1122 | 胀 1123 | 疋 1124 | 矿 1125 | 梨 1126 | 碑 1127 | 髓 1128 | 巢 1129 | 叹 1130 | 屡 1131 | 滩 1132 | 侮 1133 | 橘 1134 | 嘲 1135 | 酬 1136 | 枚 1137 | 氓 1138 | 菌 1139 | 颁 1140 | 萝 1141 | 谘 1142 | 曝 1143 | 薯 1144 | 襄 1145 | 辽 1146 | 萄 1147 | 寇 1148 | 舜 1149 | 颂 1150 | 撰 1151 | 腻 1152 | 崩 1153 | 咕 1154 | 癌 1155 | 歇 1156 | 汰 1157 | 烁 1158 | 撇 1159 | 宴 1160 | 惩 1161 | 烛 1162 | 贰 1163 | 呻 1164 | 呒 1165 | 翩 1166 | 绑 1167 | 捞 1168 | 爹 1169 | 秉 1170 | 棉 1171 | 妓 1172 | 尉 1173 | 霍 1174 | 甫 1175 | 尝 1176 | 葡 1177 | 蒸 1178 | 鸦 1179 | 挚 1180 | 奸 1181 | 纬 1182 | 艰 1183 | 履 1184 | 葬 1185 | 滨 1186 | 耕 1187 | 婴 1188 | 醇 1189 | 堵 1190 | 钉 1191 | 喧 1192 | 遂 
1193 | 锣 1194 | 垮 1195 | 蓬 1196 | 薛 1197 | 虐 1198 | 睁 1199 | 厨 1200 | 娶 1201 | 浆 1202 | 挨 1203 | 矢 1204 | 蕾 1205 | 伺 1206 | 券 1207 | 鹏 1208 | 削 1209 | 蓄 1210 | 琦 1211 | 熄 1212 | 湘 1213 | 慌 1214 | 枕 1215 | 衍 1216 | 薇 1217 | 囊 1218 | 喂 1219 | 蕴 1220 | 倘 1221 | 峡 1222 | 浊 1223 | 窃 1224 | 颈 1225 | 裙 1226 | 晕 1227 | 缚 1228 | 获 1229 | 帕 1230 | 脾 1231 | 莹 1232 | 逍 1233 | 姬 1234 | 韦 1235 | 畔 1236 | 伐 1237 | 霞 1238 | 嘘 1239 | 盐 1240 | 摧 1241 | 债 1242 | 佩 1243 | 畏 1244 | 驴 1245 | 氧 1246 | 奴 1247 | 瘦 1248 | 菊 1249 | 廿 1250 | 狭 1251 | 赴 1252 | 碳 1253 | 坊 1254 | 盆 1255 | 趟 1256 | 匿 1257 | 肇 1258 | 溶 1259 | 揭 1260 | 剥 1261 | 沦 1262 | 秃 1263 | 郝 1264 | 唔 1265 | 锡 1266 | 娇 1267 | 抚 1268 | 屎 1269 | 甩 1270 | 娱 1271 | 表 1272 | 犬 1273 | 魁 1274 | 蒂 1275 | 皓 1276 | 祷 1277 | 瞎 1278 | 瘾 1279 | 煎 1280 | 螺 1281 | 遮 1282 | 坠 1283 | 剎 1284 | 筝 1285 | 棵 1286 | 冤 1287 | 崎 1288 | 昔 1289 | 驼 1290 | 竿 1291 | 甄 1292 | 斑 1293 | 歹 1294 | 骏 1295 | 缝 1296 | 鞭 1297 | 垫 1298 | 淹 1299 | 并 1300 | 遨 1301 | 宠 1302 | 掰 1303 | 枯 1304 | 艇 1305 | 豫 1306 | 募 1307 | 郁 1308 | 稚 1309 | 懿 1310 | 辐 1311 | 酱 1312 | 恕 1313 | 范 1314 | 涂 1315 | 滤 1316 | 肃 1317 | 膜 1318 | 佬 1319 | 哼 1320 | 慨 1321 | 穗 1322 | 辰 1323 | 雁 1324 | 瑟 1325 | 帆 1326 | 拢 1327 | 汁 1328 | 蝴 1329 | 冈 1330 | 诠 1331 | 蹈 1332 | 黏 1333 | 痞 1334 | 屑 1335 | 潇 1336 | 觅 1337 | 钧 1338 | 挣 1339 | 谐 1340 | 霜 1341 | 诊 1342 | 熬 1343 | 讽 1344 | 歧 1345 | 戈 1346 | 闯 1347 | 饶 1348 | 斤 1349 | 婉 1350 | 致 1351 | 贿 1352 | 苑 1353 | 矮 1354 | 毋 1355 | 詹 1356 | 祈 1357 | 咳 1358 | 昱 1359 | 佐 1360 | 帖 1361 | 猩 1362 | 尹 1363 | 诇 1364 | 肆 1365 | 亭 1366 | 丘 1367 | 淘 1368 | 颠 1369 | 勃 1370 | 讶 1371 | 抖 1372 | 袁 1373 | 柱 1374 | 僧 1375 | 蚊 1376 | 匹 1377 | 辣 1378 | 螂 1379 | 澡 1380 | 昧 1381 | 诡 1382 | 槽 1383 | 穴 1384 | 斩 1385 | 聘 1386 | 扶 1387 | 熙 1388 | 驰 1389 | 棍 1390 | 兆 1391 | 蟑 1392 | 矩 1393 | 谬 1394 | 贫 1395 | 鼎 1396 | 践 1397 | 盲 1398 | 眷 1399 | 尿 1400 | 伫 1401 | 饿 1402 | 砸 1403 | 妄 1404 | 荡 1405 | 炒 1406 | 冥 1407 | 偿 1408 | 墓 1409 | 骄 1410 | 毙 1411 | 淋 1412 | 芝 1413 | 胃 1414 | 宅 
1415 | 董 1416 | 梭 1417 | 凑 1418 | 宰 1419 | 卑 1420 | 丛 1421 | 纠 1422 | 肢 1423 | 闽 1424 | 铜 1425 | 寺 1426 | 瞬 1427 | 澳 1428 | 庞 1429 | 腔 1430 | 泼 1431 | 昂 1432 | 梁 1433 | 躺 1434 | 姻 1435 | 潭 1436 | 吋 1437 | 撤 1438 | 殖 1439 | 轴 1440 | 颖 1441 | 冻 1442 | 琼 1443 | 恳 1444 | 衫 1445 | 譬 1446 | 猎 1447 | 衰 1448 | 桶 1449 | 辜 1450 | 筒 1451 | 赫 1452 | 仗 1453 | 膀 1454 | 乳 1455 | 嚣 1456 | 划 1457 | 玮 1458 | 卿 1459 | 枉 1460 | 埃 1461 | 跨 1462 | 粹 1463 | 猴 1464 | 愤 1465 | 壹 1466 | 卢 1467 | 尧 1468 | 翰 1469 | 叮 1470 | 媚 1471 | 钮 1472 | 袖 1473 | 斌 1474 | 卓 1475 | 粽 1476 | 雀 1477 | 谦 1478 | 傅 1479 | 殿 1480 | 睹 1481 | 菁 1482 | 桂 1483 | 诱 1484 | 舌 1485 | 惟 1486 | 岗 1487 | 衷 1488 | 屈 1489 | 陋 1490 | 陌 1491 | 宵 1492 | 麟 1493 | 魏 1494 | 贸 1495 | 几 1496 | 埔 1497 | 谎 1498 | 袍 1499 | 卸 1500 | 仓 1501 | 匪 1502 | 叛 1503 | 肠 1504 | 肝 1505 | 俄 1506 | 孕 1507 | 庙 1508 | 嫁 1509 | 肤 1510 | 拦 1511 | 羯 1512 | 匙 1513 | 咏 1514 | 蠢 1515 | 纽 1516 | 拘 1517 | 旨 1518 | 胁 1519 | 馨 1520 | 珊 1521 | 签 1522 | 赔 1523 | 秩 1524 | 喻 1525 | 谜 1526 | 翠 1527 | 芭 1528 | 摊 1529 | 侣 1530 | 灿 1531 | 寡 1532 | 罐 1533 | 贼 1534 | 叙 1535 | 谨 1536 | 体 1537 | 敲 1538 | 浴 1539 | 吻 1540 | 臂 1541 | 袭 1542 | 煮 1543 | 腹 1544 | 暮 1545 | 曹 1546 | 虹 1547 | 抑 1548 | 贩 1549 | 踩 1550 | 澎 1551 | 糖 1552 | 催 1553 | 萍 1554 | 垂 1555 | 斥 1556 | 侬 1557 | 拷 1558 | 唤 1559 | 匆 1560 | 阮 1561 | 飙 1562 | 柴 1563 | 剂 1564 | 妖 1565 | 添 1566 | 畅 1567 | 汗 1568 | 鸭 1569 | 稀 1570 | 晋 1571 | 埋 1572 | 弊 1573 | 返 1574 | 叡 1575 | 娟 1576 | 玻 1577 | 腾 1578 | 栋 1579 | 歪 1580 | 邓 1581 | 渴 1582 | 粒 1583 | 泣 1584 | 疾 1585 | 蓉 1586 | 塑 1587 | 祂 1588 | 储 1589 | 劣 1590 | 柯 1591 | 陶 1592 | 患 1593 | 蛇 1594 | 腐 1595 | 琳 1596 | 慎 1597 | 泊 1598 | 牢 1599 | 呈 1600 | 趁 1601 | 恶 1602 | 浑 1603 | 扮 1604 | 樱 1605 | 臣 1606 | 遵 1607 | 缠 1608 | 虫 1609 | 撒 1610 | 叉 1611 | 刑 1612 | 苗 1613 | 脉 1614 | 盈 1615 | 津 1616 | 愧 1617 | 摔 1618 | 盒 1619 | 丧 1620 | 鹤 1621 | 呦 1622 | 厕 1623 | 斜 1624 | 芒 1625 | 翅 1626 | 悄 1627 | 晃 1628 | 茂 1629 | 寸 1630 | 杉 1631 | 旺 1632 | 俩 1633 | 雯 1634 | 霖 1635 | 递 1636 | 胶 
1637 | 氛 1638 | 谣 1639 | 捉 1640 | 虾 1641 | 秘 1642 | 漠 1643 | 扭 1644 | 贞 1645 | 陵 1646 | 叔 1647 | 轨 1648 | 鹅 1649 | 液 1650 | 妥 1651 | 贱 1652 | 涨 1653 | 滥 1654 | 痕 1655 | 沿 1656 | 秤 1657 | 措 1658 | 巡 1659 | 丈 1660 | 魅 1661 | 欲 1662 | 缸 1663 | 鹿 1664 | 汝 1665 | 迁 1666 | 矣 1667 | 肩 1668 | 烤 1669 | 笛 1670 | 迅 1671 | 劫 1672 | 趋 1673 | 披 1674 | 荷 1675 | 卒 1676 | 丙 1677 | 碗 1678 | 伙 1679 | 椅 1680 | 赞 1681 | 侦 1682 | 灾 1683 | 秦 1684 | 蛙 1685 | 禅 1686 | 慰 1687 | 余 1688 | 朗 1689 | 辱 1690 | 征 1691 | 愚 1692 | 抛 1693 | 挺 1694 | 彭 1695 | 允 1696 | 靖 1697 | 滋 1698 | 凝 1699 | 赠 1700 | 莎 1701 | 顽 1702 | 狠 1703 | 堕 1704 | 翘 1705 | 惹 1706 | 纲 1707 | 贯 1708 | 饼 1709 | 抬 1710 | 逆 1711 | 堪 1712 | 坤 1713 | 斗 1714 | 钦 1715 | 疏 1716 | 羞 1717 | 扇 1718 | 蜂 1719 | 赌 1720 | 驻 1721 | 屏 1722 | 爵 1723 | 轰 1724 | 契 1725 | 悦 1726 | 邻 1727 | 哉 1728 | 陀 1729 | 裂 1730 | 刷 1731 | 毅 1732 | 拾 1733 | 疼 1734 | 阔 1735 | 耍 1736 | 亏 1737 | 吟 1738 | 锐 1739 | 惧 1740 | 锅 1741 | 蝶 1742 | 壳 1743 | 糕 1744 | 舟 1745 | 牧 1746 | 妮 1747 | 粗 1748 | 仇 1749 | 驶 1750 | 促 1751 | 孝 1752 | 裤 1753 | 誉 1754 | 家 1755 | 迈 1756 | 姿 1757 | 踪 1758 | 兔 1759 | 综 1760 | 旭 1761 | 韵 1762 | 齿 1763 | 乔 1764 | 怖 1765 | 晴 1766 | 闷 1767 | 墨 1768 | 咬 1769 | 侧 1770 | 狱 1771 | 琪 1772 | 梯 1773 | 宾 1774 | 枫 1775 | 锦 1776 | 瑜 1777 | 敦 1778 | 矛 1779 | 弘 1780 | 玛 1781 | 茫 1782 | 迪 1783 | 览 1784 | 挤 1785 | 雳 1786 | 岚 1787 | 卷 1788 | 黎 1789 | 薄 1790 | 柳 1791 | 咦 1792 | 廷 1793 | 瞧 1794 | 幅 1795 | 挖 1796 | 唬 1797 | 侯 1798 | 祸 1799 | 饰 1800 | 儒 1801 | 捡 1802 | 筋 1803 | 融 1804 | 耗 1805 | 铃 1806 | 奉 1807 | 鼻 1808 | 坜 1809 | 曼 1810 | 贡 1811 | 嗨 1812 | 炎 1813 | 啡 1814 | 捐 1815 | 炮 1816 | 霹 1817 | 貌 1818 | 鸣 1819 | 饱 1820 | 廉 1821 | 绘 1822 | 咪 1823 | 吝 1824 | 肚 1825 | 云 1826 | 翼 1827 | 氏 1828 | 骚 1829 | 爷 1830 | 寿 1831 | 绕 1832 | 唷 1833 | 牺 1834 | 屠 1835 | 谋 1836 | 彻 1837 | 俱 1838 | 粉 1839 | 雾 1840 | 涵 1841 | 侨 1842 | 础 1843 | 疗 1844 | 署 1845 | 稿 1846 | 涉 1847 | 稣 1848 | 誓 1849 | 箭 1850 | 涯 1851 | 锺 1852 | 迹 1853 | 抄 1854 | 踢 1855 | 贪 1856 | 咖 1857 | 莱 1858 | 夺 
1859 | 勉 1860 | 焦 1861 | 蒋 1862 | 桑 1863 | 沧 1864 | 恰 1865 | 泳 1866 | 牲 1867 | 戒 1868 | 恼 1869 | 夕 1870 | 棚 1871 | 爬 1872 | 菲 1873 | 翁 1874 | 奔 1875 | 滴 1876 | 玄 1877 | 捷 1878 | 曰 1879 | 愉 1880 | 逊 1881 | 憾 1882 | 钓 1883 | 壁 1884 | 躲 1885 | 嫌 1886 | 姆 1887 | 乏 1888 | 洛 1889 | 逼 1890 | 磨 1891 | 剪 1892 | 逝 1893 | 亨 1894 | 盼 1895 | 杯 1896 | 敝 1897 | 碍 1898 | 痴 1899 | 植 1900 | 瑰 1901 | 勤 1902 | 悟 1903 | 彬 1904 | 删 1905 | 薪 1906 | 悠 1907 | 胎 1908 | 侵 1909 | 坪 1910 | 赋 1911 | 弯 1912 | 丹 1913 | 巫 1914 | 轩 1915 | 辨 1916 | 吐 1917 | 么 1918 | 盾 1919 | 扯 1920 | 割 1921 | 艾 1922 | 幼 1923 | 捕 1924 | 召 1925 | 怒 1926 | 坡 1927 | 缓 1928 | 猛 1929 | 驾 1930 | 莉 1931 | 彦 1932 | 韩 1933 | 鞋 1934 | 碧 1935 | 泽 1936 | 泉 1937 | 缴 1938 | 跃 1939 | 喇 1940 | 腿 1941 | 糟 1942 | 胆 1943 | 摘 1944 | 朵 1945 | 逛 1946 | 甜 1947 | 拔 1948 | 劲 1949 | 悉 1950 | 穷 1951 | 汤 1952 | 唐 1953 | 臭 1954 | 玲 1955 | 怡 1956 | 舍 1957 | 欺 1958 | 蜜 1959 | 耻 1960 | 坦 1961 | 叭 1962 | 亿 1963 | 忌 1964 | 鲁 1965 | 繁 1966 | 泥 1967 | 伸 1968 | 壮 1969 | 串 1970 | 圾 1971 | 币 1972 | 荒 1973 | 垃 1974 | 妇 1975 | 旦 1976 | 截 1977 | 喷 1978 | 碎 1979 | 吕 1980 | 犹 1981 | 抹 1982 | 脆 1983 | 煞 1984 | 胞 1985 | 晶 1986 | 潜 1987 | 玫 1988 | 妻 1989 | 估 1990 | 陷 1991 | 孔 1992 | 娃 1993 | 兽 1994 | 肥 1995 | 凉 1996 | 岂 1997 | 逻 1998 | 胸 1999 | 杜 2000 | 袋 2001 | 甘 2002 | 邀 2003 | 培 2004 | 龄 2005 | 辆 2006 | 廖 2007 | 冲 2008 | 渡 2009 | 羽 2010 | 秒 2011 | 辞 2012 | 倾 2013 | 窝 2014 | 柏 2015 | 淑 2016 | 诞 2017 | 漏 2018 | 姑 2019 | 托 2020 | 吾 2021 | 纷 2022 | 拆 2023 | 浩 2024 | 税 2025 | 邱 2026 | 迟 2027 | 筹 2028 | 监 2029 | 汪 2030 | 擎 2031 | 衡 2032 | 狐 2033 | 灰 2034 | 尖 2035 | 番 2036 | 罚 2037 | 证 2038 | 盗 2039 | 祥 2040 | 毫 2041 | 彰 2042 | 扩 2043 | 幽 2044 | 阐 2045 | 喊 2046 | 菩 2047 | 赐 2048 | 奋 2049 | 鲜 2050 | 劝 2051 | 栏 2052 | 慈 2053 | 扫 2054 | 尽 2055 | 穹 2056 | 丌 2057 | 绪 2058 | 砂 2059 | 勿 2060 | 抢 2061 | 啪 2062 | 庸 2063 | 赤 2064 | 饮 2065 | 萨 2066 | 兼 2067 | 访 2068 | 舒 2069 | 裕 2070 | 逸 2071 | 宙 2072 | 丸 2073 | 准 2074 | 魂 2075 | 厚 2076 | 励 2077 | 仰 2078 | 糊 2079 | 顿 2080 | 闭 
2081 | 塔 2082 | 枪 2083 | 睛 2084 | 斋 2085 | 奥 2086 | 恭 2087 | 翔 2088 | 遥 2089 | 航 2090 | 孟 2091 | 昌 2092 | 卧 2093 | 颇 2094 | 革 2095 | 邪 2096 | 阻 2097 | 蟹 2098 | 裁 2099 | 后 2100 | 函 2101 | 于 2102 | 拳 2103 | 宽 2104 | 锋 2105 | 州 2106 | 葛 2107 | 拒 2108 | 池 2109 | 镇 2110 | 芬 2111 | 岸 2112 | 寞 2113 | 凭 2114 | 姊 2115 | 殊 2116 | 板 2117 | 勒 2118 | 慕 2119 | 跌 2120 | 踏 2121 | 填 2122 | 陪 2123 | 逐 2124 | 洽 2125 | 描 2126 | 妨 2127 | 仪 2128 | 摄 2129 | 紫 2130 | 谅 2131 | 阅 2132 | 邦 2133 | 麦 2134 | 莲 2135 | 闪 2136 | 纵 2137 | 庭 2138 | 圈 2139 | 榜 2140 | 滑 2141 | 舰 2142 | 面 2143 | 献 2144 | 浅 2145 | 飘 2146 | 宋 2147 | 俗 2148 | 沟 2149 | 巷 2150 | 眠 2151 | 帽 2152 | 惑 2153 | 羊 2154 | 牵 2155 | 净 2156 | 厉 2157 | 撞 2158 | 崇 2159 | 竞 2160 | 回 2161 | 乙 2162 | 聪 2163 | 桃 2164 | 伍 2165 | 役 2166 | 潮 2167 | 损 2168 | 凯 2169 | 锁 2170 | 震 2171 | 醉 2172 | 屁 2173 | 牠 2174 | 孙 2175 | 酷 2176 | 染 2177 | 尺 2178 | 摸 2179 | 盛 2180 | 闹 2181 | 棋 2182 | 吓 2183 | 迫 2184 | 瓜 2185 | 松 2186 | 搬 2187 | 戴 2188 | 瞭 2189 | 乌 2190 | 谱 2191 | 滚 2192 | 赚 2193 | 障 2194 | 逃 2195 | 齐 2196 | 牙 2197 | 怨 2198 | 拖 2199 | 皇 2200 | 贺 2201 | 横 2202 | 塞 2203 | 摆 2204 | 农 2205 | 倍 2206 | 额 2207 | 乘 2208 | 户 2209 | 奈 2210 | 川 2211 | 徐 2212 | 井 2213 | 寝 2214 | 洞 2215 | 劳 2216 | 船 2217 | 域 2218 | 屋 2219 | 胖 2220 | 藉 2221 | 销 2222 | 拼 2223 | 桌 2224 | 忧 2225 | 违 2226 | 拟 2227 | 吵 2228 | 媒 2229 | 辩 2230 | 妙 2231 | 鸿 2232 | 恩 2233 | 映 2234 | 耳 2235 | 傻 2236 | 京 2237 | 搭 2238 | 残 2239 | 稍 2240 | 颜 2241 | 固 2242 | 眉 2243 | 龟 2244 | 哀 2245 | 发 2246 | 沈 2247 | 拨 2248 | 丁 2249 | 愁 2250 | 耐 2251 | 宪 2252 | 覆 2253 | 盟 2254 | 昭 2255 | 握 2256 | 萧 2257 | 延 2258 | 豆 2259 | 弱 2260 | 隆 2261 | 页 2262 | 烧 2263 | 遍 2264 | 距 2265 | 摩 2266 | 祖 2267 | 探 2268 | 倚 2269 | 寂 2270 | 阴 2271 | 悔 2272 | 库 2273 | 嘴 2274 | 沉 2275 | 伊 2276 | 暂 2277 | 霸 2278 | 喵 2279 | 频 2280 | 鼓 2281 | 冒 2282 | 鼠 2283 | 企 2284 | 副 2285 | 菜 2286 | 款 2287 | 忽 2288 | 尾 2289 | 租 2290 | 椰 2291 | 隔 2292 | 狼 2293 | 浮 2294 | 惠 2295 | 峰 2296 | 索 2297 | 芳 2298 | 摇 2299 | 洪 2300 | 伦 2301 | 骨 2302 | 吹 
2303 | 郑 2304 | 哩 2305 | 珍 2306 | 纳 2307 | 零 2308 | 哲 2309 | 遭 2310 | 瓶 2311 | 亡 2312 | 振 2313 | 予 2314 | 村 2315 | 旅 2316 | 惨 2317 | 汽 2318 | 爸 2319 | 隐 2320 | 械 2321 | 寒 2322 | 危 2323 | 邮 2324 | 贝 2325 | 阶 2326 | 赖 2327 | 茶 2328 | 谊 2329 | 涛 2330 | 惯 2331 | 尘 2332 | 丝 2333 | 森 2334 | 询 2335 | 露 2336 | 稳 2337 | 桥 2338 | 夏 2339 | 哭 2340 | 坚 2341 | 籍 2342 | 厌 2343 | 苍 2344 | 析 2345 | 冰 2346 | 仙 2347 | 布 2348 | 箱 2349 | 脱 2350 | 贤 2351 | 途 2352 | 订 2353 | 财 2354 | 欧 2355 | 赢 2356 | 枢 2357 | 泪 2358 | 废 2359 | 钢 2360 | 渐 2361 | 泡 2362 | 刊 2363 | 肯 2364 | 恨 2365 | 砍 2366 | 抽 2367 | 股 2368 | 咧 2369 | 婆 2370 | 禁 2371 | 郎 2372 | 默 2373 | 符 2374 | 缩 2375 | 童 2376 | 绿 2377 | 骗 2378 | 辈 2379 | 尼 2380 | 届 2381 | 彼 2382 | 兮 2383 | 聚 2384 | 宇 2385 | 辛 2386 | 疯 2387 | 减 2388 | 米 2389 | 念 2390 | 降 2391 | 街 2392 | 临 2393 | 敏 2394 | 洗 2395 | 玉 2396 | 伴 2397 | 辅 2398 | 诺 2399 | 鸡 2400 | 侠 2401 | 健 2402 | 熊 2403 | 顶 2404 | 挑 2405 | 替 2406 | 豪 2407 | 掌 2408 | 饭 2409 | 银 2410 | 圆 2411 | 志 2412 | 休 2413 | 材 2414 | 灭 2415 | 烈 2416 | 爆 2417 | 透 2418 | 遗 2419 | 虚 2420 | 醒 2421 | 货 2422 | 雅 2423 | 宏 2424 | 帅 2425 | 宫 2426 | 港 2427 | 偶 2428 | 丢 2429 | 篮 2430 | 凡 2431 | 瑞 2432 | 硕 2433 | 雪 2434 | 忠 2435 | 蔡 2436 | 插 2437 | 积 2438 | 乖 2439 | 挥 2440 | 抗 2441 | 察 2442 | 末 2443 | 盖 2444 | 厅 2445 | 移 2446 | 吸 2447 | 括 2448 | 笨 2449 | 孤 2450 | 译 2451 | 避 2452 | 秀 2453 | 富 2454 | 漂 2455 | 柔 2456 | 私 2457 | 围 2458 | 狮 2459 | 祝 2460 | 庆 2461 | 序 2462 | 拥 2463 | 洲 2464 | 徒 2465 | 借 2466 | 晓 2467 | 嘉 2468 | 诗 2469 | 淡 2470 | 束 2471 | 姓 2472 | 颗 2473 | 勇 2474 | 犯 2475 | 喝 2476 | 食 2477 | 镜 2478 | 偏 2479 | 猜 2480 | 层 2481 | 帐 2482 | 仅 2483 | 购 2484 | 衣 2485 | 申 2486 | 伯 2487 | 紧 2488 | 县 2489 | 婚 2490 | 季 2491 | 敬 2492 | 弃 2493 | 尊 2494 | 蛋 2495 | 鹰 2496 | 熟 2497 | 冠 2498 | 唯 2499 | 混 2500 | 藏 2501 | 河 2502 | 忍 2503 | 窗 2504 | 朝 2505 | 轮 2506 | 册 2507 | 乡 2508 | 敌 2509 | 散 2510 | 沙 2511 | 幻 2512 | 短 2513 | 略 2514 | 批 2515 | 游 2516 | 奖 2517 | 岛 2518 | 逢 2519 | 脸 2520 | 顾 2521 | 督 2522 | 协 2523 | 雷 2524 | 详 
2525 | 穿 2526 | 慧 2527 | 巧 2528 | 罢 2529 | 呼 2530 | 暗 2531 | 贴 2532 | 纸 2533 | 歉 2534 | 郭 2535 | 努 2536 | 担 2537 | 蓝 2538 | 训 2539 | 享 2540 | 架 2541 | 济 2542 | 猪 2543 | 派 2544 | 均 2545 | 妈 2546 | 哦 2547 | 宣 2548 | 检 2549 | 鬼 2550 | 灯 2551 | 策 2552 | 梅 2553 | 启 2554 | 嘿 2555 | 洋 2556 | 伟 2557 | 萤 2558 | 磁 2559 | 啰 2560 | 付 2561 | 弄 2562 | 寄 2563 | 钟 2564 | 播 2565 | 险 2566 | 载 2567 | 赏 2568 | 汉 2569 | 块 2570 | 刀 2571 | 铭 2572 | 施 2573 | 卫 2574 | 弹 2575 | 售 2576 | 叶 2577 | 皆 2578 | 罪 2579 | 虎 2580 | 归 2581 | 毛 2582 | 昨 2583 | 荣 2584 | 律 2585 | 树 2586 | 奏 2587 | 注 2588 | 扁 2589 | 笔 2590 | 旁 2591 | 键 2592 | 制 2593 | 莫 2594 | 堆 2595 | 射 2596 | 承 2597 | 波 2598 | 皮 2599 | 释 2600 | 判 2601 | 含 2602 | 既 2603 | 退 2604 | 纪 2605 | 刻 2606 | 肉 2607 | 靠 2608 | 麻 2609 | 湖 2610 | 继 2611 | 诚 2612 | 姐 2613 | 益 2614 | 置 2615 | 惜 2616 | 艺 2617 | 尚 2618 | 纯 2619 | 骂 2620 | 琴 2621 | 漫 2622 | 援 2623 | 缺 2624 | 诸 2625 | 尤 2626 | 忆 2627 | 景 2628 | 府 2629 | 委 2630 | 刘 2631 | 绍 2632 | 虑 2633 | 暴 2634 | 草 2635 | 充 2636 | 授 2637 | 防 2638 | 素 2639 | 房 2640 | 搞 2641 | 典 2642 | 仔 2643 | 父 2644 | 吉 2645 | 招 2646 | 剑 2647 | 脚 2648 | 突 2649 | 牌 2650 | 餐 2651 | 仁 2652 | 酒 2653 | 礼 2654 | 巴 2655 | 丽 2656 | 亮 2657 | 恐 2658 | 述 2659 | 周 2660 | 杂 2661 | 旧 2662 | 套 2663 | 赵 2664 | 堂 2665 | 创 2666 | 母 2667 | 辑 2668 | 络 2669 | 俊 2670 | 毒 2671 | 威 2672 | 冷 2673 | 蛮 2674 | 普 2675 | 登 2676 | 微 2677 | 控 2678 | 爽 2679 | 香 2680 | 坐 2681 | 缘 2682 | 幕 2683 | 兰 2684 | 悲 2685 | 势 2686 | 午 2687 | 睡 2688 | 密 2689 | 垒 2690 | 警 2691 | 宗 2692 | 严 2693 | 阵 2694 | 江 2695 | 亚 2696 | 攻 2697 | 静 2698 | 抱 2699 | 啥 2700 | 急 2701 | 宿 2702 | 剧 2703 | 词 2704 | 忙 2705 | 牛 2706 | 吴 2707 | 陆 2708 | 维 2709 | 激 2710 | 增 2711 | 聊 2712 | 浪 2713 | 状 2714 | 良 -------------------------------------------------------------------------------- /config/ik/preposition.dic: -------------------------------------------------------------------------------- 1 | 不 2 | 也 3 | 了 4 | 仍 5 | 从 6 | 以 7 | 使 8 | 则 9 | 却 10 | 又 11 | 及 12 | 对 13 | 就 14 | 并 15 | 很 16 
| 或 17 | 把 18 | 是 19 | 的 20 | 着 21 | 给 22 | 而 23 | 被 24 | 让 25 | 但 -------------------------------------------------------------------------------- /config/ik/quantifier.dic: -------------------------------------------------------------------------------- 1 | 丈 2 | 下 3 | 世 4 | 世纪 5 | 两 6 | 个 7 | 中 8 | 串 9 | 亩 10 | 人 11 | 介 12 | 付 13 | 代 14 | 件 15 | 任 16 | 份 17 | 伏 18 | 伙 19 | 位 20 | 位数 21 | 例 22 | 倍 23 | 像素 24 | 元 25 | 克 26 | 克拉 27 | 公亩 28 | 公克 29 | 公分 30 | 公升 31 | 公尺 32 | 公担 33 | 公斤 34 | 公里 35 | 公顷 36 | 具 37 | 册 38 | 出 39 | 刀 40 | 分 41 | 分钟 42 | 分米 43 | 划 44 | 列 45 | 则 46 | 刻 47 | 剂 48 | 剑 49 | 副 50 | 加仑 51 | 勺 52 | 包 53 | 匙 54 | 匹 55 | 区 56 | 千克 57 | 千米 58 | 升 59 | 卷 60 | 厅 61 | 厘 62 | 厘米 63 | 双 64 | 发 65 | 口 66 | 句 67 | 只 68 | 台 69 | 叶 70 | 号 71 | 名 72 | 吨 73 | 听 74 | 员 75 | 周 76 | 周年 77 | 品 78 | 回 79 | 团 80 | 圆 81 | 圈 82 | 地 83 | 场 84 | 块 85 | 坪 86 | 堆 87 | 声 88 | 壶 89 | 处 90 | 夜 91 | 大 92 | 天 93 | 头 94 | 套 95 | 女 96 | 孔 97 | 字 98 | 宗 99 | 室 100 | 家 101 | 寸 102 | 对 103 | 封 104 | 尊 105 | 小时 106 | 尺 107 | 尾 108 | 局 109 | 层 110 | 届 111 | 岁 112 | 师 113 | 帧 114 | 幅 115 | 幕 116 | 幢 117 | 平方 118 | 平方公尺 119 | 平方公里 120 | 平方分米 121 | 平方厘米 122 | 平方码 123 | 平方米 124 | 平方英寸 125 | 平方英尺 126 | 平方英里 127 | 平米 128 | 年 129 | 年代 130 | 年级 131 | 度 132 | 座 133 | 式 134 | 引 135 | 张 136 | 成 137 | 战 138 | 截 139 | 户 140 | 房 141 | 所 142 | 扇 143 | 手 144 | 打 145 | 批 146 | 把 147 | 折 148 | 担 149 | 拍 150 | 招 151 | 拨 152 | 拳 153 | 指 154 | 掌 155 | 排 156 | 撮 157 | 支 158 | 文 159 | 斗 160 | 斤 161 | 方 162 | 族 163 | 日 164 | 时 165 | 曲 166 | 月 167 | 月份 168 | 期 169 | 本 170 | 朵 171 | 村 172 | 束 173 | 条 174 | 来 175 | 杯 176 | 枚 177 | 枝 178 | 枪 179 | 架 180 | 柄 181 | 柜 182 | 栋 183 | 栏 184 | 株 185 | 样 186 | 根 187 | 格 188 | 案 189 | 桌 190 | 档 191 | 桩 192 | 桶 193 | 梯 194 | 棵 195 | 楼 196 | 次 197 | 款 198 | 步 199 | 段 200 | 毛 201 | 毫 202 | 毫升 203 | 毫米 204 | 毫克 205 | 池 206 | 洲 207 | 派 208 | 海里 209 | 滴 210 | 炮 211 | 点 212 | 点钟 213 | 片 214 | 版 215 | 环 216 | 班 217 | 瓣 218 | 瓶 219 | 生 220 | 男 221 | 画 222 | 界 223 | 盆 224 | 盎司 
225 | 盏 226 | 盒 227 | 盘 228 | 相 229 | 眼 230 | 石 231 | 码 232 | 碗 233 | 碟 234 | 磅 235 | 种 236 | 科 237 | 秒 238 | 秒钟 239 | 窝 240 | 立方公尺 241 | 立方分米 242 | 立方厘米 243 | 立方码 244 | 立方米 245 | 立方英寸 246 | 立方英尺 247 | 站 248 | 章 249 | 笔 250 | 等 251 | 筐 252 | 筒 253 | 箱 254 | 篇 255 | 篓 256 | 篮 257 | 簇 258 | 米 259 | 类 260 | 粒 261 | 级 262 | 组 263 | 维 264 | 缕 265 | 缸 266 | 罐 267 | 网 268 | 群 269 | 股 270 | 脚 271 | 船 272 | 艇 273 | 艘 274 | 色 275 | 节 276 | 英亩 277 | 英寸 278 | 英尺 279 | 英里 280 | 行 281 | 袋 282 | 角 283 | 言 284 | 课 285 | 起 286 | 趟 287 | 路 288 | 车 289 | 转 290 | 轮 291 | 辆 292 | 辈 293 | 连 294 | 通 295 | 遍 296 | 部 297 | 里 298 | 重 299 | 针 300 | 钟 301 | 钱 302 | 锅 303 | 门 304 | 间 305 | 队 306 | 阶段 307 | 隅 308 | 集 309 | 页 310 | 顶 311 | 顷 312 | 项 313 | 顿 314 | 颗 315 | 餐 316 | 首 -------------------------------------------------------------------------------- /config/ik/stopword.dic: -------------------------------------------------------------------------------- 1 | a 2 | an 3 | and 4 | are 5 | as 6 | at 7 | be 8 | but 9 | by 10 | for 11 | if 12 | in 13 | into 14 | is 15 | it 16 | no 17 | not 18 | of 19 | on 20 | or 21 | such 22 | that 23 | the 24 | their 25 | then 26 | there 27 | these 28 | they 29 | this 30 | to 31 | was 32 | will 33 | with -------------------------------------------------------------------------------- /config/ik/suffix.dic: -------------------------------------------------------------------------------- 1 | 乡 2 | 井 3 | 亭 4 | 党 5 | 区 6 | 厅 7 | 县 8 | 园 9 | 塔 10 | 家 11 | 寺 12 | 局 13 | 巷 14 | 市 15 | 弄 16 | 所 17 | 斯基 18 | 楼 19 | 江 20 | 河 21 | 海 22 | 湖 23 | 省 24 | 维奇 25 | 署 26 | 苑 27 | 街 28 | 觀 29 | 观 30 | 诺夫 31 | 路 32 | 部 33 | 镇 34 | 阁 35 | 山 36 | 子 37 | 娃 -------------------------------------------------------------------------------- /config/ik/surname.dic: -------------------------------------------------------------------------------- 1 | 丁 2 | 万 3 | 万俟 4 | 上官 5 | 东方 6 | 乔 7 | 于 8 | 令狐 9 | 仲孙 10 | 任 11 | 何 12 | 余 13 | 候 14 | 傅 15 | 公冶 16 | 公孙 17 | 公羊 18 | 冯 19 | 刘 20 | 单 21 
| 单于 22 | 卢 23 | 史 24 | 叶 25 | 司徒 26 | 司空 27 | 司马 28 | 吕 29 | 吴 30 | 周 31 | 唐 32 | 夏 33 | 夏侯 34 | 太叔 35 | 姚 36 | 姜 37 | 孔 38 | 孙 39 | 孟 40 | 宇文 41 | 宋 42 | 宗政 43 | 尉迟 44 | 尹 45 | 崔 46 | 常 47 | 康 48 | 廖 49 | 张 50 | 彭 51 | 徐 52 | 慕容 53 | 戴 54 | 文 55 | 方 56 | 易 57 | 曹 58 | 曾 59 | 朱 60 | 李 61 | 杜 62 | 杨 63 | 林 64 | 梁 65 | 欧阳 66 | 武 67 | 段 68 | 毛 69 | 江 70 | 汤 71 | 沈 72 | 淳于 73 | 潘 74 | 澹台 75 | 濮阳 76 | 熊 77 | 王 78 | 田 79 | 申屠 80 | 白 81 | 皇甫 82 | 石 83 | 秦 84 | 程 85 | 罗 86 | 肖 87 | 胡 88 | 苏 89 | 范 90 | 董 91 | 蒋 92 | 薛 93 | 袁 94 | 许 95 | 诸葛 96 | 谢 97 | 谭 98 | 贺 99 | 贾 100 | 赖 101 | 赫连 102 | 赵 103 | 轩辕 104 | 邓 105 | 邱 106 | 邵 107 | 邹 108 | 郑 109 | 郝 110 | 郭 111 | 金 112 | 钟 113 | 钟离 114 | 钱 115 | 长孙 116 | 闻人 117 | 闾丘 118 | 阎 119 | 陆 120 | 陈 121 | 雷 122 | 韩 123 | 顾 124 | 马 125 | 高 126 | 魏 127 | 鲜于 128 | 黄 129 | 黎 130 | 龙 131 | 龚 -------------------------------------------------------------------------------- /libs/opennlp-tools-1.6.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlkmhd/persian-analyzer-elasticsearch/21f2a9fe323a1c97d3d0a063579195e4d2e347d3/libs/opennlp-tools-1.6.0.jar -------------------------------------------------------------------------------- /libs/stanford-corenlp-3.6.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlkmhd/persian-analyzer-elasticsearch/21f2a9fe323a1c97d3d0a063579195e4d2e347d3/libs/stanford-corenlp-3.6.0.jar -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | elasticsearch-analysis-farsi 5 | 4.0.0 6 | org.elasticsearch 7 | elasticsearch-analysis-farsi 8 | 1.0.0 9 | jar 10 | persian analyzer for Elasticsearch 11 | 2016 12 | 13 | 14 | 2.2.1 15 | 1.7 16 | ${project.basedir}/src/main/assemblies/plugin.xml 17 | analysis-farsi 18 | 
org.elasticsearch.plugin.analysis.farsi.AnalysisFarsiPlugin 19 | true 20 | false 21 | true 22 | 23 | 24 | 25 | 26 | The Apache Software License, Version 2.0 27 | http://www.apache.org/licenses/LICENSE-2.0.txt 28 | repo 29 | 30 | 31 | 32 | 33 | org.sonatype.oss 34 | oss-parent 35 | 7 36 | 37 | 38 | 39 | 40 | oss.sonatype.org 41 | OSS Sonatype 42 | 43 | true 44 | 45 | 46 | true 47 | 48 | http://oss.sonatype.org/content/repositories/releases/ 49 | 50 | 51 | 52 | 53 | 54 | org.elasticsearch 55 | elasticsearch 56 | ${elasticsearch.version} 57 | compile 58 | 59 | 60 | 61 | org.apache.httpcomponents 62 | httpclient 63 | 4.4.1 64 | 65 | 66 | 67 | log4j 68 | log4j 69 | 1.2.16 70 | runtime 71 | 72 | 73 | 74 | org.hamcrest 75 | hamcrest-core 76 | 1.3.RC2 77 | test 78 | 79 | 80 | 81 | org.hamcrest 82 | hamcrest-library 83 | 1.3.RC2 84 | test 85 | 86 | 87 | junit 88 | junit 89 | 4.10 90 | test 91 | 92 | 93 | 94 | commons-io 95 | commons-io 96 | 2.4 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | org.apache.maven.plugins 105 | maven-compiler-plugin 106 | 2.3.2 107 | 108 | 1.6 109 | 1.6 110 | 111 | 112 | 113 | org.apache.maven.plugins 114 | maven-surefire-plugin 115 | 2.11 116 | 117 | 118 | **/*Tests.java 119 | 120 | 121 | 122 | 123 | org.apache.maven.plugins 124 | maven-source-plugin 125 | 2.1.2 126 | 127 | 128 | attach-sources 129 | 130 | jar 131 | 132 | 133 | 134 | 135 | 136 | maven-assembly-plugin 137 | 138 | ${project.build.directory}/releases/ 139 | 140 | ${basedir}/src/main/assemblies/plugin.xml 141 | 142 | 143 | 144 | fully.qualified.MainClass 145 | 146 | 147 | 148 | 149 | 150 | package 151 | 152 | single 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /src/main/assemblies/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | zip 5 | 6 | false 7 | 8 | 9 | ${project.basedir}/config 10 | /config 11 | 12 | 13 | 14 | 15 | 16 | 
${project.basedir}/src/main/resources/plugin-descriptor.properties 17 | 18 | true 19 | 20 | 21 | 22 | 23 | / 24 | true 25 | true 26 | 27 | org.elasticsearch:elasticsearch 28 | 29 | 30 | 31 | / 32 | true 33 | true 34 | 35 | org.apache.httpcomponents:httpclient 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /src/main/java/ir/areka/analyzer/lucene/FarsiAnalyzer.java: -------------------------------------------------------------------------------- 1 | package ir.areka.analyzer.lucene; 2 | 3 | import java.io.StringReader; 4 | 5 | import org.apache.lucene.analysis.Analyzer; 6 | import org.apache.lucene.analysis.TokenFilter; 7 | import org.apache.lucene.analysis.TokenStream; 8 | import org.apache.lucene.analysis.Tokenizer; 9 | import org.apache.lucene.analysis.ar.ArabicNormalizationFilter; 10 | import org.apache.lucene.analysis.core.LowerCaseFilter; 11 | import org.apache.lucene.analysis.fa.PersianNormalizationFilter; 12 | import org.apache.lucene.analysis.standard.StandardTokenizer; 13 | 14 | public class FarsiAnalyzer extends Analyzer { 15 | 16 | @Override 17 | protected TokenStreamComponents createComponents(String paramString) { 18 | 19 | /** 20 | * tokenization 21 | */ 22 | Tokenizer tokenizer = new StandardTokenizer(); 23 | tokenizer.setReader(new StringReader(paramString)); 24 | 25 | /** 26 | * normilization 27 | */ 28 | TokenStream res = new ArabicNormalizationFilter(tokenizer); 29 | res = new LowerCaseFilter(res); 30 | res = new PersianNormalizationFilter(res); 31 | res = new FarsiNormilizerFilter(res); 32 | 33 | /** 34 | * filter stop words 35 | */ 36 | TokenFilter filter = new FarsiStopWordTokenFilter(res); 37 | 38 | return new TokenStreamComponents(tokenizer, filter); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/ir/areka/analyzer/lucene/FarsiNormilizerFilter.java: 
-------------------------------------------------------------------------------- 1 | package ir.areka.analyzer.lucene; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.lucene.analysis.TokenFilter; 6 | import org.apache.lucene.analysis.TokenStream; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | 9 | public class FarsiNormilizerFilter extends TokenFilter { 10 | 11 | private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); 12 | 13 | protected FarsiNormilizerFilter(TokenStream input) { 14 | super(input); 15 | } 16 | 17 | @Override 18 | public boolean incrementToken() throws IOException { 19 | if (input.incrementToken()) { 20 | for(int i=0; i 0) 37 | posIncrementAtt.setPositionIncrement(posIncrementAtt.getPositionIncrement() + extraIncrement); 38 | 39 | return returnValue; 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/ir/areka/analyzer/lucene/perstem.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Written by Jon Dehdari 2004-2013 3 | # Perstem: Stemmer and Morphological Parser for Persian 4 | # The license is the GPL v.3 (www.fsf.org) 5 | # Usage: perl perstem.pl [options] < input > output 6 | 7 | use 5.8.0; 8 | use strict; 9 | #use warnings; 10 | #use diagnostics; 11 | use Getopt::Long; 12 | 13 | my $version = '2.2'; 14 | my $date = '2013-10-21'; 15 | my $copyright = '(c) 2004-2013 Jon Dehdari - GPL v3'; 16 | my $title = "Perstem: Persian stemmer $version, $date - $copyright"; 17 | my ( $flush, $use_irreg_stems, $no_roman, $pos, $recall, $show_infinitival_form, $show_only_stem, $skip_comments, $tokenize, $unvowel, $zwnj ) = undef; 18 | my ( $pos_aj, $pos_aux, $pos_n, $pos_v, $pos_other, $before_resolve ) = undef; 19 | my (%resolve, %irreg_stems) = undef; 20 | my $ar_chars = 'BEqHSTDZLVU'; 21 | #my $longvowel = 'AuiO'; 22 | ### Temporary placement here 23 | my 
$irreg_stems = "O\tOm\nOmuz\tOmux\nAndAz\tAndAx\nAst\tbu\nbA\tbAis\nbnd\tbs\nbAC\tbu\npz\tpx\npLir\tpLirf\nprdAz\tprdAx\npiund\tpius\ntuAn\ttuAns\nju\tjs\nxuAh\txuAs\ndh\tdA\ndAr\tdAC\ndAn\tdAns\nbin\tdi\nru\trf\nzn\tz\nsAz\tsAx\nspAr\tspr\nCA\tCAis\nCu\tC\nCkn\tCks\nCmAr\tCmr\nCnAs\tCnAx\nCnu\tCni\nfruC\tfrux\nfCAr\tfCr\nkn\tkr\ngLAr\tgLAC\ngLr\tgLC\ngir\tgrf\ngrd\tgC\ngu\tgf\nmir\tmr\nnmA\tnmu\nnuis\tnuC\nhs\tbu\niAb\tiAf\n"; 24 | ## The "+idan and +Adan" verbs are regular going from past to present, but not the other way around (which is what we must do) 25 | my $semi_reg_stems = "Aft\tAftA\nAist\tAistA\nfrst\tfrstA\nbxC\tbxCi\nprs\tprsi\npic\tpici\ntrs\ttrsi\ncrx\tcrxi\nxr\txri\nrs\trsi\nfhm\tfhmi\nkC\tkCi\nkuC\tkuCi\n"; 26 | 27 | ### Defaults 28 | my $form = 'dict'; 29 | my $pos_sep = '/'; 30 | my $input_type = 'utf8'; # default input is UTF-8 31 | my $output_type = 'utf8'; # default output is UTF-8 32 | $tokenize = 1; 33 | $use_irreg_stems = 1; 34 | $zwnj = 1; 35 | 36 | my $usage = <<"END_OF_USAGE"; 37 | ${title} 38 | 39 | Usage: perl $0 [options] < input > output 40 | 41 | Function: Persian (Farsi) stemmer, morphological analyzer, transliterator, 42 | and partial part-of-speech tagger. 43 | 44 | Options: 45 | -f, --form Output forms as one of the following: 46 | dict: as they appear in a dictionary (default) 47 | linked: show all morphemes, linked together 48 | unlinked: show all morphemes as separate tokens 49 | untouched: don't stem/analyze; mostly for char-set conversion 50 | --flush Autoflush buffer output after every line 51 | -h, --help Print this usage 52 | -i, --input Input character encoding type {cp1256,isiri3342,ncr, 53 | translit,utf8} (default: $input_type) 54 | --irreg-stem {0|1} Resolve irregular present-tense verb stems to their 55 | past-tense stems (eg. kon -> kar). (default: 1 == true) 56 | -n, --noroman Delete all non-Arabic script characters (eg. 
HTML tags) 57 | -o, --output Output character encoding type {arabtex,cp1256, 58 | isiri3342,ncr,translit,utf8} (default: $output_type) 59 | -p, --pos Tag inflected words for parts of speech 60 | --pos-sep Separate words from their parts of speech by 61 | (default: "$pos_sep" ) 62 | -r, --recall Increase recall by parsing ambiguous affixes; may lower 63 | precision 64 | --skip-comments Skip commented-out lines, without printing them 65 | -s, --stem Return only word stems 66 | -t, --tokenize {0|1} Tokenize punctuation (default: 1 == true) 67 | -u, --unvowel Remove short vowels 68 | -v, --version Print version ($version) 69 | -z, --zwnj {0|1} Insert Zero Width Non-Joiners where they should be (default: 1 == true) 70 | 71 | END_OF_USAGE 72 | # -s, --stoplist Use external stopword list 73 | 74 | GetOptions( 75 | 'f|form=s' => \$form, 76 | 'flush' => \$flush, 77 | 'h|help|?' => sub { print $usage; exit; }, 78 | 'infinitive' => \$show_infinitival_form, 79 | 'i|input=s' => \$input_type, 80 | 'irreg-stem=i' => \$use_irreg_stems, 81 | 'n|noroman' => \$no_roman, 82 | 'o|output=s' => \$output_type, 83 | 'p|pos' => \$pos, 84 | 'pos-sep:s' => \$pos_sep, 85 | 'r|recall' => \$recall, 86 | 'skip-comments' => \$skip_comments, 87 | # 's|stoplist:s' => \$resolve_file, 88 | 's|stem' => \$show_only_stem, 89 | 't|tokenize=i' => \$tokenize, 90 | 'u|unvowel' => \$unvowel, 91 | 'v|version' => sub { print "$version\n"; exit; }, 92 | 'z|zwnj=i' => \$zwnj, 93 | ) or die $usage; 94 | 95 | ### Postprocess command-line arguments 96 | $input_type =~ s/.*1256/cp1256/; # equates win1256 with cp1256 97 | $output_type =~ s/.*1256/cp1256/; # equates win1256 with cp1256 98 | $input_type =~ tr/[A-Z]/[a-z]/; # recognizes more encoding spelling variants 99 | $output_type =~ tr/[A-Z]/[a-z]/; # recognizes more encoding spelling variants 100 | $input_type =~ tr/-//; # eg. UTF-8 & utf8 101 | $output_type =~ tr/-//; # eg. 
UTF-8 & utf8 102 | 103 | if ($form eq 'dict') { 104 | $use_irreg_stems = 1; 105 | $show_only_stem = 1; 106 | $show_infinitival_form = 1; 107 | } 108 | 109 | 110 | ### Open Resolve section 111 | while (my $resolve = ) { 112 | next if $resolve =~ /^#/; 113 | chomp $resolve; 114 | my @resolve = split /\t/, $resolve; 115 | $resolve{"$resolve[0]"} = [$resolve[1], $resolve[2]]; 116 | } 117 | 118 | ### Open Irregular Verb Stem section 119 | if ($use_irreg_stems) { 120 | $irreg_stems .= $semi_reg_stems; 121 | my @lines = split "\n", $irreg_stems; 122 | foreach (@lines) { 123 | next if m/^#/; 124 | chomp; 125 | my @line = split /\t/, $_; 126 | $irreg_stems{"$line[0]"} = [ $line[1] ]; 127 | } 128 | } 129 | 130 | 131 | ### A hack for what Perl should have already done: support at runtime BOTH utf8 & other input/output types 132 | if ($input_type eq 'utf8') { # UTF-8 input 133 | use encoding "utf8"; 134 | open STDIN, "<:encoding(UTF-8)" ; 135 | } 136 | elsif ($output_type eq 'utf8') { # UTF-8 output 137 | use encoding "utf8"; 138 | open STDOUT, "<:encoding(UTF-8)" ; 139 | } 140 | else { unimport encoding "utf8";} 141 | 142 | 143 | ### Autoflush buffers, for piping STDOUT 144 | $| = 1 if $flush; 145 | 146 | 147 | while (<>) { 148 | 149 | my $full_line; 150 | 151 | if ( /^$/ | /^\s+$/ | /^#/ ) { # Treat empty or commented-out lines 152 | if ($skip_comments) { next; } # Don't even print them out 153 | else { print; next; } # At least print them out 154 | } 155 | tr/\r/\n/d; # Deletes lame DOS carriage returns 156 | s/\n/ ====/; # Converts newlines to temporary placeholder ==== 157 | 158 | ### Tokenizes punctuation 159 | if ( $tokenize ) { 160 | s/([,.;:!?(){}«»"#\/])/ $1 /g; # Pads punctuation w/ spaces 161 | s/(?/\n/g; 177 | s/

/\n/g; 178 | tr/\x01-\x09\x1b-\x1f\x21-\x2d\x2f-\x5a\x5c\x5e-\x9f//d; # Deletes all chars below xa0 except: 0a,20,2e,5b,5d 179 | } 180 | 181 | if ($input_type eq 'utf8') { 182 | tr/اأبپتثجچحخدذرزژسشصضطظعغفقكگلمنوهيَُِآةکیءىۀئؤًّ،؛؟٪‍‌/ABbptVjcHxdLrzJsCSDTZEGfqkglmnuhiaoeOPkiMiXIUN~,;?%*\-/; 183 | } 184 | 185 | elsif ($input_type eq 'ncr') { 186 | my %unihtml2roman = ( 187 | 'ا' => 'A', '☿' => 'A', 'أ' => 'B', 'ب' => 'b', 'ة' => 'P', 'پ' => 'p', 'ت' => 't', 'ث' => 'V', 'ج' => 'j', 'چ' => 'c', 'ح' => 'H', 'خ' => 'x', 'د' => 'd', 'ذ' => 'L', 'ر' => 'r', 'ز' => 'z', 'ژ' => 'J', 'س' => 's', 'ش' => 'C', 'ص' => 'S', 'ض' => 'D', 'ط' => 'T', 'ظ' => 'Z', 'ع' => 'E', 'غ' => 'G', 'ف' => 'f', 'ق' => 'q', 'ك' => 'k', 'ک' => 'k', 'گ' => 'g', 'ل' => 'l', 'م' => 'm', 'ن' => 'n', 'و' => 'u', 'ه' => 'h', 'ي' => 'i', 'ی' => 'i', 'ى' => 'A', 'َ' => 'a', 'ُ' => 'o', 'ِ' => 'e', 'ّ' => '~', 'آ' => 'O', 'ء' => 'M', 'ً' => 'N', 'أ' => 'A', 'ؤ' => 'U', 'إ' => 'A', 'ئ' => 'I', 'ۀ' => 'X', '٪' => '%', '،' => ',', '؛' => ';', '؟' => '?', '‌' => "-", ' ' => ' ', '.' => '.', ':' => ':', ); 188 | my @charx = split(/(?=\&\#)|(?=\s)|(?=\n)/, $_); 189 | $_ = ""; 190 | foreach my $charx (@charx) { 191 | $_ .= $unihtml2roman{$charx}; 192 | } 193 | } # ends elsif ($input_type eq 'ncr') 194 | 195 | elsif ($input_type eq 'cp1256') { 196 | tr/\xc7\xc3\xc8\x81\xca\xcb\xcc\x8d\xcd\xce\xcf\xd0\xd1\xd2\x8e\xd3\xd4\xd5\xd6\xd8\xd9\xda\xdb\xdd\xde\xdf\x90\xe1\xe3\xe4\xe6\xe5\xed\xf3\xf5\xf6\xc2\xc9\x98\xc1\xc0\xc6\xc4\xf0\xf8\xa1\xba\xbf\xab\xbb\x9d\xec/ABbptVjcHxdLrzJsCSDTZEGfqkglmnuhiaoeOPkMXIUN~,;?{}\-i/; } 197 | 198 | elsif ($input_type eq 'isiri3342') { 199 | tr/\xc1\xf8\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xfe\xf0\xf2\xf1\xc0\xc1\xfc\xda\xe1\xc2\xfb\xfa\xf3\xf6\xac\xbb\xbf\xa5\xe7\xe6\xa1/ABbptVjcHxdLrzJsCSDTZEGfqKglmnuhyaoeO\x7cPkiMIUN~,;?%{}\-/; } 200 | 201 | else { die "Perstem error: unrecognized --input type\n\n" . 
$usage } 202 | 203 | } # if ($input_type) 204 | 205 | @_ = split(/(?\S*?)(?:\S{3}(? mA Ast, but sentence-final punctuation not necessary 337 | 338 | ### Non-verbal ### 339 | s/\b([^+ ]{3,}?)(? 'ا', '|' => 'ا', 'B' => 'أ', 'b' => 'ب', 'p' => 'پ', 't' => 'ت', 'V' => 'ث', 'j' => 'ج', 'c' => 'چ', 'H' => 'ح', 'x' => 'خ', 'd' => 'د', 'L' => 'ذ', 'r' => 'ر', 'z' => 'ز', 'J' => 'ژ', 's' => 'س', 'C' => 'ش', 'S' => 'ص', 'D' => 'ض', 'T' => 'ط', 'Z' => 'ظ', 'E' => 'ع', 'G' => 'غ', 'f' => 'ف', 'q' => 'ق', 'k' => 'ک', 'K' => 'ك', 'g' => 'گ', 'l' => 'ل', 'm' => 'م', 'n' => 'ن', 'u' => 'و', 'v' => 'و', 'w' => 'و', 'h' => 'ه', 'X' => 'ۀ', 'i' => 'ی', 'I' => 'ئ', 'a' => 'َ', 'o' => 'ُ', 'e' => 'ِ', '~' => 'ّ', ',' => '،', ';' => '؛', '?' => '؟', 'O' => 'آ', 'M' => 'ء', 'N' => 'ً', 'U' => 'ؤ', '-' => '‌', ' ' => ' ', '_' => '_', '+' => '+', "\n" => '
', '.' => '‫.‪', ); 457 | my @charx = split(//, $_); 458 | $_ = ''; 459 | foreach my $charx (@charx) { 460 | $_ .= $roman2unihtml{$charx}; 461 | } 462 | } # ends elsif (ncr) 463 | 464 | elsif ($output_type eq 'cp1256') { 465 | tr/ABbptVjcHxdLrzJsCSDTZEGfqKglmnuhyaoeOPkMXIUN~,;?{}\-i/\xc7\xc3\xc8\x81\xca\xcb\xcc\x8d\xcd\xce\xcf\xd0\xd1\xd2\x8e\xd3\xd4\xd5\xd6\xd8\xd9\xda\xdb\xdd\xde\xdf\x90\xe1\xe3\xe4\xe6\xe5\xed\xf3\xf5\xf6\xc2\xc9\x98\xc1\xc0\xc6\xc4\xf0\xf8\xa1\xba\xbf\xab\xbb\x9d\xec/; 466 | 467 | # s/\x2e/\xfe\x2e\xfd/g; # Corrects periods to be RTL embedded; broken 468 | } 469 | 470 | elsif ($output_type eq 'isiri3342') { 471 | tr/ABbptVjcHxdLrzJsCSDTZEGfqKglmnuhyaoeO\x7cPkiMIUN~,;?%{}\-/\xc1\xf8\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xfe\xf0\xf2\xf1\xc0\xc1\xfc\xda\xe1\xc2\xfb\xfa\xf3\xf6\xac\xbb\xbf\xa5\xe7\xe6\xa1/; } 472 | 473 | elsif ($output_type eq 'arabtex') { 474 | my %roman2arabtex = ( 475 | 'A' => 'A', '|' => 'a', 'b' => 'b', 'p' => 'p', 't' => 't', 'V' => '_t', 'j' => 'j', 'c' => '^c', 'H' => '.h', 'x' => 'x', 'd' => 'd', 'L' => '_d', 'r' => 'r', 'z' => 'z', 'J' => '^z', 's' => 's', 'C' => '^s', 'S' => '.s', 'D' => '.d', 'T' => '.t', 'Z' => '.z', 'E' => '`', 'G' => '.g', 'f' => 'f', 'q' => 'q', 'K' => 'k', 'k' => 'k', 'g' => 'g', 'l' => 'l', 'm' => 'm', 'n' => 'n', 'u' => 'U', 'v' => 'w', 'w' => 'w', 'h' => 'h', 'X' => 'H-i', 'i' => 'I', 'I' => '\'y', 'a' => 'a', 'o' => 'o', 'e' => 'e', 'P' => 'T', '~' => '', ',' => ',', ';' => ';', '?' => '?', 'O' => '^A', 'M' => '\'', 'N' => 'aN', 'U' => 'U\'', '{' => '\lq ', '}' => '\rq ', '-' => '\hspace{0ex}', '.' 
=> '.', ' ' => ' ', '_' => '_', '+' => '+', ); 476 | my @charx = split(//, $_); 477 | $_ = ''; 478 | foreach my $charx (@charx) { 479 | $_ .= $roman2arabtex{$charx}; 480 | } 481 | 482 | # $_ .= '\\\\'; # Appends LaTeX newline '\\' after each line 483 | } # ends elsif (arabtex) 484 | 485 | else { die "Perstem error: unrecognized --output type\n\n" . $usage } 486 | 487 | ## Restore temporary Latin doppelgaenger characters to their normal forms 488 | tr/ⓐ-ⓩⒶ-Ⓩ⓿①-⑨⁆⁓‚;⁇‰⁎‐✢/a-zA-Z01-9~,;?%*\-+/; 489 | 490 | if ($output_type eq 'utf8' && m/[^ \n]/) { # If utf8 & non-empty 491 | binmode(STDOUT, ":utf8"); # Uses the :utf8 output layer 492 | $full_line .= "$_ "; 493 | } 494 | elsif ( /[^ \n]/ ) { # if arabic-script line is non-empty 495 | $full_line .= "$_ "; 496 | } 497 | 498 | } # ends if ($output_type ne 'translit') -- for native Perso-Arabic-script input 499 | elsif ( /[^ \n]/ ) { # if latin-script line is non-empty 500 | if ($input_type ne 'translit') { 501 | ## Deal with latin-script strings from arabic-script input 502 | tr/ⓐ-ⓩⒶ-Ⓩ⓿①-⑨⁆⁓‚;⁇‰⁎‐✢/a-zA-Z01-9~,;?%*\-+/; 503 | } 504 | $full_line .= "$_ "; 505 | } 506 | 507 | } # ends foreach @_ 508 | 509 | $full_line =~ s/ $//; 510 | print $full_line; 511 | 512 | } # ends while (<>) 513 | 514 | ### Resolve section 515 | ## The format of the Resolve section ( __DATA__ ) is as follows: 516 | ## 1. Mokassar (broken plurals): 'ktb ktAb' OR 'ktb ktAb_+PL' 517 | ## 2. Preparsed (speed): 'krdn kr_+dn' 518 | ## 3. Don't stem (false positive): 'bArAn bArAn' 519 | ## 4. 
Stop word (delete): 'u ' 520 | __DATA__ 521 | u u CONJ 522 | iA iA CONJ 523 | AmA AmA CONJ 524 | uli uli CONJ 525 | dr dr P 526 | bh bh P 527 | Az Az P 528 | bA bA P 529 | tA tA P 530 | bi bi P 531 | br br P 532 | brAi brAi P 533 | rui ru_+e P+EZ 534 | Hti Hti P 535 | sui su_+e P+EZ 536 | kh kh C 537 | Ain Ain DT+PROX 538 | On On DT+DIST 539 | ik ik DT 540 | hr hr DT 541 | rA rA ACC 542 | rAi rA_+e ACC+EZ 543 | mi mi MORPH.IPFV 544 | hA hA MORPH.PL 545 | Ai Ai MORPH 546 | hm hm 547 | mn mn PRON+1.SG 548 | tu tu PRON+2.SG 549 | Au Au PRON+3.SG 550 | mA mA PRON+1.PL 551 | CmA CmA PRON+2 552 | AiCAn AiCAn PRON+3.PL 553 | OnhA OnhA PRON+3.PL 554 | OnAn OnAn PRON+3.PL 555 | iki iki PRON+3.SG 556 | Agr Agr PRT+COND 557 | ps ps INTJ 558 | ch ch 559 | hic hic NEG 560 | nh nh NEG 561 | bArAn bArAn N 562 | tim tim N 563 | hfth hfth N 564 | kihAn kihAn N 565 | zndgi zndgi N 566 | sAzmAn sAzmAn N 567 | EnuAn EnuAn N 568 | nZAm nZAm N 569 | jhAn jhAn N 570 | pAiAn pAiAn N 571 | miAn miAn N 572 | frmAndh frmAndh N 573 | nmAindh nmAindh N 574 | nmAiC nmAiC N 575 | nuisndh nuisndh N 576 | prundh prundh N 577 | xndh xndh N 578 | bzrgi bzrg_+i N+ATTR 579 | bEid bEid A 580 | biCtr biC A 581 | digr digr A 582 | nhAii nhAii A 583 | nhAIi nhAii A 584 | frxndh frxndh A 585 | milAdi milAdi A 586 | Oindh O_+ndh A+PRPT 587 | frhngi frhngi 588 | tnhA tnhA 589 | AntxAbAt AntxAbAt N 590 | AstfAdh AstfAdh N 591 | iAzdh iAzdh NUM 592 | duAzdh duAzdh NUM 593 | pAnzdh pAnzdh NUM 594 | sizdh sizdh NUM 595 | CAnzdh CAnzdh NUM 596 | nuzdh nuzdh NUM 597 | miliArd miliArd NUM 598 | rIis rIis N 599 | lndn lndn N 600 | mEdn mEdn N 601 | tmdn tmdn 602 | grdn grdn N 603 | lAdn lAdn 604 | kudn kudn 605 | mAdh mAdh 606 | kilumtr kilumtr N 607 | jAdh jAdh 608 | ktb ktAb N 609 | AfkAr fkr N 610 | AEDA EDu 611 | AfGAnstAn AfGAnstAn N 612 | AslAmi AslAm_+i N 613 | Ardn Ardn N 614 | OmrikA OmrikA N 615 | OmrikAii OmrikA_+i 616 | AnsAni AnsAn_+i N 617 | bnglAdC bnglAdC N 618 | thrAn thrAn N 619 | pArlmAn pArlmAn N 
620 | zbAnhAi zbAn_+hA_+e N+PL+EZ 621 | zbAnhA zbAn_+hA N+PL 622 | kCurhAi kCur_+hA_+e N+PL+EZ 623 | kCurhA kCur_+hA N+PL 624 | tBsisAt tBsis_+At N+PL 625 | mrdm mrdm N 626 | dftr dftr N 627 | dfAtr dftr N 628 | dktr dktr N 629 | jAi jA_+e N+EZ 630 | ksAni ks N+PL+INDEF 631 | OVAr AVr N+PL.BROKEN 632 | Amur Amr N+PL.BROKEN 633 | AfrAd frd N+PL.BROKEN 634 | AfrAdi frd_+i N+PL.BROKEN+INDEF 635 | muAd mAdh N+PL.BROKEN 636 | ruAbT rAbTh N+PL.BROKEN 637 | CrAiT CrT N+PL.BROKEN 638 | mnATq mnTqh N+PL.BROKEN 639 | mnAbE mnbE N+PL.BROKEN 640 | msAIl msIlh N+PL.BROKEN 641 | SnAiE SniEh N+PL.BROKEN 642 | ntAij ntijh N+PL.BROKEN 643 | mll mlt N+PL.BROKEN 644 | Hdud Hd N+PL.BROKEN 645 | Hquq Hq N+PL.BROKEN 646 | mrAsm rsm N+PL.BROKEN 647 | AnuAE nuE N+PL.BROKEN 648 | muArd murd N+PL.BROKEN 649 | EuAml EAml N+PL.BROKEN 650 | mrAkz mrkz N+PL.BROKEN 651 | Elum Elm N+PL.BROKEN 652 | nqAT nqTh N+PL.BROKEN 653 | AfkAr fkr N+PL.BROKEN 654 | ASul ASl N+PL.BROKEN 655 | quAnin qAnun N+PL.BROKEN 656 | mnAfE mnfEt N+PL.BROKEN 657 | EnASr EnSr N+PL.BROKEN 658 | ATrAf Trf N+PL.BROKEN 659 | xTuT xT N+PL.BROKEN 660 | EuArD EArDh N+PL.BROKEN 661 | AHzAb Hzb N+PL.BROKEN 662 | AEDAi EDu_+e N+PL.BROKEN+EZ 663 | mrA mn rA 664 | trA tu rA 665 | cist ch Ast 666 | kjAst kjA Ast 667 | xuAhd xuAh_+d AUX+3.SG 668 | bAid bA_+d AUX+3.SG 669 | CAid CA_+d AUX+3.SG 670 | Omdh Om_+dh V+PSPT 671 | Ourdh Our_+dh V+PSPT 672 | Ast Ast V.3.SG.PRS 673 | bAxt bAx_+t V+PST.3.SG 674 | brdh br_+dh V+PSPT 675 | bud bu_+d V+PST.3.SG 676 | budh bu_+dh V+PSPT 677 | budn bu_+dn V+GER 678 | budnd bu_+d_+nd V+PST+3.PL 679 | Cdh C_+dh V+PSPT 680 | Cdn C_+dn V+GER 681 | Cud Cu_+d V.PRS+3.SG 682 | Cundh Cu_+ndh V.PRS+PRPT 683 | dACt dAC_+t V+PST.3.SG 684 | dACth dAC_+th V+PSPT 685 | dAdh dA_+dh V+PSPT 686 | dAdn dA_+dn V+GER 687 | dAdnd dA_+d_+nd V+PST+3.PL 688 | midAd mi-+_dA_+d V+IPFV+PST.3.SG 689 | mi-dAd mi-+_dA_+d V+IPFV+PST.3.SG 690 | dAnst dAns_+t V+PST.3.SG 691 | dArd dAr_+d V.PRS+3.SG 692 | dhd dh_+d V.PRS+3.SG 693 | 
dhndh dh_+ndh V.PRS+PRPT 694 | didn di_+dn V+GER 695 | didh di_+dh V+PSPT 696 | binndh bin_+ndh V.PRS+PRPT 697 | gft gf_+t V+PST.3.SG 698 | gLACt gLAC_+t V+PST.3.SG 699 | gLACth gLAC_+th V+PSPT 700 | gLCth gLC_+th V+PSPT 701 | grfth grf_+th V+PSPT 702 | grft grf_+t V+PST.3.SG 703 | iAft iAf_+t V+PST.3.SG 704 | kCt kC_+t V+PST.3.SG 705 | knnd kn_+nd V.PRS+3.PL 706 | knndh kn_+ndh V.PRS+PRPT 707 | knd kn_+d V.PRS+3.SG 708 | krdn kr_+dn V+GER 709 | krdh kr_+dh V+PSPT 710 | krdnd kr_+d_+nd V V+PST+3.PL 711 | hst hs_+t V+PST.3.SG 712 | nCdh n+_C_+dh V+NEG+PSPT 713 | nist n+_Ast V+NEG+3.SG.PRS 714 | ntuAnst ntuAns_+t V+PST.3.SG 715 | prdAxt prdAx_+t V+PST.3.SG 716 | rft rf_+t V+PST.3.SG 717 | sAxt sAx_+t V+PST.3.SG 718 | sAxth sAx_+th V+PSPT 719 | tuAnst tuAns_+t V+PST.3.SG 720 | xuAst xuAs_+t V+PST.3.SG 721 | zdh z_+dh V+PSPT 722 | zdn z_+dn V+GER 723 | zdnd z_+d_+nd V+PST+3.PL 724 | znndh zn_+ndh V.PRS+PRPT 725 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/analysis/FarsiAnalysisBinderProcessor.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.analysis; 2 | 3 | /** Analysis binder processor for the Farsi plugin: it registers no token filters and binds the analyzer name {@code farsi} to FarsiAnalyzerProvider. */ public class FarsiAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor { 4 | 5 | 6 | /** Registers nothing; the body is empty. */ @Override 7 | public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) { 8 | 9 | } 10 | 11 | 12 | /** Registers FarsiAnalyzerProvider under the analyzer name {@code farsi}. */ @Override 13 | public void processAnalyzers(AnalyzersBindings analyzersBindings) { 14 | analyzersBindings.processAnalyzer("farsi", FarsiAnalyzerProvider.class); 15 | } 16 | 17 | } -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/analysis/FarsiAnalyzerProvider.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.analysis; 2 | 3 | import org.elasticsearch.common.inject.Inject; 4 | import
org.elasticsearch.common.inject.assistedinject.Assisted; 5 | import org.elasticsearch.common.settings.Settings; 6 | import org.elasticsearch.env.Environment; 7 | import org.elasticsearch.index.Index; 8 | import org.elasticsearch.index.settings.IndexSettingsService; 9 | 10 | import ir.areka.analyzer.lucene.FarsiAnalyzer; 11 | 12 | /** Index-level analyzer provider that hands out a FarsiAnalyzer. NOTE(review): the superclass is used as a raw type here; a type argument may have been lost - confirm against the original file. */ public class FarsiAnalyzerProvider extends AbstractIndexAnalyzerProvider { 13 | 14 | /** Analyzer instance created together with this provider and returned by get(). */ protected FarsiAnalyzer analyzer = new FarsiAnalyzer(); 15 | /** Name constant; it is not referenced by the code visible in this file. NOTE(review): {@code custome_analyzer} looks misspelled and differs from the {@code farsi} name used at registration, but it is a public runtime value - confirm before changing. */ public static final String NAME = "custome_analyzer"; 16 | 17 | /** Forwards index, the settings obtained from indexSettingsService, name and settings to the superclass constructor; env is accepted but not used in this body. */ @Inject 18 | public FarsiAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) { 19 | super(index, indexSettingsService.getSettings(), name, settings); 20 | } 21 | 22 | /** Returns the analyzer held in the analyzer field (the same instance on every call). */ @Override 23 | public FarsiAnalyzer get() { 24 | return analyzer; 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/indices/analysis/FarsiIndicesAnalysis.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.indices.analysis; 2 | 3 | import org.elasticsearch.common.component.AbstractComponent; 4 | import org.elasticsearch.common.inject.Inject; 5 | import org.elasticsearch.common.settings.Settings; 6 | import org.elasticsearch.env.Environment; 7 | import org.elasticsearch.index.analysis.AnalyzerScope; 8 | import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory; 9 | 10 | import ir.areka.analyzer.lucene.FarsiAnalyzer; 11 | 12 | /** Component whose constructor registers a pre-built FarsiAnalyzer factory with the IndicesAnalysisService. */ public class FarsiIndicesAnalysis extends AbstractComponent { 13 | 14 | /** Passes settings to the superclass, then puts a PreBuiltAnalyzerProviderFactory wrapping a new FarsiAnalyzer into the analyzer provider factories under the key {@code farsi_analyzer} with AnalyzerScope.GLOBAL; env is not used in this body. */ @Inject 15 | public FarsiIndicesAnalysis(final Settings settings, 16 | IndicesAnalysisService indicesAnalysisService,Environment env) { 17 | super(settings); 18 | 19 | indicesAnalysisService.analyzerProviderFactories().put("farsi_analyzer", 20 | new PreBuiltAnalyzerProviderFactory("farsi_analyzer", AnalyzerScope.GLOBAL, 21 |
new FarsiAnalyzer())); 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/indices/analysis/FarsiIndicesAnalysisModule.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.indices.analysis; 2 | 3 | import org.elasticsearch.common.inject.AbstractModule; 4 | 5 | /** Injection module whose only binding is FarsiIndicesAnalysis. */ public class FarsiIndicesAnalysisModule extends AbstractModule { 6 | 7 | /** Binds FarsiIndicesAnalysis as an eager singleton. */ @Override 8 | protected void configure() { 9 | bind(FarsiIndicesAnalysis.class).asEagerSingleton(); 10 | } 11 | 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/plugin/analysis/farsi/AnalysisFarsiPlugin.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.analysis.farsi; 2 | 3 | import java.util.Collection; 4 | import java.util.Collections; 5 | 6 | import org.elasticsearch.common.inject.Module; 7 | import org.elasticsearch.index.analysis.AnalysisModule; 8 | import org.elasticsearch.index.analysis.FarsiAnalysisBinderProcessor; 9 | import org.elasticsearch.indices.analysis.FarsiIndicesAnalysisModule; 10 | import org.elasticsearch.plugins.Plugin; 11 | 12 | /** Plugin entry class: it supplies the plugin name and description, contributes FarsiIndicesAnalysisModule as a node module, and adds FarsiAnalysisBinderProcessor to the AnalysisModule. */ public class AnalysisFarsiPlugin extends Plugin { 13 | 14 | /** Returns the fixed description text of this plugin. */ @Override 15 | public String description() { 16 | return "an analyzer for persian"; 17 | } 18 | 19 | /** Returns the fixed plugin name, {@code farsi_analyzer}. */ @Override 20 | public String name() { 21 | return "farsi_analyzer"; 22 | } 23 | 24 | /** Returns a single-element list holding a new FarsiIndicesAnalysisModule. NOTE(review): the return type is the raw Collection here; a type argument (presumably Module) may have been lost - confirm against the original file. */ @Override 25 | public Collection nodeModules() { 26 | return Collections.
singletonList(new FarsiIndicesAnalysisModule()); 27 | } 28 | 29 | /** Adds a new FarsiAnalysisBinderProcessor to the given AnalysisModule. NOTE(review): there is no @Override, so this is presumably discovered by the plugin framework through its method name - confirm. */ public void onModule(AnalysisModule module) { 30 | module.addProcessor(new FarsiAnalysisBinderProcessor()); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | # Elasticsearch plugin descriptor file 2 | # This file must exist as 'plugin-descriptor.properties' at 3 | # the root directory of all plugins. 4 | # 5 | # A plugin can be 'site', 'jvm', or both. 6 | # 7 | ### example site plugin for "foo": 8 | # 9 | # foo.zip <-- zip file for the plugin, with this structure: 10 | # _site/ <-- the contents that will be served 11 | # plugin-descriptor.properties <-- example contents below: 12 | # 13 | # site=true 14 | # description=My cool plugin 15 | # version=1.0 16 | # 17 | ### example jvm plugin for "foo" 18 | # 19 | # foo.zip <-- zip file for the plugin, with this structure: 20 | # .jar <-- classes, resources, dependencies 21 | # .jar <-- any number of jars 22 | # plugin-descriptor.properties <-- example contents below: 23 | # 24 | # jvm=true 25 | # classname=foo.bar.BazPlugin 26 | # description=My cool plugin 27 | # version=2.0.0-rc1 28 | # elasticsearch.version=2.0 29 | # java.version=1.7 30 | # 31 | ### mandatory elements for all plugins: 32 | # 33 | # 'description': simple summary of the plugin 34 | description=${project.description} 35 | # 36 | # 'version': plugin's version 37 | version=${project.version} 38 | # 39 | # 'name': the plugin name 40 | name=${elasticsearch.plugin.name} 41 | 42 | ### mandatory elements for site plugins: 43 | # 44 | # 'site': set to true to indicate contents of the _site/ 45 | # directory in the root of the plugin should be served.
46 | site=${elasticsearch.plugin.site} 47 | # 48 | ### mandatory elements for jvm plugins : 49 | # 50 | # 'jvm': true if the 'classname' class should be loaded 51 | # from jar files in the root directory of the plugin. 52 | # Note that only jar files in the root directory are 53 | # added to the classpath for the plugin! If you need 54 | # other resources, package them into a resources jar. 55 | jvm=${elasticsearch.plugin.jvm} 56 | # 57 | # 'classname': the name of the class to load, fully-qualified. 58 | classname=${elasticsearch.plugin.classname} 59 | # 60 | # 'java.version' version of java the code is built against 61 | # use the system property java.specification.version 62 | # version string must be a sequence of nonnegative decimal integers 63 | # separated by "."'s and may have leading zeros 64 | java.version=${maven.compiler.target} 65 | # 66 | # 'elasticsearch.version' version of elasticsearch compiled against 67 | # You will have to release a new version of the plugin for each new 68 | # elasticsearch release. This version is checked when the plugin 69 | # is loaded so Elasticsearch will refuse to start in the presence of 70 | # plugins with the incorrect elasticsearch.version. 71 | elasticsearch.version=${elasticsearch.version} 72 | # 73 | ### deprecated elements for jvm plugins : 74 | # 75 | # 'isolated': true if the plugin should have its own classloader. 76 | # passing false is deprecated, and only intended to support plugins 77 | # that have hard dependencies against each other. If this is 78 | # not specified, then the plugin is isolated by default. 
79 | isolated=${elasticsearch.plugin.isolated} 80 | # -------------------------------------------------------------------------------- /src/main/uml/IKAnalysisBinderProcessor.uml: -------------------------------------------------------------------------------- 1 | 2 | 3 | JAVA 4 | org.elasticsearch.index.analysis.IKAnalysisBinderProcessor 5 | 6 | org.elasticsearch.index.analysis.IKAnalysisBinderProcessor 7 | org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.AnalyzersBindings 8 | java.lang.Object 9 | org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor 10 | org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.TokenFiltersBindings 11 | org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.CharFiltersBindings 12 | org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.TokenizersBindings 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | Fields 77 | Methods 78 | Constructors 79 | Inner Classes 80 | Properties 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /testSrc/org/elasticsearch/analysis/test/TestStemmer.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.analysis.test; 2 | 3 | import java.io.StringReader; 4 | 5 | import org.apache.lucene.analysis.Tokenizer; 6 | import org.apache.lucene.analysis.standard.StandardTokenizer; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.junit.Test; 9 | 10 | /** Manual check that runs a Persian sample sentence through StandardTokenizer and prints each term. It contains no assertions, and the FarsiStemmer calls in it are commented out, so despite the class name no stemmer is exercised. */ public class TestStemmer { 11 | 12 | /** Tokenizes the sample text and writes every token to standard output; it only fails if tokenization throws. */ @Test 13 | public void testName() throws Exception { 14 | String txt = "انبار دار , دیپلمه حسابداری , با 2سال سابقه آشنا به"; 15
| 16 | // PTBTokenizer ptb = new PTBTokenizer(new StringReader(txt), new CoreLabelTokenFactory(), null); 17 | // while(ptb.hasNext()) 18 | // System.out.println(ptb.next()); 19 | 20 | Tokenizer tokenizer = new StandardTokenizer(); 21 | tokenizer.setReader(new StringReader(txt)); 22 | tokenizer.reset(); 23 | CharTermAttribute attr = tokenizer.addAttribute(CharTermAttribute.class); 24 | /* attr is the term attribute registered on the tokenizer above; it is read once per token. */ while(tokenizer.incrementToken()) { 25 | System.out.println(attr.toString()); 26 | } 27 | // FarsiStemmer stem = new FarsiStemmer(); 28 | // stem.stem("کتابها"); 29 | // stem.stem("شهرها"); 30 | } 31 | 32 | } 33 | --------------------------------------------------------------------------------