├── .github └── workflows │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── config ├── ansj.cfg.yml └── dic │ └── default.dic ├── pom.xml └── src ├── main ├── assembly │ └── plugin.xml ├── java │ └── org │ │ └── ansj │ │ └── elasticsearch │ │ ├── action │ │ ├── AnsjAction.java │ │ ├── AnsjRequest.java │ │ ├── AnsjResponse.java │ │ └── TransportAnsjAction.java │ │ ├── cat │ │ ├── AnalyzerCatAction.java │ │ ├── AnsjCatAction.java │ │ └── ChineseRestTable.java │ │ ├── index │ │ ├── analysis │ │ │ ├── AnsjAnalyzerProvider.java │ │ │ └── AnsjTokenizerTokenizerFactory.java │ │ └── config │ │ │ └── AnsjElasticConfigurator.java │ │ ├── plugin │ │ └── AnalysisAnsjPlugin.java │ │ └── rest │ │ └── RestAnsjAction.java ├── plugin-metadata │ └── plugin-security.policy └── resources │ └── plugin-descriptor.properties └── test ├── java └── org │ └── ansj │ └── test │ └── ESAnalysisAnsjTests.java └── resource ├── datas.json └── mapping.json /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release elastic ansj plugin 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | tag: 6 | required: true 7 | description: "git tag to release" 8 | es-version: 9 | required: true 10 | description: 'es version' 11 | 12 | jobs: 13 | release: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Set up JDK 8 18 | uses: actions/setup-java@v3 19 | with: 20 | java-version: '8' 21 | distribution: 'temurin' 22 | cache: 'maven' 23 | - name: Build with Maven 24 | run: mvn package -Dmaven.test.skip=true 25 | - run: mkdir staging && cp target/releases/* staging 26 | - uses: actions/upload-artifact@v3 27 | with: 28 | name: Package 29 | path: staging 30 | - uses: "marvinpinto/action-automatic-releases@latest" 31 | with: 32 | repo_token: "${{ secrets.GITHUB_TOKEN }}" 33 | automatic_release_tag: "${{github.event.inputs.tag}}" 34 | title: "支持es${{github.event.inputs.es-version}}版本的分词插件" 35 | prerelease: 
false 36 | files: | 37 | staging/*.zip -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/* 2 | /.idea 3 | *.iml 4 | elasticsearch-analysis-ansj.imi 5 | *.class 6 | .settings/ 7 | # Mobile Tools for Java (J2ME) 8 | .mtj.tmp/ 9 | 10 | # Package Files # 11 | *.jar 12 | *.war 13 | *.ear 14 | .classpath 15 | .project 16 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 17 | hs_err_pid* 18 | /bin/ 19 | /target/ 20 | log/ 21 | .DS_Store 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # elasticsearch-analysis-ansj: elasticsearch 的中文分词插件 2 | 3 | # 前言 4 | elasticsearch-analysis-ansj 是一个基于 [ansj](https://github.com/NLPchina/ansj_seg) 分词算法 的 elasticsearch 的中文分词插件。 5 | 6 | # 编译 7 | ```bash 8 | mvn package 9 | ``` 10 | 编译成功后,将会生成打包好的插件压缩包:`target/releases/elasticsearch-analysis-ansj-<版本号>-release.zip`。 11 | 12 | # 安装 13 | ## 安装命令 14 | 在 es 安装目录下执行下面的命令安装插件: 15 | ```bash 16 | ./bin/elasticsearch-plugin install file:///<你的路径>/elasticsearch-analysis-ansj-<版本号>-release.zip 17 | ``` 18 | 19 | > 安装完成后,会生成一个默认的配置文件: `/config/elasticsearch-analysis-ansj/ansj.cfg.yml`,根据需要修改此文件即可。 20 | 21 | ## 测试 22 | 安装完成后,启动 es 集群。通过以下方式测试是否安装正确: 23 | **方法一:** 24 | 通过 `kibana` 执行 `GET /_cat/ansj?text=中国&type=index_ansj` 命令,测试 `index_ansj` 分词器,返回内容如下: 25 | ```json 26 | { 27 | "result": [ 28 | { 29 | "name": "中国", 30 | "nature": "ns", 31 | 
"offe": 0, 32 | "realName": "中国", 33 | "synonyms": null 34 | }, 35 | { 36 | "name": "中", 37 | "nature": "f", 38 | "offe": 0, 39 | "realName": "中", 40 | "synonyms": null 41 | }, 42 | { 43 | "name": "国", 44 | "nature": "n", 45 | "offe": 1, 46 | "realName": "国", 47 | "synonyms": null 48 | } 49 | ] 50 | } 51 | ``` 52 | **方法二:** 53 | 通过 `kibana` 执行 `GET /_cat/ansj/config` 命令,获取配置文件内容如下: 54 | ```json 55 | { 56 | "ambiguity": [ 57 | "ambiguity" 58 | ], 59 | "stop": [ 60 | "stop" 61 | ], 62 | "synonyms": [ 63 | "synonyms" 64 | ], 65 | "crf": [ 66 | "crf" 67 | ], 68 | "isQuantifierRecognition": "true", 69 | "isRealName": "false", 70 | "isNumRecognition": "true", 71 | "isNameRecognition": "true", 72 | "dic": [ 73 | "dic" 74 | ] 75 | } 76 | ``` 77 | 78 | # 使用 79 | + 第一步:创建索引 80 | ```text 81 | PUT /test_index?pretty 82 | { 83 | "settings" : { 84 | "index" : { 85 | "number_of_shards" : 16, 86 | "number_of_replicas" : 1, 87 | "refresh_interval":"5s" 88 | } 89 | }, 90 | "mappings" : { 91 | "properties" : { 92 | "test_field": { 93 | "type": "text", 94 | "analyzer": "index_ansj", 95 | "search_analyzer": "query_ansj" 96 | } 97 | } 98 | } 99 | } 100 | ``` 101 | 102 | > **说明:** 103 | > + `test_index`: 用于测试的索引名称; 104 | > + `test_field`: 用于测试的字段; 105 | > + 指定字段的索引分词器为: `index_ansj` ; 106 | > + 指定字段的搜索分词器为: `query_ansj` ; 107 | 108 | 测试索引配置是否正确: 109 | ```text 110 | POST /test_index/_analyze 111 | { 112 | "field": "test_field", 113 | "text": "中国" 114 | } 115 | ``` 116 | 117 | + 第二步:添加数据 118 | ```text 119 | PUT test_index/_bulk?refresh 120 | {"create":{ }} 121 | { "test_field" : "中国" } 122 | {"create":{ }} 123 | { "test_field" : "中华人民共和国" } 124 | {"create":{ }} 125 | { "test_field" : "中国有56个民族" } 126 | {"create":{ }} 127 | { "test_field" : "中国是社会主义国家" } 128 | ``` 129 | 130 | + 第三步:执行搜索 131 | ```text 132 | GET test_index/_search 133 | { 134 | "query": { 135 | "match": { 136 | "test_field": { 137 | "query": "中国" 138 | } 139 | } 140 | } 141 | } 142 | ``` 143 | 144 | > **注意:** 145 | > + 
上述操作语句都是在 `kibana` 的 `dev_tools` 里执行的; 146 | > + 上述操作语句仅在 es `8.x` 版本上测试过,其它版本请根据实际情况调整。 147 | 148 | # 插件功能 149 | 安装插件后,在 es 集群中会增加以下功能: 150 | 151 | **三个 analyzer:** 152 | + index_ansj (建议索引使用) 153 | + query_ansj (建议搜索使用) 154 | + dic_ansj 155 | 156 | **三个 tokenizer:** 157 | + index_ansj (建议索引使用) 158 | + query_ansj (建议搜索使用) 159 | + dic_ansj 160 | 161 | **http 接口:** 162 | + /_cat/ansj: 执行分词 163 | + /_cat/ansj/config: 显示全部配置 164 | + /_ansj/flush/config: 刷新全部配置 165 | + /_ansj/flush/dic: 更新全部词典。包括用户自定义词典,停用词典,同义词典,歧义词典,crf 166 | 167 | # 配置文件 168 | ## 配置文件格式 169 | ```yaml 170 | ansj: 171 | #默认参数配置 172 | isNameRecognition: true #开启姓名识别 173 | isNumRecognition: true #开启数字识别 174 | isQuantifierRecognition: true #是否数字和量词合并 175 | isRealName: false #是否保留真实词语,建议保留false 176 | 177 | #用户自定词典配置 178 | #dic: default.dic #也可以写成 file://default.dic , 如果未配置dic,则此词典默认加载 179 | # http方式加载 180 | #dic_d1: http://xxx/xx.dic 181 | # jar中文件加载 182 | #dic_d2: jar://org.ansj.dic.DicReader|/dic2.dic 183 | # 从数据库中加载 184 | #dic_d3: jdbc://jdbc:mysql://xxxx:3306/ttt?useUnicode=true&characterEncoding=utf-8&zeroDateTimeBehavior=convertToNull|username|password|select name as name,nature,freq from dic where type=1 185 | # 从自定义类中加载,YourClas extends PathToStream 186 | #dic_d3: class://xxx.xxx.YourClas|ohterparam 187 | 188 | #过滤词典配置 189 | #stop: http,file,jar,class,jdbc 都支持 190 | #stop_key1: ... 191 | 192 | #歧义词典配置 193 | #ambiguity: http,file,jar,class,jdbc 都支持 194 | #ambiguity_key1: ... 195 | 196 | #同义词词典配置 197 | #synonyms: http,file,jar,class,jdbc 都支持 198 | #synonyms_key1: ... 
199 | ``` 200 | 201 | ## 配置文件示例 202 | ### 使用本地文件词库 203 | ```yaml 204 | ansj: 205 | # 开启姓名识别 206 | isNameRecognition: false 207 | # 开启数字识别 208 | isNumRecognition: true 209 | # 是否数字和量词合并 210 | isQuantifierRecognition: false 211 | # 是否保留真实词语 212 | isRealName: false 213 | # 词典 214 | dic: file:///data/elasticsearch-dic/ansj/main.dic 215 | # 停词(过滤词)词典 216 | stop: file:///data/elasticsearch-dic/ansj/stop.dic 217 | # 歧义词词典配置 218 | ambiguity: file:///data/elasticsearch-dic/ansj/ambiguity.dic 219 | # 同义词词典配置 220 | synonyms: file:///data/elasticsearch-dic/ansj/synonyms.dic 221 | ``` 222 | 223 | ### 使用 HTTP 协议加载词库 224 | ```yaml 225 | ansj: 226 | # 开启姓名识别 227 | isNameRecognition: false 228 | # 开启数字识别 229 | isNumRecognition: true 230 | # 是否数字和量词合并 231 | isQuantifierRecognition: false 232 | # 是否保留真实词语 233 | isRealName: false 234 | # 词典 235 | dic: http://example.com/elasticsearch-dic/ansj/main.dic 236 | # 停词(过滤词)词典 237 | stop: http://example.com/elasticsearch-dic/ansj/stop.dic 238 | # 歧义词词典配置 239 | ambiguity: http://example.com/elasticsearch-dic/ansj/ambiguity.dic 240 | # 同义词词典配置 241 | synonyms: http://example.com/elasticsearch-dic/ansj/synonyms.dic 242 | ``` 243 | 244 | # 插件版本与 ES 版本的对应关系 245 | 246 | | plugin | elasticsearch| 247 | |---------| -----: | 248 | | 1.0.0 | 0.90.2 | 249 | | 1.x | 1.x | 250 | | 2.1.1 | 2.1.1 | 251 | | 2.3.1 | 2.3.1 | 252 | | 2.3.2 | 2.3.2 | 253 | | 2.3.3 | 2.3.3 | 254 | | 2.3.4 | 2.3.4 | 255 | | 2.3.5 | 2.3.5 | 256 | | 2.4.0 | 2.4.0 | 257 | | 2.4.1 | 2.4.1 | 258 | | 2.4.2 | 2.4.2 | 259 | | 2.4.3 | 2.4.3 | 260 | | 2.4.4 | 2.4.4 | 261 | | 2.4.5 | 2.4.5 | 262 | | 2.4.6 | 2.4.6 | 263 | | 5.0.0 | 5.0.0 | 264 | | 5.0.1 | 5.0.1 | 265 | | 5.0.2 | 5.0.2 | 266 | | 5.1.1 | 5.1.1 | 267 | | 5.1.2 | 5.1.2 | 268 | | 5.2.0 | 5.2.0 | 269 | | 5.2.1 | 5.2.1 | 270 | | 5.2.2 | 5.2.2 | 271 | | 5.3.0 | 5.3.0 | 272 | | 5.3.1 | 5.3.1 | 273 | | 5.3.2 | 5.3.2 | 274 | | 5.3.3 | 5.3.3 | 275 | | 5.4.0 | 5.4.0 | 276 | | 5.4.1 | 5.4.1 | 277 | | 5.4.2 | 5.4.2 | 278 | | 5.4.3 | 5.4.3 | 
279 | | 5.5.0 | 5.5.0 | 280 | | 5.5.1 | 5.5.1 | 281 | | 5.5.2 | 5.5.2 | 282 | | 5.5.3 | 5.5.3 | 283 | | 5.6.0 | 5.6.0 | 284 | | 5.6.1 | 5.6.1 | 285 | | 5.6.2 | 5.6.2 | 286 | | 5.6.3 | 5.6.3 | 287 | | 5.6.4 | 5.6.4 | 288 | | 5.6.5 | 5.6.5 | 289 | | 5.6.6 | 5.6.6 | 290 | | 5.6.7 | 5.6.7 | 291 | | 5.6.8 | 5.6.8 | 292 | | 5.6.9 | 5.6.9 | 293 | | 5.6.10 | 5.6.10 | 294 | | 5.6.11 | 5.6.11 | 295 | | 5.6.12 | 5.6.12 | 296 | | 5.6.13 | 5.6.13 | 297 | | 5.6.14 | 5.6.14 | 298 | | 5.6.15 | 5.6.15 | 299 | | 5.6.16 | 5.6.16 | 300 | | 6.0.0 | 6.0.0 | 301 | | 6.0.1 | 6.0.1 | 302 | | 6.1.0 | 6.1.0 | 303 | | 6.1.1 | 6.1.1 | 304 | | 6.1.2 | 6.1.2 | 305 | | 6.1.3 | 6.1.3 | 306 | | 6.1.4 | 6.1.4 | 307 | | 6.2.0 | 6.2.0 | 308 | | 6.2.1 | 6.2.1 | 309 | | 6.2.2 | 6.2.2 | 310 | | 6.2.3 | 6.2.3 | 311 | | 6.2.4 | 6.2.4 | 312 | | 6.3.0 | 6.3.0 | 313 | | 6.3.1 | 6.3.1 | 314 | | 6.3.2 | 6.3.2 | 315 | | 6.4.0 | 6.4.0 | 316 | | 6.4.1 | 6.4.1 | 317 | | 6.4.2 | 6.4.2 | 318 | | 6.4.3 | 6.4.3 | 319 | | 6.5.0 | 6.5.0 | 320 | | 6.5.1 | 6.5.1 | 321 | | 6.5.2 | 6.5.2 | 322 | | 6.5.3 | 6.5.3 | 323 | | 6.5.4 | 6.5.4 | 324 | | 6.6.0 | 6.6.0 | 325 | | 6.6.1 | 6.6.1 | 326 | | 6.6.2 | 6.6.2 | 327 | | 6.7.0 | 6.7.0 | 328 | | 6.7.1 | 6.7.1 | 329 | | 6.7.2 | 6.7.2 | 330 | | 6.8.0 | 6.8.0 | 331 | | 6.8.1 | 6.8.1 | 332 | | 6.8.2 | 6.8.2 | 333 | | 6.8.3 | 6.8.3 | 334 | | 6.8.4 | 6.8.4 | 335 | | 6.8.5 | 6.8.5 | 336 | | 6.8.6 | 6.8.6 | 337 | | 6.8.7 | 6.8.7 | 338 | | 6.8.8 | 6.8.8 | 339 | | 6.8.9 | 6.8.9 | 340 | | 6.8.10 | 6.8.10 | 341 | | 6.8.11 | 6.8.11 | 342 | | 6.8.12 | 6.8.12 | 343 | | 6.8.13 | 6.8.13 | 344 | | 6.8.14 | 6.8.14 | 345 | | 6.8.15 | 6.8.15 | 346 | | 6.8.16 | 6.8.16 | 347 | | 6.8.17 | 6.8.17 | 348 | | 6.8.18 | 6.8.18 | 349 | | 6.8.19 | 6.8.19 | 350 | | 6.8.20 | 6.8.20 | 351 | | 6.8.21 | 6.8.21 | 352 | | 6.8.22 | 6.8.22 | 353 | | 6.8.23 | 6.8.23 | 354 | | 7.0.0 | 7.0.0 | 355 | | 7.0.1 | 7.0.1 | 356 | | 7.1.0 | 7.1.0 | 357 | | 7.1.1 | 7.1.1 | 358 | | 7.2.0 | 7.2.0 | 359 | | 7.2.1 | 7.2.1 | 360 | | 7.3.0 
| 7.3.0 | 361 | | 7.3.1 | 7.3.1 | 362 | | 7.3.2 | 7.3.2 | 363 | | 7.4.0 | 7.4.0 | 364 | | 7.4.1 | 7.4.1 | 365 | | 7.4.2 | 7.4.2 | 366 | | 7.5.0 | 7.5.0 | 367 | | 7.5.1 | 7.5.1 | 368 | | 7.5.2 | 7.5.2 | 369 | | 7.6.0 | 7.6.0 | 370 | | 7.6.1 | 7.6.1 | 371 | | 7.6.2 | 7.6.2 | 372 | | 7.7.0 | 7.7.0 | 373 | | 7.7.1 | 7.7.1 | 374 | | 7.8.0 | 7.8.0 | 375 | | 7.8.1 | 7.8.1 | 376 | | 7.9.0 | 7.9.0 | 377 | | 7.9.1 | 7.9.1 | 378 | | 7.9.2 | 7.9.2 | 379 | | 7.9.3 | 7.9.3 | 380 | | 7.17.5 | 7.17.5 | 381 | | 7.17.7 | 7.17.7 | 382 | | 7.17.8 | 7.17.8 | 383 | | 7.17.9 | 7.17.9 | 384 | | 7.17.10 | 7.17.10 | 385 | | 7.17.11 | 7.17.11 | 386 | | 7.17.12 | 7.17.12 | 387 | | 7.17.13 | 7.17.13 | 388 | | 7.17.14 | 7.17.14 | 389 | | 7.17.15 | 7.17.15 | 390 | | 7.17.16 | 7.17.16 | 391 | | 7.17.17 | 7.17.17 | 392 | | 7.17.18 | 7.17.18 | 393 | | 7.17.19 | 7.17.19 | 394 | | 7.17.20 | 7.17.20 | 395 | | 7.17.21 | 7.17.21 | 396 | | 7.17.22 | 7.17.22 | 397 | | 7.17.23 | 7.17.23 | 398 | | 7.17.24 | 7.17.24 | 399 | | 7.17.25 | 7.17.25 | 400 | | 7.17.26 | 7.17.26 | 401 | | 7.17.27 | 7.17.27 | 402 | | 7.17.28 | 7.17.28 | 403 | | 8.3.3 | 8.3.3 | 404 | | 8.5.3 | 8.5.3 | 405 | | 8.6.0 | 8.6.0 | 406 | | 8.6.1 | 8.6.1 | 407 | | 8.6.2 | 8.6.2 | 408 | | 8.7.0 | 8.7.0 | 409 | | 8.7.1 | 8.7.1 | 410 | | 8.8.0 | 8.8.0 | 411 | | 8.8.1 | 8.8.1 | 412 | | 8.8.2 | 8.8.2 | 413 | | 8.9.0 | 8.9.0 | 414 | | 8.9.1 | 8.9.1 | 415 | | 8.9.2 | 8.9.2 | 416 | | 8.10.0 | 8.10.0 | 417 | | 8.10.1 | 8.10.1 | 418 | | 8.10.2 | 8.10.2 | 419 | | 8.10.3 | 8.10.3 | 420 | | 8.10.4 | 8.10.4 | 421 | | 8.11.0 | 8.11.0 | 422 | | 8.11.1 | 8.11.1 | 423 | | 8.11.2 | 8.11.2 | 424 | | 8.11.3 | 8.11.3 | 425 | | 8.11.4 | 8.11.4 | 426 | | 8.12.0 | 8.12.0 | 427 | | 8.12.1 | 8.12.1 | 428 | | 8.12.2 | 8.12.2 | 429 | | 8.13.0 | 8.13.0 | 430 | | 8.13.1 | 8.13.1 | 431 | | 8.13.2 | 8.13.2 | 432 | | 8.13.3 | 8.13.3 | 433 | | 8.13.4 | 8.13.4 | 434 | | 8.14.0 | 8.14.0 | 435 | | 8.14.1 | 8.14.1 | 436 | | 8.14.2 | 8.14.2 | 437 | | 8.14.3 | 8.14.3 | 438 | | 
8.15.0 | 8.15.0 | 439 | | 8.15.1 | 8.15.1 | 440 | | 8.15.2 | 8.15.2 | 441 | | 8.15.3 | 8.15.3 | 442 | | 8.15.4 | 8.15.4 | 443 | | 8.15.5 | 8.15.5 | 444 | | 8.16.0 | 8.16.0 | 445 | | 8.16.1 | 8.16.1 | 446 | | 8.16.2 | 8.16.2 | 447 | | 8.16.3 | 8.16.3 | 448 | | 8.17.0 | 8.17.0 | 449 | | 8.17.1 | 8.17.1 | 450 | | 8.17.2 | 8.17.2 | 451 | | 8.17.3 | 8.17.3 | 452 | | 8.17.4 | 8.17.4 | 453 | | 8.17.5 | 8.17.5 | 454 | | 8.17.6 | 8.17.6 | 455 | | 8.18.0 | 8.18.0 | 456 | | 8.18.1 | 8.18.1 | 457 | | 9.0.0 | 9.0.0 | 458 | | 9.0.1 | 9.0.1 | 459 | 460 | # 版权 461 | `elasticsearch-analysis-ansj` is licenced under the Apache License Version 2.0. See the [LICENSE](https://github.com/NLPchina/elasticsearch-analysis-ansj/blob/master/LICENSE) file for details. 462 | -------------------------------------------------------------------------------- /config/ansj.cfg.yml: -------------------------------------------------------------------------------- 1 | # 全局变量配置方式一 2 | ansj: 3 | #默认参数配置 4 | isNameRecognition: true #开启姓名识别 5 | isNumRecognition: true #开启数字识别 6 | isQuantifierRecognition: true #是否数字和量词合并 7 | isRealName: false #是否保留真实词语,建议保留false 8 | 9 | #用户自定词典配置 10 | #dic: default.dic #也可以写成 file://default.dic , 如果未配置dic,则此词典默认加载 11 | # http方式加载 12 | #dic_d1: http://xxx/xx.dic 13 | # jar中文件加载 14 | #dic_d2: jar://org.ansj.dic.DicReader|/dic2.dic 15 | # 从数据库中加载 16 | #dic_d3: jdbc://jdbc:mysql://xxxx:3306/ttt?useUnicode=true&characterEncoding=utf-8&zeroDateTimeBehavior=convertToNull|username|password|select name as name,nature,freq from dic where type=1 17 | # 从自定义类中加载,YourClas extends PathToStream 18 | #dic_d3: class://xxx.xxx.YourClas|ohterparam 19 | 20 | #过滤词典配置 21 | #stop: http,file,jar,class,jdbc 都支持 22 | #stop_key1: ... 23 | 24 | #歧义词典配置 25 | #ambiguity: http,file,jar,class,jdbc 都支持 26 | #ambiguity_key1: ... 27 | 28 | #同义词词典配置 29 | #synonyms: http,file,jar,class,jdbc 都支持 30 | #synonyms_key1: ... 
31 | 32 | 33 | 34 | # 全局变量配置方式二 通过配置文件的方式配置,优先级高于es本身的配置 35 | # ansj_config: ansj_library.properties #http,file,jar,class,jdbc 都支持,格式参见ansj_library.properties 36 | 37 | # 配置自定义分词器 38 | 39 | 40 | 41 | #index: 42 | # analysis: 43 | # tokenizer : 44 | # my_dic : 45 | # type : dic_ansj 46 | # dic: dic 47 | # stop: stop 48 | # ambiguity: ambiguity 49 | # synonyms: synonyms 50 | # isNameRecognition: true 51 | # isNumRecognition: true 52 | # isQuantifierRecognition: true 53 | # isRealName: false 54 | # 55 | # analyzer: 56 | # my_dic: 57 | # type: custom 58 | # tokenizer: my_dic 59 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | org.ansj 6 | elasticsearch-analysis-ansj 7 | 9.0.1.0 8 | elasticsearch analysis by ansj 9 | elasticsearch-analysis-ansj 10 | http://maven.nlpcn.org 11 | 12 | 13 | UTF-8 14 | **/MainTestSuite.class 15 | elasticsearch-analysis-ansj 16 | 17 17 | 9.0.1 18 | org.ansj.elasticsearch.plugin.AnalysisAnsjPlugin 19 | 20 | 21 | 22 | 23 | org.ansj 24 | ansj_seg 25 | 5.1.6 26 | 27 | 28 | 29 | org.ansj 30 | ansj_lucene9_plug 31 | 5.1.5.2 32 | 33 | 34 | 35 | org.nlpcn 36 | nlp-lang 37 | 1.7.9 38 | 39 | 40 | 41 | org.elasticsearch 42 | elasticsearch 43 | ${elasticsearch.version} 44 | provided 45 | 46 | 47 | org.elasticsearch 48 | elasticsearch-preallocate 49 | 50 | 51 | 52 | 53 | 54 | org.apache.logging.log4j 55 | log4j-api 56 | 2.17.1 57 | provided 58 | 59 | 60 | 61 | junit 62 | junit 63 | 4.13.1 64 | test 65 | 66 | 67 | 68 | 69 | 70 | 71 | org.apache.maven.plugins 72 | maven-compiler-plugin 73 | 3.3 74 | 75 | 8 76 | 8 77 | 78 | 79 | 80 | 81 | maven-assembly-plugin 82 | 83 | ${project.build.directory}/releases/ 84 | 85 | ${basedir}/src/main/assembly/plugin.xml 86 | 87 | 88 | 89 | 90 | package 91 | 92 | single 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 
-------------------------------------------------------------------------------- /src/main/assembly/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | release 7 | 8 | zip 9 | 10 | false 11 | 12 | 13 | ${project.basedir}/config/dic 14 | / 15 | 16 | 17 | ${project.basedir}/config 18 | /config 19 | 20 | **/dic/** 21 | 22 | 23 | 24 | 25 | ${project.basedir}/src/main/plugin-metadata 26 | / 27 | 28 | 29 | 30 | 31 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 32 | true 33 | 34 | 35 | 36 | 37 | true 38 | 39 | org.elasticsearch:elasticsearch 40 | org.apache.lucene:lucene* 41 | com.spatial4j:spatial4j 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /src/main/java/org/ansj/elasticsearch/action/AnsjAction.java: -------------------------------------------------------------------------------- 1 | package org.ansj.elasticsearch.action; 2 | 3 | import org.elasticsearch.action.ActionType; 4 | 5 | /** 6 | * Created by zhangqinghua on 16/2/2. 
7 | */ 8 | public class AnsjAction extends ActionType { 9 | 10 | static final String NAME = "cluster:admin/ansj/analyze"; 11 | 12 | public static final AnsjAction INSTANCE = new AnsjAction(NAME); 13 | 14 | public AnsjAction(String name) { 15 | super(name); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/org/ansj/elasticsearch/action/AnsjRequest.java: -------------------------------------------------------------------------------- 1 | package org.ansj.elasticsearch.action; 2 | 3 | import org.elasticsearch.ElasticsearchGenerationException; 4 | import org.elasticsearch.action.ActionRequestValidationException; 5 | import org.elasticsearch.action.support.single.shard.SingleShardRequest; 6 | import org.elasticsearch.common.bytes.BytesArray; 7 | import org.elasticsearch.common.bytes.BytesReference; 8 | import org.elasticsearch.common.io.stream.StreamInput; 9 | import org.elasticsearch.common.io.stream.StreamOutput; 10 | import org.elasticsearch.xcontent.XContentBuilder; 11 | import org.elasticsearch.xcontent.XContentFactory; 12 | import org.elasticsearch.xcontent.XContentType; 13 | 14 | import java.io.IOException; 15 | import java.util.HashMap; 16 | import java.util.Map; 17 | 18 | /** 19 | * Created by zhangqinghua on 16/2/2. 
20 | */ 21 | public class AnsjRequest extends SingleShardRequest { 22 | 23 | private String path; 24 | 25 | private Map args = new HashMap<>(); 26 | 27 | private BytesReference source; 28 | 29 | public AnsjRequest() { 30 | } 31 | 32 | public AnsjRequest(String path) { 33 | this.path = path; 34 | } 35 | 36 | public AnsjRequest(StreamInput in) throws IOException { 37 | super(in); 38 | path = in.readString(); 39 | args = in.readGenericMap(); 40 | source = in.readBytesReference(); 41 | } 42 | 43 | public String getPath() { 44 | return path; 45 | } 46 | 47 | public String get(String key) { 48 | return (String) args.get(key); 49 | } 50 | 51 | public String put(String key, String value) { 52 | return (String) args.put(key, value); 53 | } 54 | 55 | @Override 56 | public ActionRequestValidationException validate() { 57 | return null; 58 | } 59 | 60 | @Override 61 | public void writeTo(StreamOutput out) throws IOException { 62 | super.writeTo(out); 63 | out.writeString(path); 64 | out.writeGenericMap(args); 65 | out.writeBytesReference(source); 66 | } 67 | 68 | public Map asMap() { 69 | return args; 70 | } 71 | 72 | public BytesReference source() { 73 | return source; 74 | } 75 | 76 | public AnsjRequest source(Map querySource) { 77 | try { 78 | XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); 79 | builder.map(querySource); 80 | return source(builder); 81 | } catch (IOException e) { 82 | throw new ElasticsearchGenerationException("Failed to generate [" + querySource + "]", e); 83 | } 84 | } 85 | 86 | public AnsjRequest source(XContentBuilder builder) { 87 | this.source = BytesReference.bytes(builder); 88 | return this; 89 | } 90 | 91 | public AnsjRequest source(String querySource) { 92 | this.source = new BytesArray(querySource); 93 | return this; 94 | } 95 | 96 | public AnsjRequest source(byte[] querySource) { 97 | return source(querySource, 0, querySource.length); 98 | } 99 | 100 | public AnsjRequest source(byte[] querySource, int offset, int 
length) { 101 | return source(new BytesArray(querySource, offset, length)); 102 | } 103 | 104 | public AnsjRequest source(BytesReference querySource) { 105 | this.source = querySource; 106 | return this; 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/main/java/org/ansj/elasticsearch/action/AnsjResponse.java: -------------------------------------------------------------------------------- 1 | package org.ansj.elasticsearch.action; 2 | 3 | import org.elasticsearch.action.ActionResponse; 4 | import org.elasticsearch.common.io.stream.StreamInput; 5 | import org.elasticsearch.common.io.stream.StreamOutput; 6 | import org.elasticsearch.xcontent.ToXContentObject; 7 | import org.elasticsearch.xcontent.XContentBuilder; 8 | 9 | import java.io.IOException; 10 | import java.util.HashMap; 11 | import java.util.Map; 12 | 13 | /** 14 | * Created by zhangqinghua on 16/2/2. 15 | */ 16 | public class AnsjResponse extends ActionResponse implements ToXContentObject { 17 | 18 | private final Map map; 19 | 20 | public AnsjResponse() { 21 | this.map = new HashMap<>(); 22 | } 23 | 24 | public AnsjResponse(Map map) { 25 | this.map = new HashMap<>(); 26 | this.map.putAll(map); 27 | } 28 | 29 | public AnsjResponse(StreamInput in) throws IOException { 30 | super(in); 31 | map = in.readGenericMap(); 32 | } 33 | 34 | public AnsjResponse put(String key, Object value) { 35 | map.put(key, value); 36 | return this; 37 | } 38 | 39 | public AnsjResponse putAll(Map map) { 40 | this.map.putAll(map); 41 | return this; 42 | } 43 | 44 | public Map asMap() { 45 | return map; 46 | } 47 | 48 | @Override 49 | public XContentBuilder toXContent(XContentBuilder builder, Params params) { 50 | return builder; 51 | } 52 | 53 | @Override 54 | public void writeTo(StreamOutput out) throws IOException { 55 | out.writeGenericMap(map); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- 
/src/main/java/org/ansj/elasticsearch/action/TransportAnsjAction.java:
--------------------------------------------------------------------------------
package org.ansj.elasticsearch.action;

import org.ansj.domain.Result;
import org.ansj.domain.Term;
import org.ansj.elasticsearch.index.config.AnsjElasticConfigurator;
import org.ansj.library.AmbiguityLibrary;
import org.ansj.library.CrfLibrary;
import org.ansj.library.DicLibrary;
import org.ansj.library.StopLibrary;
import org.ansj.library.SynonymsLibrary;
import org.ansj.lucene9.AnsjAnalyzer;
import org.ansj.recognition.impl.StopRecognition;
import org.ansj.recognition.impl.SynonymsRecgnition;
import org.ansj.splitWord.Analysis;
import org.ansj.splitWord.analysis.BaseAnalysis;
import org.ansj.splitWord.analysis.DicAnalysis;
import org.ansj.splitWord.analysis.IndexAnalysis;
import org.ansj.splitWord.analysis.NlpAnalysis;
import org.ansj.splitWord.analysis.ToAnalysis;
import org.ansj.util.MyStaticValue;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.single.shard.TransportSingleShardAction;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.block.ClusterBlockLevel;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.ShardsIterator;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.injection.guice.Inject;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportException;
import org.elasticsearch.transport.TransportResponseHandler;
import org.elasticsearch.transport.TransportService;
import org.nlpcn.commons.lang.tire.domain.Forest;
import org.nlpcn.commons.lang.tire.domain.SmartForest;
import org.nlpcn.commons.lang.util.StringUtil;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;

/**
 * Transport-layer implementation of {@link AnsjAction}. Dispatches on the REST
 * path carried by the request: ad-hoc analysis, config display, and config/dic
 * reloads (the "all" variants broadcast to every node in the cluster).
 *
 * Created by zhangqinghua on 16/2/2.
 */
public class TransportAnsjAction extends TransportSingleShardAction<AnsjRequest, AnsjResponse> {

    private static final Logger LOG = LogManager.getLogger();

    private static final String MESSAGE = "flush ok";

    /** Plugin configuration holder; performs the actual reloads. */
    private final AnsjElasticConfigurator cfg;

    @Inject
    public TransportAnsjAction(ThreadPool threadPool, ClusterService clusterService,
                               TransportService transportService, ActionFilters actionFilters,
                               IndexNameExpressionResolver indexNameExpressionResolver,
                               AnsjElasticConfigurator cfg) {
        super(AnsjAction.NAME, threadPool, clusterService, transportService, actionFilters, indexNameExpressionResolver, AnsjRequest::new, threadPool.executor(ThreadPool.Names.GENERIC));

        this.cfg = cfg;
    }

    @Override
    protected AnsjResponse shardOperation(AnsjRequest request, ShardId shardId) {
        String path = request.getPath();
        if ("/_cat/ansj".equals(path)) {
            // Run the analyzer on the supplied text.
            return executeAnalyzer(request);
        } else if ("/_cat/ansj/config".equals(path)) {
            // Show the full configuration.
            return showConfig();
        } else if ("/_ansj/flush/config".equals(path)) {
            // Refresh configuration on every node.
            return flushConfigAll();
        } else if ("/_ansj/flush/config/single".equals(path)) {
            // Refresh configuration on this node only.
            return flushConfig();
        } else if ("/_ansj/flush/dic".equals(path)) {
            // Refresh a dictionary on every node.
            return flushDicAll(request);
        } else if ("/_ansj/flush/dic/single".equals(path)) {
            // Refresh a dictionary on this node only.
            return flushDic(request);
        }

        return new AnsjResponse().put("message", "not find any by path " + path);
    }

    /**
     * Runs one of the ansj analyzers over request parameter "text", honoring
     * optional per-request dictionaries and recognition flags.
     */
    private AnsjResponse executeAnalyzer(AnsjRequest request) {
        AnsjResponse response = new AnsjResponse();

        if (!request.asMap().containsKey("text") || !request.asMap().containsKey("type")) {
            return response.put("message", "err args example: /_cat/ansj?text=中国&type=index_ansj&dic=dic&stop=stop&ambiguity=ambiguity&synonyms=synonyms");
        }

        Analysis analysis;

        String temp;
        String type = request.get("type");

        if (type == null) {
            type = AnsjAnalyzer.TYPE.base_ansj.name();
        }

        switch (AnsjAnalyzer.TYPE.valueOf(type)) {
            case base_ansj:
                analysis = new BaseAnalysis();
                break;
            case index_ansj:
                analysis = new IndexAnalysis();
                break;
            case dic_ansj:
                analysis = new DicAnalysis();
                break;
            case query_ansj:
                analysis = new ToAnalysis();
                break;
            case nlp_ansj:
                analysis = new NlpAnalysis();
                if (StringUtil.isNotBlank(temp = request.get(CrfLibrary.DEFAULT))) {
                    ((NlpAnalysis) analysis).setCrfModel(CrfLibrary.get(temp));
                }
                break;
            default:
                analysis = new BaseAnalysis();
        }

        // User-defined dictionaries (comma-separated library keys).
        if (StringUtil.isNotBlank(temp = request.get(DicLibrary.DEFAULT))) {
            String[] split = temp.split(",");
            Forest[] forests = new Forest[split.length];
            for (int i = 0; i < forests.length; i++) {
                if (StringUtil.isBlank(split[i])) {
                    continue;
                }
                forests[i] = DicLibrary.get(split[i]);
            }
            analysis.setForests(forests);
        }

        // Ambiguity dictionary.
        if (StringUtil.isNotBlank(temp = request.get(AmbiguityLibrary.DEFAULT))) {
            analysis.setAmbiguityForest(AmbiguityLibrary.get(temp.trim()));
        }

        // Person-name recognition toggle.
        if (StringUtil.isNotBlank(temp = request.get("isNameRecognition"))) {
            analysis.setIsNameRecognition(Boolean.parseBoolean(temp));
        }

        // Number recognition toggle.
        if (StringUtil.isNotBlank(temp = request.get("isNumRecognition"))) {
            analysis.setIsNumRecognition(Boolean.parseBoolean(temp));
        }

        // Quantifier recognition toggle.
        if (StringUtil.isNotBlank(temp = request.get("isQuantifierRecognition"))) {
            analysis.setIsQuantifierRecognition(Boolean.parseBoolean(temp));
        }

        // Keep the original characters of each term.
        if (StringUtil.isNotBlank(temp = request.get("isRealName"))) {
            analysis.setIsRealName(Boolean.parseBoolean(temp));
        }

        Result parse = analysis.parseStr(request.get("text"));

        // Stop-word dictionaries, applied as post-parse recognitions.
        if (StringUtil.isNotBlank(temp = request.get(StopLibrary.DEFAULT))) {
            String[] split = temp.split(",");
            for (String key : split) {
                StopRecognition stop = StopLibrary.get(key.trim());
                if (stop != null) {
                    parse.recognition(stop);
                }
            }
        }

        // Synonym dictionaries, applied as post-parse recognitions.
        if (StringUtil.isNotBlank(temp = request.get(SynonymsLibrary.DEFAULT))) {
            String[] split = temp.split(",");
            for (String key : split) {
                SmartForest<List<String>> sf = SynonymsLibrary.get(key.trim());
                if (sf != null) {
                    parse.recognition(new SynonymsRecgnition(sf));
                }
            }
        }

        List<Map<String, Object>> list = new ArrayList<>(parse.size());
        for (Term term : parse) {
            Map<String, Object> map = new LinkedHashMap<>();
            map.put("name", term.getName());
            map.put("nature", term.getNatureStr());
            map.put("offe", term.getOffe());
            map.put("realName", term.getRealName());
            map.put("synonyms", term.getSynonyms());
            list.add(map);
        }

        response.put("result", list);
        return response;
    }

    /**
     * Returns the full runtime configuration: the ansj ENV map plus the keys of
     * every loaded library.
     */
    private AnsjResponse showConfig() {
        Map<String, Object> map = new HashMap<>(MyStaticValue.ENV);
        map.put("dic", DicLibrary.keys().toArray());
        map.put("stop", StopLibrary.keys().toArray());
        map.put("synonyms", SynonymsLibrary.keys().toArray());
        map.put("ambiguity", AmbiguityLibrary.keys().toArray());
        map.put("crf", CrfLibrary.keys().toArray());
        return new AnsjResponse(map);
    }

    /**
     * Broadcasts {@code req} to every node in the cluster and collects a
     * per-node status map ("success", "time out", or the error message).
     * Shared by {@link #flushConfigAll()} and {@link #flushDicAll(AnsjRequest)}.
     */
    private AnsjResponse broadcast(final AnsjRequest req) {
        ClusterState clusterState = clusterService.state();
        clusterState.blocks().globalBlockedRaiseException(ClusterBlockLevel.READ);

        DiscoveryNodes nodes = clusterState.nodes();

        final CountDownLatch countDownLatch = new CountDownLatch(nodes.getSize());

        final Map<String, Object> result = new HashMap<>(16);
        final Writeable.Reader<AnsjResponse> reader = getResponseReader();

        for (final DiscoveryNode node : nodes) {

            // Pre-fill with "time out"; overwritten on response or error.
            result.put(node.getAddress().toString(), "time out");

            transportService.sendRequest(node, AnsjAction.NAME, req, new TransportResponseHandler<AnsjResponse>() {
                @Override
                public AnsjResponse read(StreamInput in) throws IOException {
                    return reader.read(in);
                }

                @Override
                public void handleResponse(AnsjResponse response) {
                    LOG.info("[{}] response: {}", node, response.asMap());
                    result.put(node.getAddress().toString(), "success");
                    countDownLatch.countDown();
                }

                @Override
                public void handleException(TransportException exp) {
                    LOG.warn("failed to send request[path:{},args:{}] to [{}]: {}", req.getPath(), req.asMap(), node, exp);
                    result.put(node.getAddress().toString(), "err :" + exp.getMessage());
                    countDownLatch.countDown();
                }

                @Override
                public Executor executor() {
                    return TransportResponseHandler.TRANSPORT_WORKER;
                }
            });
        }

        try {
            // Best-effort wait; nodes that did not answer keep "time out".
            countDownLatch.await(20, TimeUnit.SECONDS);
        } catch (Exception e) {
            LOG.error("failed to send request[path:{},args:{}] nodes [{}]: {}", req.getPath(), req.asMap(), nodes, e);
        }

        return new AnsjResponse(result);
    }

    /**
     * Refreshes the configuration on every node.
     */
    private AnsjResponse flushConfigAll() {
        return broadcast(new AnsjRequest("/_ansj/flush/config/single"));
    }

    /**
     * Refreshes the configuration on this node only, returning the new config.
     */
    private AnsjResponse flushConfig() {
        this.cfg.reloadConfig();
        return showConfig();
    }

    /**
     * Refreshes the dictionary named by parameter "key" on every node.
     */
    private AnsjResponse flushDicAll(AnsjRequest request) {
        final AnsjRequest req = new AnsjRequest("/_ansj/flush/dic/single");
        req.put("key", request.get("key"));
        return broadcast(req);
    }

    /**
     * Refreshes a single library (or the whole config for key "ansj_config")
     * on this node, keyed by parameter "key".
     */
    private AnsjResponse flushDic(AnsjRequest request) {

        Map<String, Object> params = request.asMap();

        LOG.info("to flush {}", params);

        String key = (String) params.get("key");

        try {
            if (key.startsWith(DicLibrary.DEFAULT) ||
                    key.startsWith(StopLibrary.DEFAULT) ||
                    key.startsWith(SynonymsLibrary.DEFAULT) ||
                    key.startsWith(AmbiguityLibrary.DEFAULT) ||
                    key.startsWith(CrfLibrary.DEFAULT)) {
                this.cfg.reloadLibrary(key);
            } else if ("ansj_config".equals(key)) {
                this.cfg.reloadConfig();
            } else {
                return new AnsjResponse().put("status", "not find any by " + key);
            }

            LOG.info("flush {} ok", key);
            return new AnsjResponse().put("status", MESSAGE);
        } catch (Exception e) {
            LOG.error("flush {} err: {}", key, e);
            return new AnsjResponse().put("status", e.getMessage());
        }
    }

    @Override
    protected Writeable.Reader<AnsjResponse> getResponseReader() {
        return AnsjResponse::new;
    }

    @Override
    protected boolean resolveIndex(AnsjRequest request) {
        return false;
    }

    @Override
    protected ShardsIterator shards(ClusterState clusterState, InternalRequest internalRequest) {
        // null shard iterator => just execute locally.
        return null;
    }

    @Override
    protected ClusterBlockException checkRequestBlock(ClusterState state, InternalRequest request) {
        return null;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/ansj/elasticsearch/cat/AnalyzerCatAction.java:
--------------------------------------------------------------------------------
package org.ansj.elasticsearch.cat;

import org.ansj.library.AmbiguityLibrary;
import org.ansj.library.CrfLibrary;
import org.ansj.library.DicLibrary;
import org.ansj.library.StopLibrary;
import org.ansj.library.SynonymsLibrary;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeAction;
import org.elasticsearch.client.internal.node.NodeClient;
import org.elasticsearch.common.Table;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.RestResponse;
import org.elasticsearch.rest.action.RestResponseListener;
import org.elasticsearch.rest.action.cat.AbstractCatAction;
import org.nlpcn.commons.lang.util.StringUtil;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static java.util.Arrays.asList;
import static java.util.Collections.unmodifiableList;

/**
 * Cat endpoint for running the analyzer (/_cat/analyze).
 * Created by zhangqinghua on 16/2/2.
29 | */ 30 | public class AnalyzerCatAction extends AbstractCatAction { 31 | 32 | @Override 33 | public String getName() { 34 | return "ansj_cat_analyzer_action"; 35 | } 36 | 37 | @Override 38 | public List routes() { 39 | return unmodifiableList(asList( 40 | new Route(RestRequest.Method.GET, "/_cat/analyze"), 41 | new Route(RestRequest.Method.GET, "/_cat/{index}/analyze"))); 42 | } 43 | 44 | @Override 45 | protected RestChannelConsumer doCatRequest(RestRequest request, NodeClient client) { 46 | String[] texts = request.paramAsStringArrayOrEmptyIfAll("text"); 47 | 48 | AnalyzeAction.Request analyzeRequest = new AnalyzeAction.Request(request.param("index")); 49 | analyzeRequest.field(request.param("field")); 50 | 51 | String tokenizer = request.param("tokenizer"); 52 | if (StringUtil.isNotBlank(tokenizer)) { 53 | analyzeRequest.tokenizer(tokenizer); 54 | } 55 | 56 | if (texts == null || texts.length == 0) { 57 | analyzeRequest.text("null"); 58 | analyzeRequest.analyzer("index_ansj"); 59 | return channel -> client.admin().indices().analyze(analyzeRequest, new RestResponseListener(channel) { 60 | @Override 61 | public RestResponse buildResponse(final AnalyzeAction.Response analyzeResponse) throws Exception { 62 | return ChineseRestTable.response(channel, 63 | "err args example : /_cat/analyze?text=中国&analyzer=index_ansj, other params: [field,tokenizer,token_filters,char_filters]"); 64 | } 65 | }); 66 | } else { 67 | analyzeRequest.text(texts); 68 | analyzeRequest.analyzer(request.param("analyzer")); 69 | 70 | String[] filters = request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", new String[0])); 71 | for (String filter : filters) { 72 | analyzeRequest.addTokenFilter(filter); 73 | } 74 | 75 | filters = request.paramAsStringArray("char_filters", new String[0]); 76 | for (String filter : filters) { 77 | analyzeRequest.addCharFilter(filter); 78 | } 79 | 80 | return channel -> client.admin().indices().analyze(analyzeRequest, new 
RestResponseListener(channel) { 81 | @Override 82 | public RestResponse buildResponse(final AnalyzeAction.Response analyzeResponse) throws Exception { 83 | return ChineseRestTable.buildResponse(buildTable(analyzeResponse, request), channel); 84 | } 85 | }); 86 | } 87 | } 88 | 89 | @Override 90 | protected void documentation(StringBuilder stringBuilder) { 91 | stringBuilder.append("/_cat/analyze\n"); 92 | } 93 | 94 | @Override 95 | protected Table getTableWithHeader(RestRequest restRequest) { 96 | final Table table = new Table(); 97 | table.startHeaders(); 98 | table.addCell("term", "alias:t;desc:term;text-align:left"); 99 | table.addCell("start_offset", "alias:s;desc:start_offset;text-align:left"); 100 | table.addCell("end_offset", "alias:e;desc:end_offset;text-align:left"); 101 | table.addCell("position", "alias:p;desc:position;text-align:left"); 102 | table.addCell("type", "alias:t;desc:type;text-align:left"); 103 | table.endHeaders(); 104 | return table; 105 | } 106 | 107 | @Override 108 | protected Set responseParams() { 109 | Set responseParams = new HashSet<>(super.responseParams()); 110 | responseParams.addAll(Arrays.asList("text", "index", "field", "analyzer", "tokenizer", "filters", "token_filters", "char_filters", "type", "key", 111 | "isNameRecognition", "isNumRecognition", "isQuantifierRecognition", "isRealName", "isSkipUserDefine", 112 | CrfLibrary.DEFAULT, DicLibrary.DEFAULT, AmbiguityLibrary.DEFAULT, StopLibrary.DEFAULT, SynonymsLibrary.DEFAULT)); 113 | return Collections.unmodifiableSet(responseParams); 114 | } 115 | 116 | private Table buildTable(final AnalyzeAction.Response analyzeResponse, final RestRequest request) { 117 | Table t = getTableWithHeader(request); 118 | for (AnalyzeAction.AnalyzeToken token : analyzeResponse.getTokens()) { 119 | t.startRow(); 120 | t.addCell(token.getTerm()); 121 | t.addCell(token.getStartOffset()); 122 | t.addCell(token.getEndOffset()); 123 | t.addCell(token.getPosition()); 124 | t.addCell(token.getType()); 125 | 
t.endRow(); 126 | } 127 | return t; 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /src/main/java/org/ansj/elasticsearch/cat/AnsjCatAction.java: -------------------------------------------------------------------------------- 1 | package org.ansj.elasticsearch.cat; 2 | 3 | import org.ansj.elasticsearch.action.AnsjAction; 4 | import org.ansj.elasticsearch.action.AnsjRequest; 5 | import org.ansj.elasticsearch.action.AnsjResponse; 6 | import org.ansj.library.AmbiguityLibrary; 7 | import org.ansj.library.CrfLibrary; 8 | import org.ansj.library.DicLibrary; 9 | import org.ansj.library.StopLibrary; 10 | import org.ansj.library.SynonymsLibrary; 11 | import org.elasticsearch.client.internal.node.NodeClient; 12 | import org.elasticsearch.common.Table; 13 | import org.elasticsearch.rest.RestRequest; 14 | import org.elasticsearch.rest.RestResponse; 15 | import org.elasticsearch.rest.action.RestResponseListener; 16 | import org.elasticsearch.rest.action.cat.AbstractCatAction; 17 | 18 | import java.util.Arrays; 19 | import java.util.Collections; 20 | import java.util.HashSet; 21 | import java.util.List; 22 | import java.util.Set; 23 | 24 | import static java.util.Arrays.asList; 25 | import static java.util.Collections.unmodifiableList; 26 | 27 | /** 28 | * Created by zhangqinghua on 16/2/2. 
29 | */ 30 | public class AnsjCatAction extends AbstractCatAction { 31 | 32 | @Override 33 | public String getName() { 34 | return "ansj_cat_action"; 35 | } 36 | 37 | @Override 38 | public List routes() { 39 | return unmodifiableList(asList( 40 | new Route(RestRequest.Method.GET, "/_cat/ansj"), 41 | new Route(RestRequest.Method.GET, "/_cat/ansj/config"), 42 | new Route(RestRequest.Method.GET, "/_ansj/flush/config"), 43 | new Route(RestRequest.Method.GET, "/_ansj/flush/config/single"), 44 | new Route(RestRequest.Method.GET, "/_ansj/flush/dic"), 45 | new Route(RestRequest.Method.GET, "/_ansj/flush/dic/single"))); 46 | } 47 | 48 | @Override 49 | protected RestChannelConsumer doCatRequest(RestRequest request, NodeClient client) { 50 | AnsjRequest ansjRequest = new AnsjRequest(request.path()); 51 | 52 | ansjRequest.asMap().putAll(request.params()); 53 | 54 | return channel -> client.execute(AnsjAction.INSTANCE, ansjRequest, new RestResponseListener(channel) { 55 | @Override 56 | public RestResponse buildResponse(final AnsjResponse ansjResponse) throws Exception { 57 | return ChineseRestTable.response(channel, ansjResponse.asMap()); 58 | } 59 | }); 60 | } 61 | 62 | @Override 63 | protected Set responseParams() { 64 | Set responseParams = new HashSet<>(super.responseParams()); 65 | responseParams.addAll(Arrays.asList("text", "index", "field", "analyzer", "tokenizer", "filters", "token_filters", "char_filters", "type", "key", 66 | "isNameRecognition", "isNumRecognition", "isQuantifierRecognition", "isRealName", "isSkipUserDefine", 67 | CrfLibrary.DEFAULT, DicLibrary.DEFAULT, AmbiguityLibrary.DEFAULT, StopLibrary.DEFAULT, SynonymsLibrary.DEFAULT)); 68 | return Collections.unmodifiableSet(responseParams); 69 | } 70 | 71 | @Override 72 | protected void documentation(StringBuilder stringBuilder) { 73 | 74 | } 75 | 76 | @Override 77 | protected Table getTableWithHeader(RestRequest restRequest) { 78 | return null; 79 | } 80 | } 81 | 
-------------------------------------------------------------------------------- /src/main/java/org/ansj/elasticsearch/cat/ChineseRestTable.java: -------------------------------------------------------------------------------- 1 | package org.ansj.elasticsearch.cat; 2 | 3 | /* 4 | * Licensed to Elasticsearch under one or more contributor 5 | * license agreements. See the NOTICE file distributed with 6 | * this work for additional information regarding copyright 7 | * ownership. Elasticsearch licenses this file to you under 8 | * the Apache License, Version 2.0 (the "License"); you may 9 | * not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, 15 | * software distributed under the License is distributed on an 16 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 | * KIND, either express or implied. See the License for the 18 | * specific language governing permissions and limitations 19 | * under the License. 
20 | */ 21 | 22 | import org.elasticsearch.common.Strings; 23 | import org.elasticsearch.common.Table; 24 | import org.elasticsearch.common.io.Streams; 25 | import org.elasticsearch.common.io.stream.BytesStream; 26 | import org.elasticsearch.common.regex.Regex; 27 | import org.elasticsearch.common.unit.ByteSizeValue; 28 | import org.elasticsearch.common.unit.SizeValue; 29 | import org.elasticsearch.core.Booleans; 30 | import org.elasticsearch.core.TimeValue; 31 | import org.elasticsearch.rest.RestChannel; 32 | import org.elasticsearch.rest.RestRequest; 33 | import org.elasticsearch.rest.RestResponse; 34 | import org.elasticsearch.rest.RestStatus; 35 | import org.elasticsearch.xcontent.XContentBuilder; 36 | import org.elasticsearch.xcontent.XContentType; 37 | 38 | import java.io.IOException; 39 | import java.io.OutputStreamWriter; 40 | import java.nio.charset.StandardCharsets; 41 | import java.util.ArrayList; 42 | import java.util.LinkedHashSet; 43 | import java.util.List; 44 | import java.util.Map; 45 | import java.util.Set; 46 | 47 | /** 48 | */ 49 | public class ChineseRestTable { 50 | 51 | public static RestResponse buildResponse(Table table, RestChannel channel) throws Exception { 52 | RestRequest request = channel.request(); 53 | XContentType xContentType = XContentType 54 | .fromMediaType(request.param("format", request.header("Content-Type"))); 55 | if (xContentType != null) { 56 | return buildXContentBuilder(table, channel); 57 | } 58 | return buildTextPlainResponse(table, channel); 59 | } 60 | 61 | public static RestResponse response(RestChannel channel, String text) throws IOException { 62 | BytesStream bytesOut = Streams.flushOnCloseStream(channel.bytesOutput()); 63 | try (OutputStreamWriter out = new OutputStreamWriter(bytesOut, StandardCharsets.UTF_8)) { 64 | out.append(text); 65 | } 66 | return new RestResponse(RestStatus.OK, RestResponse.TEXT_CONTENT_TYPE, bytesOut.bytes()); 67 | } 68 | 69 | public static RestResponse response(RestChannel channel, 
Map map) throws IOException { 70 | try (XContentBuilder builder = channel.newBuilder()) { 71 | builder.map(map); 72 | return new RestResponse(RestStatus.OK, builder); 73 | } 74 | } 75 | 76 | public static RestResponse buildXContentBuilder(Table table, RestChannel channel) throws Exception { 77 | try (XContentBuilder builder = channel.newBuilder()) { 78 | RestRequest request = channel.request(); 79 | List displayHeaders = buildDisplayHeaders(table, request); 80 | 81 | builder.startArray(); 82 | for (int row = 0; row < table.getRows().size(); row++) { 83 | builder.startObject(); 84 | for (DisplayHeader header : displayHeaders) { 85 | builder.field(header.display, renderValue(request, table.getAsMap().get(header.name).get(row).value)); 86 | } 87 | builder.endObject(); 88 | } 89 | builder.endArray(); 90 | return new RestResponse(RestStatus.OK, builder); 91 | } 92 | } 93 | 94 | public static RestResponse buildTextPlainResponse(Table table, RestChannel channel) throws IOException { 95 | RestRequest request = channel.request(); 96 | boolean verbose = request.paramAsBoolean("v", false); 97 | 98 | List headers = buildDisplayHeaders(table, request); 99 | int[] width = buildWidths(table, request, verbose, headers); 100 | 101 | BytesStream bytesOut = Streams.flushOnCloseStream(channel.bytesOutput()); 102 | try (OutputStreamWriter out = new OutputStreamWriter(bytesOut, StandardCharsets.UTF_8)) { 103 | if (verbose) { 104 | for (int col = 0; col < headers.size(); col++) { 105 | DisplayHeader header = headers.get(col); 106 | pad(new Table.Cell(header.display, table.findHeaderByName(header.name)), width[col], request, out); 107 | out.append("\t\t"); 108 | } 109 | out.append("\n"); 110 | } 111 | for (int row = 0; row < table.getRows().size(); row++) { 112 | for (int col = 0; col < headers.size(); col++) { 113 | DisplayHeader header = headers.get(col); 114 | pad(table.getAsMap().get(header.name).get(row), width[col], request, out); 115 | out.append("\t\t"); 116 | } 117 | 
out.append("\n"); 118 | } 119 | } 120 | return new RestResponse(RestStatus.OK, RestResponse.TEXT_CONTENT_TYPE, bytesOut.bytes()); 121 | } 122 | 123 | private static List buildDisplayHeaders(Table table, RestRequest request) { 124 | List display = new ArrayList<>(); 125 | if (request.hasParam("h")) { 126 | Set headers = expandHeadersFromRequest(table, request); 127 | 128 | for (String possibility : headers) { 129 | DisplayHeader dispHeader = null; 130 | 131 | if (table.getAsMap().containsKey(possibility)) { 132 | dispHeader = new DisplayHeader(possibility, possibility); 133 | } else { 134 | for (Table.Cell headerCell : table.getHeaders()) { 135 | String aliases = headerCell.attr.get("alias"); 136 | if (aliases != null) { 137 | for (String alias : Strings.splitStringByCommaToArray(aliases)) { 138 | if (possibility.equals(alias)) { 139 | dispHeader = new DisplayHeader(headerCell.value.toString(), alias); 140 | break; 141 | } 142 | } 143 | } 144 | } 145 | } 146 | 147 | if (dispHeader != null) { 148 | // We know we need the header asked for: 149 | display.add(dispHeader); 150 | 151 | // Look for accompanying sibling column 152 | Table.Cell hcell = table.getHeaderMap().get(dispHeader.name); 153 | String siblingFlag = hcell.attr.get("sibling"); 154 | if (siblingFlag != null) { 155 | // ...link the sibling and check that its flag is set 156 | String sibling = siblingFlag + "." + dispHeader.name; 157 | Table.Cell c = table.getHeaderMap().get(sibling); 158 | if (c != null && request.paramAsBoolean(siblingFlag, false)) { 159 | display.add(new DisplayHeader(c.value.toString(), siblingFlag + "." 
+ dispHeader.display)); 160 | } 161 | } 162 | } 163 | } 164 | } else { 165 | for (Table.Cell cell : table.getHeaders()) { 166 | String d = cell.attr.get("default"); 167 | if (Booleans.parseBoolean(d, true)) { 168 | display.add(new DisplayHeader(cell.value.toString(), cell.value.toString())); 169 | } 170 | } 171 | } 172 | return display; 173 | } 174 | 175 | /** 176 | * Extracts all the required fields from the RestRequest 'h' parameter. In 177 | * order to support wildcards like 'bulk.*' this needs potentially parse all 178 | * the configured headers and its aliases and needs to ensure that 179 | * everything is only added once to the returned headers, even if 180 | * 'h=bulk.*.bulk.*' is specified or some headers are contained twice due to 181 | * matching aliases 182 | */ 183 | private static Set expandHeadersFromRequest(Table table, RestRequest request) { 184 | Set headers = new LinkedHashSet<>(table.getHeaders().size()); 185 | 186 | // check headers and aliases 187 | for (String header : Strings.splitStringByCommaToArray(request.param("h"))) { 188 | if (Regex.isSimpleMatchPattern(header)) { 189 | for (Table.Cell tableHeaderCell : table.getHeaders()) { 190 | String configuredHeader = tableHeaderCell.value.toString(); 191 | if (Regex.simpleMatch(header, configuredHeader)) { 192 | headers.add(configuredHeader); 193 | } else if (tableHeaderCell.attr.containsKey("alias")) { 194 | String[] aliases = Strings.splitStringByCommaToArray(tableHeaderCell.attr.get("alias")); 195 | for (String alias : aliases) { 196 | if (Regex.simpleMatch(header, alias)) { 197 | headers.add(configuredHeader); 198 | break; 199 | } 200 | } 201 | } 202 | } 203 | } else { 204 | headers.add(header); 205 | } 206 | } 207 | 208 | return headers; 209 | } 210 | 211 | public static int[] buildHelpWidths(Table table, RestRequest request) { 212 | int[] width = new int[3]; 213 | for (Table.Cell cell : table.getHeaders()) { 214 | String v = renderValue(request, cell.value); 215 | int vWidth = v == null ? 
0 : v.length(); 216 | if (width[0] < vWidth) { 217 | width[0] = vWidth; 218 | } 219 | 220 | v = renderValue(request, cell.attr.containsKey("alias") ? cell.attr.get("alias") : ""); 221 | vWidth = v == null ? 0 : v.length(); 222 | if (width[1] < vWidth) { 223 | width[1] = vWidth; 224 | } 225 | 226 | v = renderValue(request, cell.attr.containsKey("desc") ? cell.attr.get("desc") : "not available"); 227 | vWidth = v == null ? 0 : v.length(); 228 | if (width[2] < vWidth) { 229 | width[2] = vWidth; 230 | } 231 | } 232 | return width; 233 | } 234 | 235 | private static int[] buildWidths(Table table, RestRequest request, boolean verbose, List headers) { 236 | int[] width = new int[headers.size()]; 237 | int i; 238 | 239 | if (verbose) { 240 | i = 0; 241 | for (DisplayHeader hdr : headers) { 242 | int vWidth = hdr.display.length(); 243 | if (width[i] < vWidth) { 244 | width[i] = vWidth; 245 | } 246 | i++; 247 | } 248 | } 249 | 250 | i = 0; 251 | for (DisplayHeader hdr : headers) { 252 | for (Table.Cell cell : table.getAsMap().get(hdr.name)) { 253 | String v = renderValue(request, cell.value); 254 | int vWidth = v == null ? 0 : v.length(); 255 | if (width[i] < vWidth) { 256 | width[i] = vWidth; 257 | } 258 | } 259 | i++; 260 | } 261 | return width; 262 | } 263 | 264 | public static void pad(Table.Cell cell, int width, RestRequest request, OutputStreamWriter out) throws IOException { 265 | String sValue = renderValue(request, cell.value); 266 | int length = sValue == null ? 
0 : sValue.length(); 267 | byte leftOver = (byte) (width - length); 268 | String textAlign = cell.attr.get("text-align"); 269 | if (textAlign == null) { 270 | textAlign = "left"; 271 | } 272 | if (leftOver > 0 && textAlign.equals("right")) { 273 | for (byte i = 0; i < leftOver; i++) { 274 | out.append(" "); 275 | } 276 | if (sValue != null) { 277 | out.append(sValue); 278 | } 279 | } else { 280 | if (sValue != null) { 281 | out.append(sValue); 282 | } 283 | for (byte i = 0; i < leftOver; i++) { 284 | out.append(" "); 285 | } 286 | } 287 | } 288 | 289 | private static String renderValue(RestRequest request, Object value) { 290 | if (value == null) { 291 | return null; 292 | } 293 | if (value instanceof ByteSizeValue) { 294 | ByteSizeValue v = (ByteSizeValue) value; 295 | String resolution = request.param("bytes"); 296 | if ("b".equals(resolution)) { 297 | return Long.toString(v.getBytes()); 298 | } else if ("k".equals(resolution)) { 299 | return Long.toString(v.getKb()); 300 | } else if ("m".equals(resolution)) { 301 | return Long.toString(v.getMb()); 302 | } else if ("g".equals(resolution)) { 303 | return Long.toString(v.getGb()); 304 | } else if ("t".equals(resolution)) { 305 | return Long.toString(v.getTb()); 306 | } else if ("p".equals(resolution)) { 307 | return Long.toString(v.getPb()); 308 | } else { 309 | return v.toString(); 310 | } 311 | } 312 | if (value instanceof SizeValue) { 313 | SizeValue v = (SizeValue) value; 314 | String resolution = request.param("size"); 315 | if ("b".equals(resolution)) { 316 | return Long.toString(v.singles()); 317 | } else if ("k".equals(resolution)) { 318 | return Long.toString(v.kilo()); 319 | } else if ("m".equals(resolution)) { 320 | return Long.toString(v.mega()); 321 | } else if ("g".equals(resolution)) { 322 | return Long.toString(v.giga()); 323 | } else if ("t".equals(resolution)) { 324 | return Long.toString(v.tera()); 325 | } else if ("p".equals(resolution)) { 326 | return Long.toString(v.peta()); 327 | } else { 328 
| return v.toString(); 329 | } 330 | } 331 | if (value instanceof TimeValue) { 332 | TimeValue v = (TimeValue) value; 333 | String resolution = request.param("time"); 334 | if ("ms".equals(resolution)) { 335 | return Long.toString(v.millis()); 336 | } else if ("s".equals(resolution)) { 337 | return Long.toString(v.seconds()); 338 | } else if ("m".equals(resolution)) { 339 | return Long.toString(v.minutes()); 340 | } else if ("h".equals(resolution)) { 341 | return Long.toString(v.hours()); 342 | } else { 343 | return v.toString(); 344 | } 345 | } 346 | // Add additional built in data points we can render based on request 347 | // parameters? 348 | return value.toString(); 349 | } 350 | 351 | static class DisplayHeader { 352 | public final String name; 353 | public final String display; 354 | 355 | DisplayHeader(String name, String display) { 356 | this.name = name; 357 | this.display = display; 358 | } 359 | } 360 | } 361 | -------------------------------------------------------------------------------- /src/main/java/org/ansj/elasticsearch/index/analysis/AnsjAnalyzerProvider.java: -------------------------------------------------------------------------------- 1 | package org.ansj.elasticsearch.index.analysis; 2 | 3 | import org.ansj.elasticsearch.index.config.AnsjElasticConfigurator; 4 | import org.ansj.lucene9.AnsjAnalyzer; 5 | import org.apache.logging.log4j.LogManager; 6 | import org.apache.logging.log4j.Logger; 7 | import org.elasticsearch.common.settings.Settings; 8 | import org.elasticsearch.index.IndexSettings; 9 | import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; 10 | import org.elasticsearch.injection.guice.Inject; 11 | 12 | import java.util.Map; 13 | import java.util.stream.Collectors; 14 | 15 | public class AnsjAnalyzerProvider extends AbstractIndexAnalyzerProvider { 16 | 17 | private static final Logger LOG = LogManager.getLogger(); 18 | 19 | private final AnsjAnalyzer analyzer; 20 | 21 | @Inject 22 | public 
AnsjAnalyzerProvider(IndexSettings indexSettings, String name, Settings settings) { 23 | super(name); 24 | 25 | Settings settings2 = indexSettings.getSettings().getAsSettings("index.analysis.tokenizer." + name()); 26 | 27 | Map args = settings2.keySet().stream().collect(Collectors.toMap(k -> k, settings2::get)); 28 | if (args.isEmpty()) { 29 | args.putAll(AnsjElasticConfigurator.getDefaults()); 30 | args.put("type", name()); 31 | } 32 | 33 | LOG.debug("instance analyzer settings : {}", args); 34 | 35 | analyzer = new AnsjAnalyzer(args); 36 | } 37 | 38 | @Override 39 | public AnsjAnalyzer get() { 40 | return analyzer; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/org/ansj/elasticsearch/index/analysis/AnsjTokenizerTokenizerFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Elasticsearch under one or more contributor 3 | * license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright 5 | * ownership. Elasticsearch licenses this file to you under 6 | * the Apache License, Version 2.0 (the "License"); you may 7 | * not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.ansj.elasticsearch.index.analysis; 21 | 22 | import org.ansj.elasticsearch.index.config.AnsjElasticConfigurator; 23 | import org.ansj.lucene9.AnsjAnalyzer; 24 | import org.apache.logging.log4j.LogManager; 25 | import org.apache.logging.log4j.Logger; 26 | import org.apache.lucene.analysis.Tokenizer; 27 | import org.elasticsearch.common.settings.Settings; 28 | import org.elasticsearch.index.IndexSettings; 29 | import org.elasticsearch.index.analysis.AbstractTokenizerFactory; 30 | import org.elasticsearch.injection.guice.Inject; 31 | 32 | import java.util.Map; 33 | import java.util.stream.Collectors; 34 | 35 | public class AnsjTokenizerTokenizerFactory extends AbstractTokenizerFactory { 36 | 37 | private static final Logger LOG = LogManager.getLogger(); 38 | 39 | private final IndexSettings indexSettings; 40 | 41 | @Inject 42 | public AnsjTokenizerTokenizerFactory(IndexSettings indexSettings, String name, Settings settings) { 43 | super(name); 44 | 45 | this.indexSettings = indexSettings; 46 | } 47 | 48 | @Override 49 | public Tokenizer create() { 50 | Settings settings = indexSettings.getSettings().getAsSettings("index.analysis.tokenizer." 
+ name()); 51 | 52 | Map args = settings.keySet().stream().collect(Collectors.toMap(k -> k, settings::get)); 53 | if (args.isEmpty()) { 54 | args.putAll(AnsjElasticConfigurator.getDefaults()); 55 | args.put("type", name()); 56 | } 57 | 58 | LOG.debug("instance tokenizer settings : {}", args); 59 | 60 | return AnsjAnalyzer.getTokenizer(null, args); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/org/ansj/elasticsearch/index/config/AnsjElasticConfigurator.java: -------------------------------------------------------------------------------- 1 | package org.ansj.elasticsearch.index.config; 2 | 3 | import org.ansj.dic.PathToStream; 4 | import org.ansj.library.AmbiguityLibrary; 5 | import org.ansj.library.CrfLibrary; 6 | import org.ansj.library.DicLibrary; 7 | import org.ansj.library.StopLibrary; 8 | import org.ansj.library.SynonymsLibrary; 9 | import org.ansj.recognition.impl.StopRecognition; 10 | import org.ansj.splitWord.analysis.ToAnalysis; 11 | import org.ansj.util.MyStaticValue; 12 | import org.apache.logging.log4j.LogManager; 13 | import org.apache.logging.log4j.Logger; 14 | import org.elasticsearch.SpecialPermission; 15 | import org.elasticsearch.common.collect.ImmutableOpenMap; 16 | import org.elasticsearch.common.settings.Settings; 17 | import org.elasticsearch.env.Environment; 18 | import org.nlpcn.commons.lang.tire.domain.SmartForest; 19 | import org.nlpcn.commons.lang.util.IOUtil; 20 | import org.nlpcn.commons.lang.util.StringUtil; 21 | 22 | import java.io.BufferedReader; 23 | import java.io.File; 24 | import java.io.IOException; 25 | import java.nio.file.Files; 26 | import java.nio.file.Path; 27 | import java.nio.file.Paths; 28 | import java.security.AccessController; 29 | import java.security.PrivilegedAction; 30 | import java.util.Map; 31 | import java.util.Optional; 32 | import java.util.stream.Collectors; 33 | 34 | public class AnsjElasticConfigurator { 35 | 36 | private static final 
Logger LOG = LogManager.getLogger("ansj-initializer"); 37 | 38 | private static final String CONFIG_FILE_NAME = "ansj.cfg.yml"; 39 | 40 | private String path; 41 | 42 | private Settings ansjSettings; 43 | 44 | private File configDir; 45 | 46 | private final Environment env; 47 | 48 | public AnsjElasticConfigurator(Environment env) { 49 | this.env = env; 50 | 51 | // 52 | init(); 53 | 54 | // 进行一次测试分词 55 | preheat(); 56 | 57 | LOG.info("init ansj plugin ok , goodluck youyou"); 58 | } 59 | 60 | private void init() { 61 | Path configFilePath = env.configDir().resolve("elasticsearch-analysis-ansj").resolve(CONFIG_FILE_NAME); 62 | LOG.info("try to load ansj config file: {}", configFilePath); 63 | if (!Files.exists(configFilePath)) { 64 | configFilePath = Paths.get(new File(AnsjElasticConfigurator.class.getProtectionDomain().getCodeSource().getLocation().getPath()).getParent(), "config").resolve(CONFIG_FILE_NAME); 65 | LOG.info("try to load ansj config file: {}", configFilePath); 66 | } 67 | Settings.Builder builder = Settings.builder(); 68 | if (Files.exists(configFilePath)) { 69 | try { 70 | builder.loadFromPath(configFilePath); 71 | LOG.info("load ansj config: {}", configFilePath); 72 | } catch (IOException e) { 73 | LOG.error("load ansj config[{}] error: {}", configFilePath, e); 74 | } 75 | } else { 76 | LOG.warn("can't find ansj config file"); 77 | } 78 | 79 | Settings settings = builder.build(); 80 | path = settings.get("ansj_config"); 81 | ansjSettings = settings.getAsSettings("ansj"); 82 | configDir = env.configDir().toFile(); 83 | 84 | flushConfig(); 85 | } 86 | 87 | private void flushConfig() { 88 | MyStaticValue.ENV.clear(); 89 | 90 | // ansj.cfg.yml文件,插入到变量中 91 | if (ansjSettings != null && !ansjSettings.isEmpty()) { 92 | MyStaticValue.ENV.putAll(ansjSettings.keySet().stream().collect(Collectors.toMap(k -> k, ansjSettings::get))); 93 | } 94 | 95 | // ansj.cfg.yml文件中ansj_config指定的文件或者配置文件目录下的ansj_library.properties 96 | if (path != null) { 97 | 
initConfig(path, true); 98 | } else { 99 | initConfig(new File(configDir, "ansj_library.properties").getAbsolutePath(), false); 100 | } 101 | 102 | // 设置全局变量 103 | setGlobalVar(MyStaticValue.ENV); 104 | 105 | // 加载词典 106 | for (String k : MyStaticValue.ENV.keySet().toArray(new String[0])) { 107 | reloadLibrary(k); 108 | } 109 | } 110 | 111 | /** 112 | * 读取配置文件并将配置放入MyStaticValue.ENV 113 | * 114 | * @param path 115 | * @param printErr 116 | */ 117 | private void initConfig(String path, boolean printErr) { 118 | SpecialPermission.check(); 119 | AccessController.doPrivileged((PrivilegedAction) () -> { 120 | try (BufferedReader br = IOUtil.getReader(PathToStream.stream(path), "utf-8")) { 121 | String temp; 122 | int index; 123 | while ((temp = br.readLine()) != null) { 124 | if (StringUtil.isBlank(temp) || temp.trim().charAt(0) == '#' || !temp.contains("=")) { 125 | continue; 126 | } 127 | 128 | index = temp.indexOf('='); 129 | 130 | MyStaticValue.ENV.put(temp.substring(0, index).trim(), temp.substring(index + 1).trim()); 131 | } 132 | } catch (Exception e) { 133 | if (printErr) { 134 | LOG.error("{} load err: {}", path, e); 135 | } else { 136 | LOG.warn("{} load err", path); 137 | } 138 | } 139 | return null; 140 | }); 141 | } 142 | 143 | private void preheat() { 144 | ToAnalysis.parse("这是一个基于ansj的分词插件"); 145 | } 146 | 147 | /** 148 | * 设置一些全局变量 149 | * 150 | * @param map 151 | */ 152 | private void setGlobalVar(Map map) { 153 | // 是否开启人名识别 154 | if (map.containsKey("isNameRecognition")) { 155 | MyStaticValue.isNameRecognition = Boolean.valueOf(map.get("isNameRecognition")); 156 | } 157 | 158 | // 是否开启数字识别 159 | if (map.containsKey("isNumRecognition")) { 160 | MyStaticValue.isNumRecognition = Boolean.valueOf(map.get("isNumRecognition")); 161 | } 162 | 163 | // 是否数字和量词合并 164 | if (map.containsKey("isQuantifierRecognition")) { 165 | MyStaticValue.isQuantifierRecognition = Boolean.valueOf(map.get("isQuantifierRecognition")); 166 | } 167 | 168 | // 是否显示真实词语 169 | if 
(map.containsKey("isRealName")) { 170 | MyStaticValue.isRealName = Boolean.valueOf(map.get("isRealName")); 171 | } 172 | 173 | // 是否用户词典不加载相同的词 174 | if (map.containsKey("isSkipUserDefine")) { 175 | MyStaticValue.isSkipUserDefine = Boolean.parseBoolean(map.get("isSkipUserDefine")); 176 | } 177 | } 178 | 179 | /** 180 | * 重新加载配置 181 | * 如果词典正在使用,重新加载 182 | * 如果删除了正在使用的词典,清空 183 | */ 184 | public void reloadConfig() { 185 | init(); 186 | LOG.info("reload ansj plugin config successfully"); 187 | 188 | for (String key : DicLibrary.keys()) { 189 | if (!MyStaticValue.ENV.containsKey(key)) { 190 | Optional.ofNullable(DicLibrary.get(key)).ifPresent(SmartForest::clear); 191 | 192 | LOG.info("clear DicLibrary: {}", key); 193 | } 194 | } 195 | 196 | for (String key : StopLibrary.keys()) { 197 | if (!MyStaticValue.ENV.containsKey(key)) { 198 | Optional.ofNullable(StopLibrary.get(key)).ifPresent(StopRecognition::clear); 199 | 200 | LOG.info("clear StopLibrary: {}", key); 201 | } 202 | } 203 | 204 | for (String key : SynonymsLibrary.keys()) { 205 | if (!MyStaticValue.ENV.containsKey(key)) { 206 | Optional.ofNullable(SynonymsLibrary.get(key)).ifPresent(SmartForest::clear); 207 | 208 | LOG.info("clear SynonymsLibrary: {}", key); 209 | } 210 | } 211 | 212 | for (String key : AmbiguityLibrary.keys()) { 213 | if (!MyStaticValue.ENV.containsKey(key)) { 214 | Optional.ofNullable(AmbiguityLibrary.get(key)).ifPresent(SmartForest::clear); 215 | 216 | LOG.info("clear AmbiguityLibrary: {}", key); 217 | } 218 | } 219 | } 220 | 221 | /** 222 | * 重新加载词典,CRF词典有待处理 223 | * 如果是正在使用的词典,重新加载 224 | * 如果是已删除的并且还在使用的,清空 225 | * 226 | * @param key 227 | */ 228 | public void reloadLibrary(String key) { 229 | SpecialPermission.check(); 230 | AccessController.doPrivileged((PrivilegedAction) () -> { 231 | if (key.startsWith(DicLibrary.DEFAULT)) { 232 | if (MyStaticValue.ENV.containsKey(key)) { 233 | DicLibrary.reload(key); 234 | LOG.info("reload DicLibrary: {}", key); 235 | } else if 
(DicLibrary.keys().contains(key)) { 236 | Optional.ofNullable(DicLibrary.get(key)).ifPresent(SmartForest::clear); 237 | LOG.info("clear DicLibrary: {}", key); 238 | } else { 239 | LOG.warn("DicLibrary[{}] not found", key); 240 | } 241 | } else if (key.startsWith(StopLibrary.DEFAULT)) { 242 | if (MyStaticValue.ENV.containsKey(key)) { 243 | StopLibrary.reload(key); 244 | LOG.info("reload StopLibrary: {}", key); 245 | } else if (StopLibrary.keys().contains(key)) { 246 | Optional.ofNullable(StopLibrary.get(key)).ifPresent(StopRecognition::clear); 247 | LOG.info("clear StopLibrary: {}", key); 248 | } else { 249 | LOG.warn("StopLibrary[{}] not found", key); 250 | } 251 | } else if (key.startsWith(SynonymsLibrary.DEFAULT)) { 252 | if (MyStaticValue.ENV.containsKey(key)) { 253 | SynonymsLibrary.reload(key); 254 | LOG.info("reload SynonymsLibrary: {}", key); 255 | } else if (SynonymsLibrary.keys().contains(key)) { 256 | Optional.ofNullable(SynonymsLibrary.get(key)).ifPresent(SmartForest::clear); 257 | LOG.info("clear SynonymsLibrary: {}", key); 258 | } else { 259 | LOG.warn("SynonymsLibrary[{}] not found", key); 260 | } 261 | } else if (key.startsWith(AmbiguityLibrary.DEFAULT)) { 262 | if (MyStaticValue.ENV.containsKey(key)) { 263 | AmbiguityLibrary.reload(key); 264 | LOG.info("reload AmbiguityLibrary: {}", key); 265 | } else if (AmbiguityLibrary.keys().contains(key)) { 266 | Optional.ofNullable(AmbiguityLibrary.get(key)).ifPresent(SmartForest::clear); 267 | LOG.info("clear AmbiguityLibrary: {}", key); 268 | } else { 269 | LOG.warn("AmbiguityLibrary[{}] not found", key); 270 | } 271 | } else if (key.startsWith(CrfLibrary.DEFAULT)) { 272 | CrfLibrary.reload(key); 273 | LOG.info("reload CrfLibrary: {}", key); 274 | } 275 | return null; 276 | }); 277 | } 278 | 279 | /** 280 | * 默认配置 281 | */ 282 | public static Map getDefaults() { 283 | return ImmutableOpenMap.builder(10) 284 | // 是否开启人名识别 285 | .fPut("isNameRecognition", MyStaticValue.isNameRecognition.toString()) 286 | // 
是否开启数字识别 287 | .fPut("isNumRecognition", MyStaticValue.isNumRecognition.toString()) 288 | // 是否数字和量词合并 289 | .fPut("isQuantifierRecognition", MyStaticValue.isQuantifierRecognition.toString()) 290 | // 是否显示真实词语 291 | .fPut("isRealName", MyStaticValue.isRealName.toString()) 292 | // 是否用户词典不加载相同的词 293 | .fPut("isSkipUserDefine", String.valueOf(MyStaticValue.isSkipUserDefine)) 294 | .fPut(CrfLibrary.DEFAULT, CrfLibrary.DEFAULT) 295 | .fPut(DicLibrary.DEFAULT, DicLibrary.DEFAULT) 296 | .fPut(StopLibrary.DEFAULT, StopLibrary.DEFAULT) 297 | .fPut(SynonymsLibrary.DEFAULT, SynonymsLibrary.DEFAULT) 298 | .fPut(AmbiguityLibrary.DEFAULT, AmbiguityLibrary.DEFAULT) 299 | .build(); 300 | } 301 | } 302 | -------------------------------------------------------------------------------- /src/main/java/org/ansj/elasticsearch/plugin/AnalysisAnsjPlugin.java: -------------------------------------------------------------------------------- 1 | package org.ansj.elasticsearch.plugin; 2 | 3 | import org.ansj.elasticsearch.action.AnsjAction; 4 | import org.ansj.elasticsearch.action.TransportAnsjAction; 5 | import org.ansj.elasticsearch.cat.AnalyzerCatAction; 6 | import org.ansj.elasticsearch.cat.AnsjCatAction; 7 | import org.ansj.elasticsearch.index.analysis.AnsjAnalyzerProvider; 8 | import org.ansj.elasticsearch.index.analysis.AnsjTokenizerTokenizerFactory; 9 | import org.ansj.elasticsearch.index.config.AnsjElasticConfigurator; 10 | import org.ansj.elasticsearch.rest.RestAnsjAction; 11 | import org.ansj.lucene9.AnsjAnalyzer; 12 | import org.apache.logging.log4j.LogManager; 13 | import org.apache.logging.log4j.Logger; 14 | import org.apache.lucene.analysis.Analyzer; 15 | import org.elasticsearch.action.ActionRequest; 16 | import org.elasticsearch.action.ActionResponse; 17 | import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; 18 | import org.elasticsearch.cluster.node.DiscoveryNodes; 19 | import org.elasticsearch.common.io.stream.NamedWriteableRegistry; 20 | import 
org.elasticsearch.common.settings.ClusterSettings; 21 | import org.elasticsearch.common.settings.IndexScopedSettings; 22 | import org.elasticsearch.common.settings.Settings; 23 | import org.elasticsearch.common.settings.SettingsFilter; 24 | import org.elasticsearch.features.NodeFeature; 25 | import org.elasticsearch.index.analysis.AnalyzerProvider; 26 | import org.elasticsearch.index.analysis.TokenizerFactory; 27 | import org.elasticsearch.indices.analysis.AnalysisModule; 28 | import org.elasticsearch.plugins.ActionPlugin; 29 | import org.elasticsearch.plugins.AnalysisPlugin; 30 | import org.elasticsearch.plugins.Plugin; 31 | import org.elasticsearch.rest.RestController; 32 | import org.elasticsearch.rest.RestHandler; 33 | 34 | import java.util.Arrays; 35 | import java.util.Collection; 36 | import java.util.Collections; 37 | import java.util.HashMap; 38 | import java.util.List; 39 | import java.util.Map; 40 | import java.util.function.Predicate; 41 | import java.util.function.Supplier; 42 | 43 | public class AnalysisAnsjPlugin extends Plugin implements AnalysisPlugin, ActionPlugin { 44 | 45 | private static final Logger LOG = LogManager.getLogger(); 46 | 47 | @Override 48 | public Collection createComponents(PluginServices services) { 49 | return Collections.singletonList(new AnsjElasticConfigurator(services.environment())); 50 | } 51 | 52 | @Override 53 | public Map> getTokenizers() { 54 | 55 | Map> extra = new HashMap<>(); 56 | 57 | for (final AnsjAnalyzer.TYPE type : AnsjAnalyzer.TYPE.values()) { 58 | 59 | extra.put(type.name(), (indexSettings, env, name, settings) -> new AnsjTokenizerTokenizerFactory(indexSettings, name, settings)); 60 | 61 | LOG.info("regedit analyzer tokenizer named : {}", type.name()); 62 | } 63 | 64 | return extra; 65 | } 66 | 67 | @Override 68 | public Map>> getAnalyzers() { 69 | 70 | Map>> extra = new HashMap<>(); 71 | 72 | for (final AnsjAnalyzer.TYPE type : AnsjAnalyzer.TYPE.values()) { 73 | 74 | extra.put(type.name(), (indexSettings, 
env, name, settings) -> new AnsjAnalyzerProvider(indexSettings, name, settings)); 75 | 76 | LOG.info("regedit analyzer provider named : {}", type.name()); 77 | } 78 | 79 | return extra; 80 | } 81 | 82 | @Override 83 | public List> getActions() { 84 | return Collections.singletonList(new ActionHandler<>(AnsjAction.INSTANCE, TransportAnsjAction.class)); 85 | } 86 | 87 | @Override 88 | public Collection getRestHandlers(Settings settings, NamedWriteableRegistry namedWriteableRegistry, RestController restController, ClusterSettings clusterSettings, IndexScopedSettings indexScopedSettings, SettingsFilter settingsFilter, IndexNameExpressionResolver indexNameExpressionResolver, Supplier nodesInCluster, Predicate clusterSupportsFeature) { 89 | return Arrays.asList(new RestAnsjAction(), new AnalyzerCatAction(), new AnsjCatAction()); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/java/org/ansj/elasticsearch/rest/RestAnsjAction.java: -------------------------------------------------------------------------------- 1 | package org.ansj.elasticsearch.rest; 2 | 3 | import org.ansj.elasticsearch.action.AnsjAction; 4 | import org.ansj.elasticsearch.action.AnsjRequest; 5 | import org.ansj.library.AmbiguityLibrary; 6 | import org.ansj.library.CrfLibrary; 7 | import org.ansj.library.DicLibrary; 8 | import org.ansj.library.StopLibrary; 9 | import org.ansj.library.SynonymsLibrary; 10 | import org.elasticsearch.client.internal.node.NodeClient; 11 | import org.elasticsearch.rest.BaseRestHandler; 12 | import org.elasticsearch.rest.RestRequest; 13 | import org.elasticsearch.rest.action.RestToXContentListener; 14 | 15 | import java.util.Arrays; 16 | import java.util.Collections; 17 | import java.util.HashSet; 18 | import java.util.List; 19 | import java.util.Set; 20 | 21 | import static java.util.Arrays.asList; 22 | import static java.util.Collections.unmodifiableList; 23 | 24 | /** 25 | * Created by zhangqinghua on 16/2/2. 
26 | */ 27 | public class RestAnsjAction extends BaseRestHandler { 28 | 29 | @Override 30 | public String getName() { 31 | return "ansj_action"; 32 | } 33 | 34 | @Override 35 | public List routes() { 36 | return unmodifiableList(asList( 37 | new Route(RestRequest.Method.GET, "/_ansj"), 38 | new Route(RestRequest.Method.POST, "/_ansj"))); 39 | } 40 | 41 | @Override 42 | protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) { 43 | AnsjRequest ansjRequest = new AnsjRequest(); 44 | 45 | ansjRequest.asMap().putAll(request.params()); 46 | 47 | return channel -> client.execute(AnsjAction.INSTANCE, ansjRequest, new RestToXContentListener<>(channel)); 48 | } 49 | 50 | @Override 51 | protected Set responseParams() { 52 | Set responseParams = new HashSet<>(super.responseParams()); 53 | responseParams.addAll(Arrays.asList("text", "index", "field", "analyzer", "tokenizer", "filters", "token_filters", "char_filters", "type", "key", 54 | "isNameRecognition", "isNumRecognition", "isQuantifierRecognition", "isRealName", "isSkipUserDefine", 55 | CrfLibrary.DEFAULT, DicLibrary.DEFAULT, AmbiguityLibrary.DEFAULT, StopLibrary.DEFAULT, SynonymsLibrary.DEFAULT)); 56 | return Collections.unmodifiableSet(responseParams); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/plugin-metadata/plugin-security.policy: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Elasticsearch under one or more contributor 3 | * license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright 5 | * ownership. Elasticsearch licenses this file to you under 6 | * the Apache License, Version 2.0 (the "License"); you may 7 | * not use this file except in compliance with the License. 
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | grant { 21 | permission java.lang.RuntimePermission "getClassLoader"; 22 | permission java.lang.RuntimePermission "setContextClassLoader"; 23 | permission java.io.FilePermission "<>", "read"; 24 | permission java.net.SocketPermission "*", "accept,connect,resolve"; 25 | }; 26 | -------------------------------------------------------------------------------- /src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description} 2 | 3 | version=${project.version} 4 | 5 | name=${elasticsearch.plugin.name} 6 | 7 | classname=${elasticsearch.plugin.classname} 8 | 9 | java.version=${elasticsearch.plugin.java.version} 10 | 11 | elasticsearch.version=${elasticsearch.version} 12 | -------------------------------------------------------------------------------- /src/test/java/org/ansj/test/ESAnalysisAnsjTests.java: -------------------------------------------------------------------------------- 1 | package org.ansj.test; 2 | 3 | import org.junit.Test; 4 | 5 | import java.util.concurrent.ExecutionException; 6 | 7 | public class ESAnalysisAnsjTests { 8 | 9 | @Test 10 | public void testDefaultsIcuAnalysis() throws InterruptedException, ExecutionException { 11 | // System.out.println(analysis); 12 | 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /src/test/resource/datas.json: -------------------------------------------------------------------------------- 1 | 
{"index":{"_type": "test_index", "_id":"1"}} 2 | {"name" : "中国人民万岁"} 3 | -------------------------------------------------------------------------------- /src/test/resource/mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings" : { 3 | "number_of_shards" : 1, 4 | "number_of_replicas" : 0 5 | 6 | }, 7 | "mappings" : { 8 | "type1" : { 9 | "_all" : { "enabled" : false }, 10 | "properties" : { 11 | "name" : { "type" : "string", "analyzer" : "index_ansj", "search_analyzer" : "query_ansj" } 12 | } 13 | } 14 | } 15 | } --------------------------------------------------------------------------------