├── .DS_Store
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── build.gradle
├── gradle.properties
├── gradle
    └── wrapper
    │   ├── gradle-wrapper.jar
    │   └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
├── settings.gradle
└── src
    ├── .DS_Store
    ├── main
        ├── java
        │   └── org
        │   │   ├── apache
        │   │       └── lucene
        │   │       │   └── analysis
        │   │       │       └── ko
        │   │       │           ├── KoreanTokenPrepareable.java
        │   │       │           ├── OpenKoreanTextAnalyzer.java
        │   │       │           ├── OpenKoreanTextNormalizer.java
        │   │       │           ├── OpenKoreanTextPhraseExtractor.java
        │   │       │           ├── OpenKoreanTextRedundantFilter.java
        │   │       │           ├── OpenKoreanTextStemmer.java
        │   │       │           ├── OpenKoreanTextTokenFilter.java
        │   │       │           ├── OpenKoreanTextTokenizer.java
        │   │       │           └── UserDictionaryLoader.java
        │   │   └── elasticsearch
        │   │       ├── index
        │   │           └── analysis
        │   │           │   ├── OpenKoreanTextAnalyzerProvider.java
        │   │           │   ├── OpenKoreanTextNormalizerFactory.java
        │   │           │   ├── OpenKoreanTextPhraseExtractorFactory.java
        │   │           │   ├── OpenKoreanTextRedundantFilterFactory.java
        │   │           │   ├── OpenKoreanTextStemmerFactory.java
        │   │           │   └── OpenKoreanTextTokenizerFactory.java
        │   │       └── plugin
        │   │           └── analysis
        │   │               └── openkoreantext
        │   │                   └── AnalysisOpenKoreanTextPlugin.java
        └── resources
        │   ├── dic
        │       └── sample-dictionary
        │   └── plugin-descriptor.properties
    └── test
        ├── .DS_Store
        ├── java
            └── org
            │   ├── apache
            │       └── lucene
            │       │   └── analysis
            │       │       └── ko
            │       │           ├── OpenKoreanTextAnalyzerTest.java
            │       │           ├── OpenKoreanTextNormalizerTest.java
            │       │           ├── OpenKoreanTextPhraseExtractorTest.java
            │       │           ├── OpenKoreanTextRedundantFilterTest.java
            │       │           ├── OpenKoreanTextStemmerTest.java
            │       │           ├── OpenKoreanTextTokenizerTest.java
            │       │           └── TokenStreamAssertions.java
            │   └── elasticsesarch
            │       └── plugin
            │           └── analysis
            │               └── openkoreantext
            │                   └── AnalysisOpenKoreanTextPluginTest.java
        └── resources
            ├── .DS_Store
            ├── dic
                └── sample-dictionary
            ├── dictionary
            ├── httpdictionary
            └── plugin-descriptor.properties


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-korean-text/elasticsearch-analysis-openkoreantext/a37dffab4cc64c5b478eded7bbb028fcfbdc4dd4/.DS_Store


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Intellij project files
 2 | *.iml
 3 | *.ipr
 4 | *.iws
 5 | .idea/
 6 | out/
 7 | 
 8 | # Eclipse project files
 9 | .classpath
10 | .project
11 | .settings/
12 | 
13 | *.class
14 | 
15 | # Mobile Tools for Java (J2ME)
16 | .mtj.tmp/
17 | 
18 | # Package Files #
19 | *.jar
20 | *.war
21 | *.ear
22 | 
23 | # generated files
24 | bin/
25 | gen/
26 | classes/
27 | generated/
28 | 
29 | #Gradle
30 | .gradletasknamecache
31 | .gradle/
32 | build/
33 | bin/
34 | !gradle-wrapper.
35 | 
36 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
37 | hs_err_pid*
38 | 
39 | 
40 | secret.properties


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: java
 2 | 
 3 | jdk:
 4 |   - oraclejdk8
 5 | 
 6 | env:
 7 |   - JAVA_OPTS="-Xmx512m"
 8 |   - TERM=dumb
 9 | 
10 | sudo: false
11 | 
12 | before_script:
13 |   - sudo service mysql stop || true
14 |   - sudo service memcached stop || true
15 |   - sudo service elasticsearch stop || true
16 |   - sudo service mongodb stop || true
17 |   - sudo service neo4j stop || true
18 |   - sudo service cassandra stop || true
19 |   - sudo service riak stop || true
20 | 
21 | after_success:
22 | - ./gradlew jacocoRootReport coveralls
23 | 
24 | before_cache:
25 |   - rm -f  $HOME/.gradle/caches/modules-2/modules-2.lock
26 |   - rm -fr $HOME/.gradle/caches/*/plugin-resolution/
27 | 
28 | cache:
29 |   directories:
30 |     - $HOME/.gradle/caches/
31 |     - $HOME/.gradle/wrapper/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # elasticsearch-analysis-openkoreantext
  2 | 
  3 | [![Build Status](https://travis-ci.org/open-korean-text/elasticsearch-analysis-openkoreantext.svg?branch=master)](https://travis-ci.org/open-korean-text/elasticsearch-analysis-openkoreantext)
  4 | 
  5 | 한국어(한글)를 처리하는 Elasticsearch analyzer입니다. [open-korean-text](https://github.com/open-korean-text/open-korean-text) 한국어 처리엔진으로 작성되었습니다.
  6 | 
  7 | Korean analysis plugin that integrates [open-korean-text](https://github.com/open-korean-text/open-korean-text) module into Elasticsearch.
  8 | 
  9 | Elasticsearch 4.x 이하의 버전은 지원하지 않습니다.
 10 | 
 11 | ## Install
 12 | 
 13 | ```shell
 14 | $ cd ${ES_HOME}
 15 | $ bin/elasticsearch-plugin install {download URL}
 16 | ```
 17 | 
 18 | 설치 후 `bin/elasticsearch` 실행 시, `loaded plugin [elasticsearch-analysis-openkoreantext]` 라는 로그가 출력되는지 확인합니다.
 19 | 
 20 | **download URL 은 아래 [Compatible Versions](#compatible-versions)를 참고하여 Elasticsearch 버젼에 맞는 Plugin 버젼을 다운로드 받아야합니다.**
 21 | 
 22 | ## Example
 23 | #### Input
 24 | ```shell
 25 | curl -X POST 'http://localhost:9200/_analyze' -d '{
 26 |   "analyzer": "openkoreantext-analyzer",
 27 |   "text": "한국어를 처리하는 예시입니닼ㅋㅋ"
 28 | }'
 29 | ```
 30 | 
 31 | #### Output
 32 | ```json
 33 | {
 34 |   "tokens": [
 35 |     {
 36 |       "token": "한국어",
 37 |       "start_offset": 0,
 38 |       "end_offset": 3,
 39 |       "type": "Noun",
 40 |       "position": 0
 41 |     },
 42 |     {
 43 |       "token": "처리",
 44 |       "start_offset": 5,
 45 |       "end_offset": 7,
 46 |       "type": "Noun",
 47 |       "position": 1
 48 |     },
 49 |     {
 50 |       "token": "하다",
 51 |       "start_offset": 7,
 52 |       "end_offset": 9,
 53 |       "type": "Verb",
 54 |       "position": 2
 55 |     },
 56 |     {
 57 |       "token": "예시",
 58 |       "start_offset": 10,
 59 |       "end_offset": 12,
 60 |       "type": "Noun",
 61 |       "position": 3
 62 |     },
 63 |     {
 64 |       "token": "이다",
 65 |       "start_offset": 12,
 66 |       "end_offset": 15,
 67 |       "type": "Adjective",
 68 |       "position": 4
 69 |     },
 70 |     {
 71 |       "token": "ㅋㅋ",
 72 |       "start_offset": 15,
 73 |       "end_offset": 17,
 74 |       "type": "KoreanParticle",
 75 |       "position": 5
 76 |     }
 77 |   ]
 78 | }
 79 | ```
 80 | 
 81 | Elasticsearch의 default analyzer를 사용했을 경우
 82 | ```json
 83 | {
 84 |   "tokens": [
 85 |     {
 86 |       "token": "한국어를",
 87 |       "start_offset": 0,
 88 |       "end_offset": 4,
 89 |       "type": "<HANGUL>",
 90 |       "position": 0
 91 |     },
 92 |     {
 93 |       "token": "처리하는",
 94 |       "start_offset": 5,
 95 |       "end_offset": 9,
 96 |       "type": "<HANGUL>",
 97 |       "position": 1
 98 |     },
 99 |     {
100 |       "token": "예시입니닼ㅋㅋ",
101 |       "start_offset": 10,
102 |       "end_offset": 17,
103 |       "type": "<HANGUL>",
104 |       "position": 2
105 |     }
106 |   ]
107 | }
108 | ```
109 | 
110 | **실제 사용 방법은 [Elasicsearch analysis](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis.html)를 참고하세요.**
111 | 
112 | ## User Dictionary
113 | [기본사전](https://github.com/open-korean-text/open-korean-text/tree/master/src/main/resources/org/openkoreantext/processor/util) 이외에 사용자가 원하는 단어를 추가하여 사용할 수 있습니다. 예를들어 `말썽쟁이`를 분석하면 `말썽(Noun)`과 `쟁이(suffix)`로 추출되지만, 사전에 `말썽쟁이`를 추가하면 `말썽쟁이(Noun)`로 추출할 수 있습니다.
114 | 
115 | Analyzer Plugin을 설치하면 `{ES_HOME}/plugins/elasticserach-analysis-openkoreantext` 위치에 `dic/` 디렉토리를 찾을 수 있습니다. 해당 디렉토리 안에 사전 텍스트 파일을 추가하면 됩니다.
116 | 
117 | 사전 텍스트 파일은 각 단어들을 줄바꿈하여 넣으면 됩니다. (단, 띄워쓰기는 단어로 인식하지 않습니다.)
118 | 
119 | ```plain
120 | # {ES_HOME}/plugins/elasticserach-analysis-openkoreantext/dic/sampledictionary
121 | 말썽쟁이
122 | 뚜쟁이
123 | 욕쟁이할머니
124 | ...
125 | ```
126 | 
127 | 
128 | ## Components
129 | 이 Analyzer는 몇 가지 [components](https://www.elastic.co/guide/en/elasticsearch/reference/current/analyzer-anatomy.html)로 구성되어 있습니다.
130 | 
131 | **Charater Filter**
132 | * openkoreantext-normalizer
133 |   * 구어체를 표준화 합니다.
134 |   > 훌쩍훌쩍훌쩍훌쩍 -> 훌쩍훌쩍, 하겟다 -> 하겠다, 안됔ㅋㅋㅋ -> 안돼ㅋㅋ
135 | 
136 | **Tokenizer**
137 | * openkoreantext-tokenizer
138 |   * 문장을 토큰화 합니다.
139 |   > 한국어를 처리하는 예시입니다 ㅋㅋ -> [한국어, 를, 처리, 하는, 예시, 입니다, ㅋㅋ]
140 | 
141 | **Token Filter**
142 | * openkoreantext-stemmer
143 |   * 형용사와 동사를 스테밍합니다.
144 |   > 새로운 스테밍을 추가했었다. -> [새롭다, 스테밍, 을, 추가하다, .]
145 | 
146 | * openkoreantext-redundant-filter
147 |   * 접속사, 공백(띄워쓰기), 조사, 마침표 등을 제거합니다.
148 |   > 그리고 이것은 예시, 또는 예로써, 한국어를 처리하기 -> [예시, 예, 한국어, 처리, 하다]
149 | 
150 | * openkoreantext-phrase-extractor
151 |   * 어구를 추출합니다.
152 |   > 한국어를 처리하는 예시입니다 ㅋㅋ -> [한국어, 처리, 예시, 처리하는 예시]
153 | 
154 | **Analyzer**
155 | 
156 | [`openkoreantext-normalizer`] -> [`openkoreantext-tokenizer`] -> [`openkoreantext-stemmer`, `openkoreantext-redundant-filter`,  `classic`, `length`, `lowercase`]
157 | 
158 | * 이 analyzer에는 `openkoreantext-phrase-extractor`가 기본 token filter로 적용되어있지 않습니다.
159 | * custom analyzer 구성을 원하시면 [custom analyzer](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-custom-analyzer.html)를 참고하세요.
160 | 
161 | ## Compatible Versions
162 | 
163 | | Elasticsearch | open-korean-text | Download URL |
164 | |:----:|:----:|:----|
165 | | 6.1.1 | 2.1.0 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/6.1.1/elasticsearch-analysis-openkoreantext-6.1.1.2-plugin.zip |
166 | | 6.1.0 | 2.1.0 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/6.1.1/elasticsearch-analysis-openkoreantext-6.1.0.2-plugin.zip |
167 | | 6.0.0 | 2.1.0 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/6.0.0.2/elasticsearch-analysis-openkoreantext-6.0.0.2-plugin.zip |
168 | | 5.6.5 | 2.1.0 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/6.1.1/elasticsearch-analysis-openkoreantext-5.6.5.2-plugin.zip |
169 | | 5.6.4 | 2.1.0 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.6.4.2/elasticsearch-analysis-openkoreantext-5.6.4.2-plugin.zip |
170 | | 5.6.3 | 2.1.0 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.6.4.2/elasticsearch-analysis-openkoreantext-5.6.3.2-plugin.zip |
171 | | 5.6.2 | 2.1.0 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/v5.6.x/elasticsearch-analysis-openkoreantext-5.6.2.2-plugin.zip |
172 | | 5.6.1 | 2.1.0 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/v5.6.x/elasticsearch-analysis-openkoreantext-5.6.1.2-plugin.zip |
173 | | 5.6.0 | 2.1.0 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/v5.6.x/elasticsearch-analysis-openkoreantext-5.6.0.2-plugin.zip |
174 | | 5.5.2 | 2.1.0 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.5.2.2/elasticsearch-analysis-openkoreantext-5.5.2.2-plugin.zip |
175 | | 5.5.1 | 2.1.0 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.5.1.2.1/elasticsearch-analysis-openkoreantext-5.5.1.2-plugin.zip |
176 | | 5.5.0 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.5.0.2/elasticsearch-analysis-openkoreantext-5.5.0.2-plugin.zip |
177 | | 5.4.3 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.2.2/elasticsearch-analysis-openkoreantext-5.4.3.2-plugin.zip |
178 | | 5.4.2 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.2.2/elasticsearch-analysis-openkoreantext-5.4.2.2-plugin.zip |
179 | | 5.4.1 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.1.2/elasticsearch-analysis-openkoreantext-5.4.1.2-plugin.zip |
180 | | 5.4.0 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.0.2/elasticsearch-analysis-openkoreantext-5.4.0.2-plugin.zip |
181 | | 5.3.2 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.0.2/elasticsearch-analysis-openkoreantext-5.3.2.2-plugin.zip |
182 | | 5.3.1 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.0.2/elasticsearch-analysis-openkoreantext-5.3.1.2-plugin.zip |
183 | | 5.3.0 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.0.2/elasticsearch-analysis-openkoreantext-5.3.0.2-plugin.zip |
184 | | 5.2.2 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.0.2/elasticsearch-analysis-openkoreantext-5.2.2.2-plugin.zip |
185 | | 5.2.1 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.0.2/elasticsearch-analysis-openkoreantext-5.2.1.2-plugin.zip |
186 | | 5.1.2 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.0.2/elasticsearch-analysis-openkoreantext-5.1.2.2-plugin.zip |
187 | | 5.1.1 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.0.2/elasticsearch-analysis-openkoreantext-5.1.1.2-plugin.zip |
188 | | 5.1.0 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.0.2/elasticsearch-analysis-openkoreantext-5.1.0.2-plugin.zip |
189 | | 5.0.2 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.0.2/elasticsearch-analysis-openkoreantext-5.0.2.2-plugin.zip |
190 | | 5.0.1 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.0.2/elasticsearch-analysis-openkoreantext-5.0.1.2-plugin.zip |
191 | | 5.0.0 | 2.0.1 | https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext/releases/download/5.4.0.2/elasticsearch-analysis-openkoreantext-5.0.0.2-plugin.zip |
192 | 
193 | 
194 | * 5.0.0 미만의 버젼은 지원하지 않습니다. open-korean-text로 작성된 다른 플러그인은 참조하시기 바랍니다.  
195 |   * [tkt-elasticsearch](https://github.com/open-korean-text/open-korean-text-elastic-search)
196 |   * [elasticsearch-twitter-korean](https://github.com/jobplanet/elasticsearch-twitter-korean)
197 | 
198 | ## License
199 | Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
200 | 


--------------------------------------------------------------------------------
/build.gradle:
--------------------------------------------------------------------------------
  1 | group 'org.openkoreantext'
  2 | version '6.5.2.0'
  3 | 
  4 | apply plugin: 'java'
  5 | apply plugin: 'maven'
  6 | apply plugin: 'signing'
  7 | apply plugin: 'jacoco'
  8 | 
  9 | sourceCompatibility = 1.8
 10 | 
 11 | repositories {
 12 |     mavenCentral()
 13 |     jcenter()
 14 | }
 15 | 
 16 | configurations {
 17 |     distJars {
 18 |         extendsFrom runtime
 19 |     }
 20 | }
 21 | 
 22 | ext {
 23 |     elasticsearchVersion = '6.5.2'
 24 |     openKoreanTextVersion = '2.1.0'
 25 | }
 26 | 
 27 | dependencies {
 28 |     compile group: 'org.openkoreantext', name: 'open-korean-text', version: openKoreanTextVersion
 29 | 
 30 |     compileOnly group: 'org.elasticsearch', name: 'elasticsearch', version: elasticsearchVersion
 31 | 
 32 |     testCompile group: 'org.elasticsearch.test', name: 'framework', version: elasticsearchVersion
 33 |     testCompile group: 'org.apache.logging.log4j', name: 'log4j-api', version: '2.9.1'
 34 |     testCompile group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.9.1'
 35 | }
 36 | 
 37 | task makePluginDescriptor(type: Copy) {
 38 |     from 'src/main/resources'
 39 |     into 'build/tmp/plugin'
 40 |     expand([
 41 |         'descriptor': [
 42 |             'name': 'elasticsearch-analysis-openkoreantext',
 43 |             'classname': 'org.elasticsearch.plugin.analysis.openkoreantext.AnalysisOpenKoreanTextPlugin',
 44 |             'description': 'Korean analysis plugin integrates open-korean-text module into elasticsearch.',
 45 |             'version': '1.0.0',
 46 |             'javaVersion': sourceCompatibility,
 47 |             'elasticsearchVersion' : elasticsearchVersion
 48 |         ]
 49 |     ])
 50 | }
 51 | 
 52 | task buildPluginZip(type: Zip, dependsOn: [':jar', ':makePluginDescriptor']) {
 53 |     from configurations.distJars
 54 |     from jar.archivePath
 55 |     from 'build/tmp/plugin'
 56 |     into '.'
 57 |     classifier = 'plugin'
 58 | }
 59 | 
 60 | build.finalizedBy(buildPluginZip)
 61 | 
 62 | task javadocJar(type: Jar) {
 63 |     classifier = 'javadoc'
 64 |     from javadoc
 65 | }
 66 | 
 67 | tasks.withType(Javadoc) {
 68 |     options.addStringOption('Xdoclint:none', '-quiet')
 69 |     options.addStringOption('encoding', 'UTF-8')
 70 |     options.addStringOption('charSet', 'UTF-8')
 71 | }
 72 | 
 73 | task sourcesJar(type: Jar) {
 74 |     classifier = 'sources'
 75 |     from sourceSets.main.allSource
 76 | }
 77 | 
 78 | artifacts {
 79 |     archives jar
 80 |     archives javadocJar
 81 |     archives sourcesJar
 82 | }
 83 | 
 84 | test {
 85 |     jvmArgs '-Dtests.security.manager=false'
 86 | }
 87 | 
 88 | jacoco {
 89 |     toolVersion = '0.7.1.201405082137'
 90 | }
 91 | 
 92 | jacocoTestReport {
 93 |     reports {
 94 |         html.enabled = true
 95 |         xml.enabled = true
 96 |         csv.enabled = false
 97 |     }
 98 | }
 99 | 
100 | task jacocoRootReport(type: org.gradle.testing.jacoco.tasks.JacocoReport) {
101 |     sourceDirectories = files(sourceSets.main.allSource.srcDirs)
102 |     classDirectories =  files(sourceSets.main.output)
103 |     executionData = files(jacocoTestReport.executionData)
104 |     reports {
105 |         html.enabled = true
106 |         xml.enabled = true
107 |         csv.enabled = false
108 |     }
109 | }
110 | 
111 | task wrapper(type: Wrapper) {
112 |     gradleVersion = '3.4'
113 | }


--------------------------------------------------------------------------------
/gradle.properties:
--------------------------------------------------------------------------------
1 | # http://yennicktrevels.com/blog/2013/10/11/automated-gradle-project-deployment-to-sonatype-oss-repository/
2 | signing.keyId=xxx
3 | signing.password=xxx
4 | signing.secretKeyRingFile=xxx
5 | 
6 | ossrhUsername=xxx
7 | ossrhPassword=xxx
8 | 


--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-korean-text/elasticsearch-analysis-openkoreantext/a37dffab4cc64c5b478eded7bbb028fcfbdc4dd4/gradle/wrapper/gradle-wrapper.jar


--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Thu May 04 20:20:05 KST 2017
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-3.4-all.zip
7 | 


--------------------------------------------------------------------------------
/gradlew:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env sh
  2 | 
  3 | ##############################################################################
  4 | ##
  5 | ##  Gradle start up script for UN*X
  6 | ##
  7 | ##############################################################################
  8 | 
  9 | # Attempt to set APP_HOME
 10 | # Resolve links: $0 may be a link
 11 | PRG="$0"
 12 | # Need this for relative symlinks.
 13 | while [ -h "$PRG" ] ; do
 14 |     ls=`ls -ld "$PRG"`
 15 |     link=`expr "$ls" : '.*-> \(.*\)$'`
 16 |     if expr "$link" : '/.*' > /dev/null; then
 17 |         PRG="$link"
 18 |     else
 19 |         PRG=`dirname "$PRG"`"/$link"
 20 |     fi
 21 | done
 22 | SAVED="`pwd`"
 23 | cd "`dirname \"$PRG\"`/" >/dev/null
 24 | APP_HOME="`pwd -P`"
 25 | cd "$SAVED" >/dev/null
 26 | 
 27 | APP_NAME="Gradle"
 28 | APP_BASE_NAME=`basename "$0"`
 29 | 
 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
 31 | DEFAULT_JVM_OPTS=""
 32 | 
 33 | # Use the maximum available, or set MAX_FD != -1 to use that value.
 34 | MAX_FD="maximum"
 35 | 
 36 | warn ( ) {
 37 |     echo "$*"
 38 | }
 39 | 
 40 | die ( ) {
 41 |     echo
 42 |     echo "$*"
 43 |     echo
 44 |     exit 1
 45 | }
 46 | 
 47 | # OS specific support (must be 'true' or 'false').
 48 | cygwin=false
 49 | msys=false
 50 | darwin=false
 51 | nonstop=false
 52 | case "`uname`" in
 53 |   CYGWIN* )
 54 |     cygwin=true
 55 |     ;;
 56 |   Darwin* )
 57 |     darwin=true
 58 |     ;;
 59 |   MINGW* )
 60 |     msys=true
 61 |     ;;
 62 |   NONSTOP* )
 63 |     nonstop=true
 64 |     ;;
 65 | esac
 66 | 
 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
 68 | 
 69 | # Determine the Java command to use to start the JVM.
 70 | if [ -n "$JAVA_HOME" ] ; then
 71 |     if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
 72 |         # IBM's JDK on AIX uses strange locations for the executables
 73 |         JAVACMD="$JAVA_HOME/jre/sh/java"
 74 |     else
 75 |         JAVACMD="$JAVA_HOME/bin/java"
 76 |     fi
 77 |     if [ ! -x "$JAVACMD" ] ; then
 78 |         die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
 79 | 
 80 | Please set the JAVA_HOME variable in your environment to match the
 81 | location of your Java installation."
 82 |     fi
 83 | else
 84 |     JAVACMD="java"
 85 |     which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
 86 | 
 87 | Please set the JAVA_HOME variable in your environment to match the
 88 | location of your Java installation."
 89 | fi
 90 | 
 91 | # Increase the maximum file descriptors if we can.
 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
 93 |     MAX_FD_LIMIT=`ulimit -H -n`
 94 |     if [ $? -eq 0 ] ; then
 95 |         if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
 96 |             MAX_FD="$MAX_FD_LIMIT"
 97 |         fi
 98 |         ulimit -n $MAX_FD
 99 |         if [ $? -ne 0 ] ; then
100 |             warn "Could not set maximum file descriptor limit: $MAX_FD"
101 |         fi
102 |     else
103 |         warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
104 |     fi
105 | fi
106 | 
107 | # For Darwin, add options to specify how the application appears in the dock
108 | if $darwin; then
109 |     GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
110 | fi
111 | 
112 | # For Cygwin, switch paths to Windows format before running java
113 | if $cygwin ; then
114 |     APP_HOME=`cygpath --path --mixed "$APP_HOME"`
115 |     CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
116 |     JAVACMD=`cygpath --unix "$JAVACMD"`
117 | 
118 |     # We build the pattern for arguments to be converted via cygpath
119 |     ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120 |     SEP=""
121 |     for dir in $ROOTDIRSRAW ; do
122 |         ROOTDIRS="$ROOTDIRS$SEP$dir"
123 |         SEP="|"
124 |     done
125 |     OURCYGPATTERN="(^($ROOTDIRS))"
126 |     # Add a user-defined pattern to the cygpath arguments
127 |     if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128 |         OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129 |     fi
130 |     # Now convert the arguments - kludge to limit ourselves to /bin/sh
131 |     i=0
132 |     for arg in "$@" ; do
133 |         CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134 |         CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option
135 | 
136 |         if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
137 |             eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138 |         else
139 |             eval `echo args$i`="\"$arg\""
140 |         fi
141 |         i=$((i+1))
142 |     done
143 |     case $i in
144 |         (0) set -- ;;
145 |         (1) set -- "$args0" ;;
146 |         (2) set -- "$args0" "$args1" ;;
147 |         (3) set -- "$args0" "$args1" "$args2" ;;
148 |         (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149 |         (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150 |         (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151 |         (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152 |         (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153 |         (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154 |     esac
155 | fi
156 | 
157 | # Escape application args
158 | save ( ) {
159 |     for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
160 |     echo " "
161 | }
162 | APP_ARGS=$(save "$@")
163 | 
164 | # Collect all arguments for the java command, following the shell quoting and substitution rules
165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
166 | 
167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
169 |   cd "$(dirname "$0")"
170 | fi
171 | 
172 | exec "$JAVACMD" "$@"
173 | 


--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
 1 | @if "%DEBUG%" == "" @echo off
 2 | @rem ##########################################################################
 3 | @rem
 4 | @rem  Gradle startup script for Windows
 5 | @rem
 6 | @rem ##########################################################################
 7 | 
 8 | @rem Set local scope for the variables with windows NT shell
 9 | if "%OS%"=="Windows_NT" setlocal
10 | 
11 | set DIRNAME=%~dp0
12 | if "%DIRNAME%" == "" set DIRNAME=.
13 | set APP_BASE_NAME=%~n0
14 | set APP_HOME=%DIRNAME%
15 | 
16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17 | set DEFAULT_JVM_OPTS=
18 | 
19 | @rem Find java.exe
20 | if defined JAVA_HOME goto findJavaFromJavaHome
21 | 
22 | set JAVA_EXE=java.exe
23 | %JAVA_EXE% -version >NUL 2>&1
24 | if "%ERRORLEVEL%" == "0" goto init
25 | 
26 | echo.
27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28 | echo.
29 | echo Please set the JAVA_HOME variable in your environment to match the
30 | echo location of your Java installation.
31 | 
32 | goto fail
33 | 
34 | :findJavaFromJavaHome
35 | set JAVA_HOME=%JAVA_HOME:"=%
36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37 | 
38 | if exist "%JAVA_EXE%" goto init
39 | 
40 | echo.
41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42 | echo.
43 | echo Please set the JAVA_HOME variable in your environment to match the
44 | echo location of your Java installation.
45 | 
46 | goto fail
47 | 
48 | :init
49 | @rem Get command-line arguments, handling Windows variants
50 | 
51 | if not "%OS%" == "Windows_NT" goto win9xME_args
52 | 
53 | :win9xME_args
54 | @rem Slurp the command line arguments.
55 | set CMD_LINE_ARGS=
56 | set _SKIP=2
57 | 
58 | :win9xME_args_slurp
59 | if "x%~1" == "x" goto execute
60 | 
61 | set CMD_LINE_ARGS=%*
62 | 
63 | :execute
64 | @rem Setup the command line
65 | 
66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
67 | 
68 | @rem Execute Gradle
69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
70 | 
71 | :end
72 | @rem End local scope for the variables with windows NT shell
73 | if "%ERRORLEVEL%"=="0" goto mainEnd
74 | 
75 | :fail
76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
77 | rem the _cmd.exe /c_ return code!
78 | if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
79 | exit /b 1
80 | 
81 | :mainEnd
82 | if "%OS%"=="Windows_NT" endlocal
83 | 
84 | :omega
85 | 


--------------------------------------------------------------------------------
/settings.gradle:
--------------------------------------------------------------------------------
1 | rootProject.name = 'elasticsearch-analysis-openkoreantext'
2 | 
3 | 


--------------------------------------------------------------------------------
/src/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-korean-text/elasticsearch-analysis-openkoreantext/a37dffab4cc64c5b478eded7bbb028fcfbdc4dd4/src/.DS_Store


--------------------------------------------------------------------------------
/src/main/java/org/apache/lucene/analysis/ko/KoreanTokenPrepareable.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import scala.collection.Seq;
 4 | 
 5 | import java.io.IOException;
 6 | 
 7 | import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken;
 8 | 
 9 | /**
10 |  * To prepare korean token sequence.
11 |  */
12 | public interface KoreanTokenPrepareable {
13 |     /**
14 |      * To prepare all tokens before token increment.
15 |      */
16 |     Seq<KoreanToken> prepareKoreanTokens() throws IOException;
17 | 
18 |     KoreanToken getCurrentToken();
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextAnalyzer.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.apache.lucene.analysis.*;
 4 | import org.apache.lucene.analysis.miscellaneous.LengthFilter;
 5 | import org.apache.lucene.analysis.standard.ClassicFilter;
 6 | 
 7 | import java.io.Reader;
 8 | import java.util.Arrays;
 9 | import java.util.List;
10 | 
11 | /**
12 |  * A default korean analyzer.
13 |  */
14 | public class OpenKoreanTextAnalyzer extends StopwordAnalyzerBase {
15 | 
16 |     private final static CharArraySet STOP_WORD_SET;
17 |     // Max token length is from https://ko.wikipedia.org/wiki/%EA%B8%B4_%ED%95%9C%EA%B5%AD%EC%96%B4_%EB%82%B1%EB%A7%90
18 |     private final static int MAX_TOKEN_LENGTH = 13;
19 | 
20 |     static {
21 |         List<String> stopWords = Arrays.asList(
22 |                 "a", "an", "and", "are", "as", "at", "be", "but", "by",
23 |                 "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the",
24 |                 "their", "then", "there", "these", "they", "this", "to", "was", "will", "with");
25 | 
26 |         STOP_WORD_SET = CharArraySet.unmodifiableSet(new CharArraySet(stopWords.size(), false));
27 |     }
28 | 
29 |     public OpenKoreanTextAnalyzer() {
30 |         super(STOP_WORD_SET);
31 |     }
32 | 
33 |     @Override
34 |     protected TokenStreamComponents createComponents(String fieldName) {
35 |         Tokenizer tokenizer = new OpenKoreanTextTokenizer();
36 | 
37 |         TokenStream tokenStream = new OpenKoreanTextStemmer(tokenizer);
38 |         tokenStream = new OpenKoreanTextRedundantFilter(tokenStream);
39 |         tokenStream = new ClassicFilter(tokenStream);
40 |         tokenStream = new LengthFilter(tokenStream, 0, MAX_TOKEN_LENGTH);
41 |         tokenStream = new LowerCaseFilter(tokenStream);
42 | 
43 |         return new TokenStreamComponents(tokenizer, tokenStream);
44 |     }
45 | 
46 |     @Override
47 |     protected Reader initReader(String fieldName, Reader reader) {
48 |         return new OpenKoreanTextNormalizer(super.initReader(fieldName, reader));
49 |     }
50 | }
51 | 


--------------------------------------------------------------------------------
/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextNormalizer.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.apache.lucene.analysis.charfilter.BaseCharFilter;
 4 | import org.openkoreantext.processor.OpenKoreanTextProcessor;
 5 | 
 6 | import java.io.IOException;
 7 | import java.io.Reader;
 8 | 
 9 | /**
10 |  * A character filter for normalizing input text.
11 |  * For normalizing text, it delegates input to {@link OpenKoreanTextProcessor}.
12 |  *
13 |  * ex) 그랰ㅋㅋ -> 그래ㅋㅋ, 재밌닿ㅎㅎㅎ -> 재밌다ㅎㅎ
14 |  */
15 | public class OpenKoreanTextNormalizer extends BaseCharFilter {
16 |     private static final int READER_BUFFER_SIZE = 2048;
17 | 
18 |     private boolean preparedToRead;
19 |     private char[] inputText;
20 |     private int cursor;
21 | 
22 |     public OpenKoreanTextNormalizer(Reader in) {
23 |         super(in);
24 |         initAttributes();
25 |     }
26 | 
27 |     @Override
28 |     public int read(char[] cbuf, int off, int len) throws IOException {
29 |         if (off < 0) throw new IllegalArgumentException("off < 0");
30 |         if (off >= cbuf.length) throw new IllegalArgumentException("off >= cbuf.length");
31 |         if (len <= 0) throw new IllegalArgumentException("len <= 0");
32 | 
33 |         if (!this.preparedToRead) {
34 |             prepareToRead();
35 |         }
36 | 
37 |         int copyLen = this.inputText.length - cursor;
38 |         if(copyLen < 1){
39 |             initAttributes();
40 |             return -1;
41 |         }
42 | 
43 |         copyLen = copyLen > len ? len : copyLen;
44 |         System.arraycopy(inputText, cursor, cbuf, off, copyLen);
45 |         cursor += copyLen;
46 |         return copyLen;
47 |     }
48 | 
49 |     private void initAttributes(){
50 |         this.preparedToRead = false;
51 |         this.inputText = null;
52 |         this.cursor = -1;
53 |     }
54 | 
55 |     private void prepareToRead() throws IOException {
56 |         this.preparedToRead = true;
57 |         this.inputText = normalizeInput().toCharArray();
58 |         this.cursor = 0;
59 |     }
60 | 
61 |     private String normalizeInput() throws IOException {
62 |         StringBuilder text = new StringBuilder();
63 |         char[] tmp = new char[READER_BUFFER_SIZE];
64 |         int len = -1;
65 |         while ((len = input.read(tmp)) != -1) {
66 |             text.append(new String(tmp, 0, len));
67 |         }
68 |         return OpenKoreanTextProcessor.normalize(text).toString();
69 |     }
70 | }
71 | 


--------------------------------------------------------------------------------
/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextPhraseExtractor.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.openkoreantext.processor.OpenKoreanTextProcessor;
 5 | import scala.collection.Iterator;
 6 | import scala.collection.JavaConverters;
 7 | import scala.collection.Seq;
 8 | 
 9 | import java.util.Arrays;
10 | 
11 | import static org.openkoreantext.processor.phrase_extractor.KoreanPhraseExtractor.KoreanPhrase;
12 | import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken;
13 | 
14 | /**
15 |  * Phrase Extractor. For extracting phrase, it delegates token to {@link OpenKoreanTextProcessor}
16 |  */
17 | public class OpenKoreanTextPhraseExtractor extends OpenKoreanTextTokenFilter {
18 | 
19 |     public OpenKoreanTextPhraseExtractor(TokenStream input) {
20 |         super(input);
21 |     }
22 | 
23 |     @Override
24 |     protected Seq<KoreanToken> perform(Seq<KoreanToken> tokens) {
25 |         Seq<KoreanPhrase> phrases = OpenKoreanTextProcessor.extractPhrases(tokens, false, true);
26 |         return convertPhrasesToTokens(phrases);
27 |     }
28 | 
29 |     private Seq<KoreanToken> convertPhrasesToTokens(Seq<KoreanPhrase> phrases) {
30 |         KoreanToken[] tokens = new KoreanToken[phrases.length()];
31 | 
32 |         Iterator<KoreanPhrase> iterator = phrases.iterator();
33 |         int i = 0;
34 |         while (iterator.hasNext()) {
35 |             KoreanPhrase phrase = iterator.next();
36 |             tokens[i++] = new KoreanToken(phrase.text(), phrase.pos(), phrase.offset(), phrase.length(), scala.Option.apply(null), false);
37 |         }
38 | 
39 |         Arrays.sort(tokens, (o1, o2) -> {
40 |             if(o1.offset()== o2.offset())
41 |                 return 0;
42 |             return o1.offset()< o2.offset()? -1 : 1;
43 |         });
44 | 
45 |         return JavaConverters.asScalaBuffer(Arrays.asList(tokens)).toSeq();
46 |     }
47 | }
48 | 


--------------------------------------------------------------------------------
/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextRedundantFilter.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import scala.collection.JavaConverters;
 5 | import scala.collection.Seq;
 6 | 
 7 | import java.util.*;
 8 | 
 9 | import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken;
10 | 
11 | /**
12 |  * Remove redundant type and term tokens.
13 |  */
14 | public class OpenKoreanTextRedundantFilter extends OpenKoreanTextTokenFilter {
15 | 
16 |     private final static Set<String> redundantTypes;
17 | 
18 |     private final static Set<String> redundantTerms;
19 | 
20 |     static {
21 |         String[] types = new String[]{"Space", "Conjunction", "Josa", "Eomi", "PreEomi", "Punctuation"};
22 |         redundantTypes = new HashSet<>();
23 |         for(String redundant : types) {
24 |             redundantTypes.add(redundant);
25 |         }
26 | 
27 |         redundantTerms = new HashSet<>();
28 |         String[] terms = new String[]{"이", "그", "저", "요", "것", "수", "등", "들", "및", "에", "에서", "또", "또는", "또한", "꼭", "잘", "로서", "로써"};
29 |         for(String redundant : terms) {
30 |             redundantTerms.add(redundant);
31 |         }
32 |     }
33 | 
34 | 
35 |     public OpenKoreanTextRedundantFilter(TokenStream input) {
36 |         super(input);
37 |     }
38 | 
39 |     @Override
40 |     protected Seq<KoreanToken> perform(Seq<KoreanToken> tokens) {
41 |         List<KoreanToken> performed = new ArrayList<>();
42 |         for(KoreanToken token : JavaConverters.seqAsJavaList(tokens)) {
43 |             if(redundantTypes.contains(token.pos().toString())){
44 |                continue;
45 |             }
46 |             if(redundantTerms.contains(token.text())){
47 |                 continue;
48 |             }
49 |             performed.add(token);
50 |         }
51 | 
52 |         return JavaConverters.asScalaBuffer(performed).toSeq();
53 |     }
54 | }
55 | 


--------------------------------------------------------------------------------
/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextStemmer.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import scala.collection.Iterator;
 5 | import scala.collection.JavaConverters;
 6 | import scala.collection.Seq;
 7 | 
 8 | import java.util.Arrays;
 9 | 
10 | import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken;
11 | 
12 | /**
13 |  * Stems Adjectives and Verbs tokens.
14 |  */
15 | public final class OpenKoreanTextStemmer extends OpenKoreanTextTokenFilter {
16 | 
17 |     public OpenKoreanTextStemmer(TokenStream input) {
18 |         super(input);
19 |     }
20 | 
21 |     @Override
22 |     protected Seq<KoreanToken> perform(Seq<KoreanToken> tokens) {
23 |         KoreanToken[] performed = new KoreanToken[tokens.length()];
24 | 
25 |         int i = 0;
26 |         Iterator<KoreanToken> tokenIterator =  tokens.iterator();
27 | 
28 |         while (tokenIterator.hasNext()) {
29 |             KoreanToken token = tokenIterator.next();
30 |             performed[i++] = token.stem().nonEmpty() ? stem(token) : token;
31 |         }
32 | 
33 |         return JavaConverters.asScalaBuffer(Arrays.asList(performed)).toSeq();
34 |     }
35 | 
36 |     private KoreanToken stem(KoreanToken token) {
37 |         return new KoreanToken(token.stem().get(), token.pos(), token.offset(), token.length(), scala.Option.apply(null), false);
38 |     }
39 | }
40 | 


--------------------------------------------------------------------------------
/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextTokenFilter.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.apache.lucene.analysis.TokenFilter;
 4 | import org.apache.lucene.analysis.TokenStream;
 5 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 6 | import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 7 | import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 8 | import scala.collection.JavaConverters;
 9 | import scala.collection.Seq;
10 | 
11 | import java.io.IOException;
12 | import java.util.List;
13 | 
14 | import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken;
15 | 
16 | /**
17 |  * Abstract token filter for processing korean tokens.
18 |  */
19 | public abstract class OpenKoreanTextTokenFilter extends TokenFilter implements KoreanTokenPrepareable {
20 | 
21 |     private final CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class);
22 |     private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
23 |     private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
24 |     private KoreanToken currentToken;
25 | 
26 |     protected int tokenIndex = 0;
27 |     protected List<KoreanToken> preparedTokens = null;
28 | 
29 |     public OpenKoreanTextTokenFilter(TokenStream input) {
30 |         super(input);
31 |     }
32 | 
33 |     @Override
34 |     public final boolean incrementToken() throws IOException {
35 |         clearAttributes();
36 | 
37 |         if(input instanceof KoreanTokenPrepareable) {
38 |             if(preparedTokens == null) {
39 |                 this.preparedTokens = JavaConverters.seqAsJavaList(prepareKoreanTokens());
40 |             }
41 | 
42 |             if (this.preparedTokens == null || this.preparedTokens.isEmpty() || tokenIndex >= this.preparedTokens.size()) {
43 |                 return false;
44 |             }
45 | 
46 |             setAttributes(this.preparedTokens.get(tokenIndex++));
47 |             return true;
48 |         } else {
49 |             return input.incrementToken();
50 |         }
51 |     }
52 | 
53 |     @Override
54 |     public Seq<KoreanToken> prepareKoreanTokens() throws IOException {
55 |         return perform(((KoreanTokenPrepareable) input).prepareKoreanTokens());
56 |     }
57 | 
58 |     @Override
59 |     public void reset() throws IOException {
60 |         super.reset();
61 |         initializeState();
62 |     }
63 | 
64 |     @Override
65 |     public KoreanToken getCurrentToken() {
66 |         return this.currentToken;
67 |     }
68 | 
69 |     protected abstract Seq<KoreanToken> perform(Seq<KoreanToken> tokens);
70 | 
71 |     private void setAttributes(KoreanToken token) {
72 |         charTermAttribute.append(token.text());
73 |         offsetAttribute.setOffset(token.offset(), token.offset() + token.length());
74 |         typeAttribute.setType(token.pos().toString());
75 |         this.currentToken = token;
76 |     }
77 | 
78 |     private void initializeState() {
79 |         this.tokenIndex = 0;
80 |         this.preparedTokens = null;
81 |         this.currentToken = null;
82 |     }
83 | }
84 | 


--------------------------------------------------------------------------------
/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextTokenizer.java:
--------------------------------------------------------------------------------
  1 | package org.apache.lucene.analysis.ko;
  2 | 
  3 | import org.apache.lucene.analysis.Tokenizer;
  4 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  5 | import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
  6 | import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
  7 | import org.apache.lucene.util.AttributeFactory;
  8 | import org.openkoreantext.processor.OpenKoreanTextProcessor;
  9 | import org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken;
 10 | import scala.collection.JavaConverters;
 11 | import scala.collection.Seq;
 12 | 
 13 | import java.io.IOException;
 14 | import java.util.HashSet;
 15 | import java.util.List;
 16 | import java.util.Set;
 17 | 
 18 | /**
 19 |  * Provides Korean tokenization.
 20 |  */
 21 | public class OpenKoreanTextTokenizer extends Tokenizer implements KoreanTokenPrepareable {
 22 | 
 23 |     private static final int READER_BUFFER_SIZE = 1024;
 24 | 
 25 |     private final static Set<String> stopTypes;
 26 | 
 27 |     static {
 28 |         stopTypes = new HashSet<>();
 29 |         stopTypes.add("Space");
 30 |     }
 31 | 
 32 |     private List<KoreanToken> preparedTokens = null;
 33 | 
 34 |     private KoreanToken currentToken = null;
 35 | 
 36 |     private final CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class);
 37 | 
 38 |     private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
 39 | 
 40 |     private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
 41 | 
 42 |     private int tokenIndex = 0;
 43 | 
 44 |     public OpenKoreanTextTokenizer() {
 45 |         super(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
 46 |     }
 47 | 
 48 |     @Override
 49 |     public final boolean incrementToken() throws IOException {
 50 |         clearAttributes();
 51 | 
 52 |         if (this.preparedTokens == null) {
 53 |             this.preparedTokens = JavaConverters.seqAsJavaList(prepareKoreanTokens());
 54 |         }
 55 | 
 56 |         if (this.preparedTokens == null || this.preparedTokens.isEmpty() || tokenIndex >= this.preparedTokens.size()) {
 57 |             return false;
 58 |         }
 59 | 
 60 |         setAttributes(this.preparedTokens.get(tokenIndex++));
 61 |         return true;
 62 |     }
 63 | 
 64 |     @Override
 65 |     public Seq<KoreanToken> prepareKoreanTokens() throws IOException {
 66 |         CharSequence text = readText();
 67 |         return OpenKoreanTextProcessor.tokenize(text);
 68 |     }
 69 | 
 70 |     @Override
 71 |     public void reset() throws IOException {
 72 |         super.reset();
 73 |         initializeState();
 74 |     }
 75 | 
 76 |     @Override
 77 |     public KoreanToken getCurrentToken(){
 78 |         return this.currentToken;
 79 |     }
 80 | 
 81 |     private CharSequence readText() throws IOException {
 82 |         StringBuilder text = new StringBuilder();
 83 |         char[] tmp = new char[READER_BUFFER_SIZE];
 84 |         int len = -1;
 85 |         while ((len = input.read(tmp)) != -1) {
 86 |             text.append(new String(tmp, 0, len));
 87 |         }
 88 |         return text.toString();
 89 |     }
 90 | 
 91 |     private void setAttributes(KoreanToken token) {
 92 |         charTermAttribute.append(token.text());
 93 |         offsetAttribute.setOffset(token.offset(), token.offset() + token.length());
 94 |         typeAttribute.setType(token.pos().toString());
 95 |         this.currentToken = token;
 96 |     }
 97 | 
 98 |     private void initializeState() {
 99 |         this.tokenIndex = 0;
100 |         this.preparedTokens = null;
101 |         this.currentToken = null;
102 |     }
103 | }
104 | 


--------------------------------------------------------------------------------
/src/main/java/org/apache/lucene/analysis/ko/UserDictionaryLoader.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.apache.logging.log4j.Logger;
 4 | import org.elasticsearch.common.Strings;
 5 | import org.elasticsearch.common.logging.Loggers;
 6 | import org.elasticsearch.index.analysis.OpenKoreanTextTokenizerFactory;
 7 | import org.openkoreantext.processor.OpenKoreanTextProcessor;
 8 | import scala.collection.JavaConverters;
 9 | 
10 | import java.io.*;
11 | import java.net.URL;
12 | import java.net.URLConnection;
13 | import java.security.AccessControlException;
14 | import java.util.ArrayList;
15 | import java.util.HashMap;
16 | import java.util.List;
17 | import java.util.Map;
18 | 
19 | /**
20 |  * Loader to add user custom dictionaries
21 |  * dictionaries must be located in {PLUGIN_PATH}/dic/
22 |  */
23 | public class UserDictionaryLoader {
24 | 
25 |     private static Logger logger = Loggers.getLogger(UserDictionaryLoader.class, "open-korean-text");
26 | 
27 |     private final static Map<String, Boolean> loadedDictionaryFiles = new HashMap<>();
28 | 
29 |     private static final String DEFAULT_DIC_SUFFIX = "dic/";
30 |     private static File[] dicFiles;
31 | 
32 |     static {
33 |         String currentPath = OpenKoreanTextTokenizerFactory.class.getProtectionDomain().getCodeSource().getLocation().getPath();
34 | 
35 |         dicFiles = new File[]{};
36 | 
37 |         try {
38 |             File dicDirectory = new File(new File(currentPath).getParent() + "/" + DEFAULT_DIC_SUFFIX);
39 |             if(dicDirectory.isDirectory()) {
40 |                 dicFiles = dicDirectory.listFiles();
41 |             }
42 |         } catch (AccessControlException e) {
43 |             logger.error("Can not load dictionary files", e);
44 |         }
45 |     }
46 | 
47 |     public static void loadDefaultUserDictionaries() {
48 |         for(File file : dicFiles) {
49 |             Boolean loaded = loadedDictionaryFiles.get(file.getPath());
50 |             if(loaded == null ||loaded == false) {
51 |                 try {
52 |                     addUserDictionary(file);
53 |                 } catch (IOException e) {
54 |                     throw new IllegalArgumentException(e);
55 |                 }
56 |                 loadedDictionaryFiles.put(file.getPath(), true);
57 |             }
58 |         }
59 |     }
60 | 
61 |     public static void addUserDictionary(List<String> words) {
62 |         OpenKoreanTextProcessor.addNounsToDictionary(JavaConverters.asScalaBuffer(words).toSeq());
63 |     }
64 | 
65 |     public static void addUserDictionary(File file) throws IOException {
66 |         addUserDictionary(new BufferedReader(new FileReader(file)));
67 |     }
68 | 
69 |     public static void addUserDictionary(URL url) throws IOException {
70 |         URLConnection connection = url.openConnection();
71 |         addUserDictionary(new BufferedReader(new InputStreamReader(connection.getInputStream())));
72 |     }
73 | 
74 |     private static void addUserDictionary(BufferedReader bufferedReader) throws IOException {
75 |         List<String> words = new ArrayList<>();
76 |         String word;
77 |         while ((word = bufferedReader.readLine()) != null) {
78 |             if(Strings.isEmpty(word)) continue;
79 |             words.add(word);
80 |         }
81 |         addUserDictionary(words);
82 |     }
83 | }
84 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextAnalyzerProvider.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis;
 2 | 
 3 | import org.apache.lucene.analysis.ko.OpenKoreanTextAnalyzer;
 4 | import org.apache.lucene.analysis.ko.UserDictionaryLoader;
 5 | import org.elasticsearch.common.settings.Settings;
 6 | import org.elasticsearch.env.Environment;
 7 | import org.elasticsearch.index.IndexSettings;
 8 | 
 9 | /**
10 |  * A analyzer provider for openkoreantext.
11 |  */
12 | public class OpenKoreanTextAnalyzerProvider extends AbstractIndexAnalyzerProvider<OpenKoreanTextAnalyzer> {
13 | 
14 |     private final OpenKoreanTextAnalyzer analyzer;
15 | 
16 |     public OpenKoreanTextAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
17 |         super(indexSettings, name, settings);
18 |         analyzer= new OpenKoreanTextAnalyzer();
19 |         UserDictionaryLoader.loadDefaultUserDictionaries();
20 |     }
21 | 
22 |     @Override
23 |     public OpenKoreanTextAnalyzer get() {
24 |         return this.analyzer;
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextNormalizerFactory.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.elasticsearch.common.settings.Settings;
 5 | import org.elasticsearch.env.Environment;
 6 | import org.elasticsearch.index.IndexSettings;
 7 | import org.apache.lucene.analysis.ko.OpenKoreanTextNormalizer;
 8 | import org.apache.lucene.analysis.ko.OpenKoreanTextStemmer;
 9 | 
10 | import java.io.Reader;
11 | 
12 | /**
13 |  * A ES character-filter factory for {@link OpenKoreanTextNormalizer}.
14 |  */
15 | public class OpenKoreanTextNormalizerFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
16 | 
17 |     public OpenKoreanTextNormalizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
18 |         super(indexSettings, name);
19 |     }
20 | 
21 |     @Override
22 |     public Reader create(Reader reader) {
23 |         return new OpenKoreanTextNormalizer(reader);
24 |     }
25 | 
26 |     @Override
27 |     public Object getMultiTermComponent() {
28 |         return this;
29 |     }
30 | 
31 |     public static class OpenKoreanTextStemmerFactory extends AbstractTokenFilterFactory {
32 |         public OpenKoreanTextStemmerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
33 |             super(indexSettings, name, settings);
34 |         }
35 | 
36 |         @Override
37 |         public TokenStream create(TokenStream tokenStream) {
38 |             return new OpenKoreanTextStemmer(tokenStream);
39 |         }
40 |     }
41 | }
42 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextPhraseExtractorFactory.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.apache.lucene.analysis.ko.OpenKoreanTextPhraseExtractor;
 5 | import org.elasticsearch.common.settings.Settings;
 6 | import org.elasticsearch.env.Environment;
 7 | import org.elasticsearch.index.IndexSettings;
 8 | 
 9 | /**
10 |  * A ES token filter factory for {@link OpenKoreanTextPhraseExtractor}.
11 |  */
12 | public class OpenKoreanTextPhraseExtractorFactory extends AbstractTokenFilterFactory {
13 | 
14 |     public OpenKoreanTextPhraseExtractorFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
15 |         super(indexSettings, name, settings);
16 |     }
17 | 
18 |     @Override
19 |     public TokenStream create(TokenStream tokenStream) {
20 |         return new OpenKoreanTextPhraseExtractor(tokenStream);
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextRedundantFilterFactory.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.apache.lucene.analysis.ko.OpenKoreanTextRedundantFilter;
 5 | import org.elasticsearch.common.settings.Settings;
 6 | import org.elasticsearch.env.Environment;
 7 | import org.elasticsearch.index.IndexSettings;
 8 | 
 9 | /**
10 |  * A ES token filter factory for {@link OpenKoreanTextRedundantFilter}.
11 |  */
12 | public class OpenKoreanTextRedundantFilterFactory extends AbstractTokenFilterFactory {
13 | 
14 |     public OpenKoreanTextRedundantFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
15 |         super(indexSettings, name, settings);
16 |     }
17 | 
18 |     @Override
19 |     public TokenStream create(TokenStream tokenStream) {
20 |         return new OpenKoreanTextRedundantFilter(tokenStream);
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextStemmerFactory.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.apache.lucene.analysis.ko.OpenKoreanTextStemmer;
 5 | import org.elasticsearch.common.settings.Settings;
 6 | import org.elasticsearch.env.Environment;
 7 | import org.elasticsearch.index.IndexSettings;
 8 | 
 9 | /**
10 |  * A ES token filter factory for {@link OpenKoreanTextStemmer}.
11 |  */
12 | public class OpenKoreanTextStemmerFactory extends AbstractTokenFilterFactory {
13 | 
14 |     public OpenKoreanTextStemmerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
15 |         super(indexSettings, name, settings);
16 |     }
17 | 
18 |     @Override
19 |     public TokenStream create(TokenStream tokenStream) {
20 |         return new OpenKoreanTextStemmer(tokenStream);
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextTokenizerFactory.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis;
 2 | 
 3 | import org.apache.lucene.analysis.Tokenizer;
 4 | import org.apache.lucene.analysis.ko.OpenKoreanTextTokenizer;
 5 | import org.apache.lucene.analysis.ko.UserDictionaryLoader;
 6 | import org.elasticsearch.common.settings.Settings;
 7 | import org.elasticsearch.env.Environment;
 8 | import org.elasticsearch.index.IndexSettings;
 9 | 
10 | /**
11 |  * A ES tokenizer factory for {@link OpenKoreanTextTokenizer}.
12 |  */
13 | public class OpenKoreanTextTokenizerFactory extends AbstractTokenizerFactory {
14 | 
15 |     public OpenKoreanTextTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
16 |         super(indexSettings, name, settings);
17 |         UserDictionaryLoader.loadDefaultUserDictionaries();
18 |     }
19 | 
20 |     @Override
21 |     public Tokenizer create() {
22 |         return new OpenKoreanTextTokenizer();
23 |     }
24 | }
25 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/plugin/analysis/openkoreantext/AnalysisOpenKoreanTextPlugin.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.analysis.openkoreantext;
 2 | 
 3 | import org.apache.lucene.analysis.Analyzer;
 4 | import org.elasticsearch.index.analysis.*;
 5 | import org.elasticsearch.indices.analysis.AnalysisModule;
 6 | import org.elasticsearch.plugins.AnalysisPlugin;
 7 | import org.elasticsearch.plugins.Plugin;
 8 | 
 9 | import java.util.HashMap;
10 | import java.util.Map;
11 | 
12 | import static java.util.Collections.singletonMap;
13 | 
14 | public class AnalysisOpenKoreanTextPlugin extends Plugin implements AnalysisPlugin {
15 | 
16 |     @Override
17 |     public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
18 |         return singletonMap("openkoreantext-analyzer", OpenKoreanTextAnalyzerProvider::new);
19 |     }
20 | 
21 |     @Override
22 |     public Map<String, AnalysisModule.AnalysisProvider<CharFilterFactory>> getCharFilters() {
23 |         return singletonMap("openkoreantext-normalizer", OpenKoreanTextNormalizerFactory::new);
24 |     }
25 | 
26 |     @Override
27 |     public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
28 |         Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizerFactories = new HashMap<>();
29 |         tokenizerFactories.put("openkoreantext-tokenizer", OpenKoreanTextTokenizerFactory::new);
30 |         return tokenizerFactories;
31 |     }
32 | 
33 |     @Override
34 |     public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
35 |         Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> tokenFilters = new HashMap<>();
36 |         tokenFilters.put("openkoreantext-stemmer", OpenKoreanTextStemmerFactory::new);
37 |         tokenFilters.put("openkoreantext-redundant-filter", OpenKoreanTextRedundantFilterFactory::new);
38 |         tokenFilters.put("openkoreantext-phrase-extractor", OpenKoreanTextPhraseExtractorFactory::new);
39 | 
40 |         return tokenFilters;
41 |     }
42 | }
43 | 


--------------------------------------------------------------------------------
/src/main/resources/dic/sample-dictionary:
--------------------------------------------------------------------------------
1 | 샘플사전
2 | 엘라스틱서치
3 | 
4 | 


--------------------------------------------------------------------------------
/src/main/resources/plugin-descriptor.properties:
--------------------------------------------------------------------------------
1 | classname=${descriptor.classname}
2 | name=${descriptor.name}
3 | description=${descriptor.description}
4 | version=${descriptor.version}
5 | java.version=${descriptor.javaVersion}
6 | elasticsearch.version=${descriptor.elasticsearchVersion}


--------------------------------------------------------------------------------
/src/test/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-korean-text/elasticsearch-analysis-openkoreantext/a37dffab4cc64c5b478eded7bbb028fcfbdc4dd4/src/test/.DS_Store


--------------------------------------------------------------------------------
/src/test/java/org/apache/lucene/analysis/ko/OpenKoreanTextAnalyzerTest.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.elasticsearch.common.settings.Settings;
 4 | import org.elasticsearch.index.Index;
 5 | import org.elasticsearch.index.analysis.*;
 6 | import org.elasticsearch.plugin.analysis.openkoreantext.AnalysisOpenKoreanTextPlugin;
 7 | import org.elasticsearch.test.ESTestCase;
 8 | 
 9 | import java.io.IOException;
10 | 
11 | import static org.hamcrest.Matchers.instanceOf;
12 | 
13 | public class OpenKoreanTextAnalyzerTest extends ESTestCase {
14 |     public void testDefaultComponentsLoading() throws IOException {
15 |         TestAnalysis analysis =  createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisOpenKoreanTextPlugin());
16 | 
17 |         CharFilterFactory charFilterFactory = analysis.charFilter.get("openkoreantext-normalizer");
18 |         assertNotNull(charFilterFactory);
19 |         assertThat(charFilterFactory, instanceOf(OpenKoreanTextNormalizerFactory.class));
20 | 
21 |         TokenizerFactory tokenizerFactory = analysis.tokenizer.get("openkoreantext-tokenizer");
22 |         assertNotNull(tokenizerFactory);
23 |         assertThat(tokenizerFactory, instanceOf(OpenKoreanTextTokenizerFactory.class));
24 | 
25 |         TokenFilterFactory tokenFilterFactory = analysis.tokenFilter.get("openkoreantext-stemmer");
26 |         assertNotNull(tokenFilterFactory);
27 |         assertThat(tokenFilterFactory, instanceOf(OpenKoreanTextStemmerFactory.class));
28 | 
29 |         tokenFilterFactory = analysis.tokenFilter.get("openkoreantext-redundant-filter");
30 |         assertNotNull(tokenFilterFactory);
31 |         assertThat(tokenFilterFactory, instanceOf(OpenKoreanTextRedundantFilterFactory.class));
32 |     }
33 | }


--------------------------------------------------------------------------------
/src/test/java/org/apache/lucene/analysis/ko/OpenKoreanTextNormalizerTest.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.apache.lucene.analysis.CharFilter;
 4 | import org.junit.Assert;
 5 | import org.junit.Test;
 6 | 
 7 | import java.io.StringReader;
 8 | 
 9 | public class OpenKoreanTextNormalizerTest {
10 |     @Test
11 |     public void testNormalizerCharFilter() throws Exception {
12 |         String query = "한국어를 처리하는 예시입니닼ㅋ. 오픈코리안텍스틓ㅎㅎㅎㅎㅎㅎㅎ";
13 |         String expected = "한국어를 처리하는 예시입니다ㅋ. 오픈코리안텍스트ㅎㅎㅎ";
14 | 
15 |         CharFilter inputReader = new OpenKoreanTextNormalizer(new StringReader(query));
16 | 
17 |         char[] tempBuff = new char[10];
18 |         StringBuilder actual = new StringBuilder();
19 | 
20 |         while (true) {
21 |             int length = inputReader.read(tempBuff);
22 |             if (length == -1) break;
23 |             actual.append(tempBuff, 0, length);
24 |         }
25 | 
26 |         Assert.assertEquals(expected, actual.toString());
27 |     }
28 | }


--------------------------------------------------------------------------------
/src/test/java/org/apache/lucene/analysis/ko/OpenKoreanTextPhraseExtractorTest.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.apache.lucene.analysis.Tokenizer;
 4 | import org.junit.Test;
 5 | 
 6 | import java.io.IOException;
 7 | import java.io.StringReader;
 8 | 
 9 | public class OpenKoreanTextPhraseExtractorTest {
10 | 
11 |     @Test
12 |     public void testBasicUsage() throws IOException {
13 |         String query = "한국어를 처리하는 예시입니다ㅋㅋ #한국어";
14 | 
15 |         String[] expectedCharTerms = new String[]{"한국어", "처리", "처리하는 예시", "예시", "#한국어"};
16 |         String[] expectedTypes = new String[]{"Noun", "Noun", "Noun", "Noun", "Hashtag"};
17 |         int[] expectedStartOffsets = new int[]{0, 5, 5, 10, 18};
18 |         int[] expectedEndOffsets = new int[]{3, 7, 12, 12, 22};
19 | 
20 |         Tokenizer tokenizer = new OpenKoreanTextTokenizer();
21 |         tokenizer.setReader(new StringReader(query));
22 | 
23 |         OpenKoreanTextTokenFilter tokenFilter = new OpenKoreanTextPhraseExtractor(tokenizer);
24 |         TokenStreamAssertions.assertTokenStream(tokenFilter, expectedCharTerms, expectedTypes, expectedStartOffsets, expectedEndOffsets);
25 |     }
26 | }


--------------------------------------------------------------------------------
/src/test/java/org/apache/lucene/analysis/ko/OpenKoreanTextRedundantFilterTest.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.apache.lucene.analysis.Tokenizer;
 4 | import org.junit.Test;
 5 | 
 6 | import java.io.IOException;
 7 | import java.io.StringReader;
 8 | 
 9 | public class OpenKoreanTextRedundantFilterTest {
10 |     @Test
11 |     public void testBasicUsage() throws IOException {
12 |         String query = "그리고 이것은 예시, 또는 예로써, 한국어를 처리하기 입니다";
13 |         String[] expectedCharTerms = new String[]{"예시", "예", "한국어", "처리", "하다", "이다"};
14 |         String[] expectedTypes = new String[]{"Noun", "Modifier", "Noun", "Noun", "Verb", "Adjective"};
15 |         int[] expectedStartOffsets = new int[]{8, 15, 20, 25, 27, 30};
16 |         int[] expectedEndOffsets = new int[]{10, 16, 23, 27, 29, 33};
17 | 
18 |         Tokenizer tokenizer = new OpenKoreanTextTokenizer();
19 |         tokenizer.setReader(new StringReader(query));
20 | 
21 |         OpenKoreanTextTokenFilter tokenFilter = new OpenKoreanTextStemmer(tokenizer);
22 |         tokenFilter = new OpenKoreanTextRedundantFilter(tokenFilter);
23 | 
24 |         TokenStreamAssertions.assertTokenStream(tokenFilter, expectedCharTerms, expectedTypes, expectedStartOffsets, expectedEndOffsets);
25 |     }
26 | }


--------------------------------------------------------------------------------
/src/test/java/org/apache/lucene/analysis/ko/OpenKoreanTextStemmerTest.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.apache.lucene.analysis.Tokenizer;
 4 | import org.junit.Test;
 5 | 
 6 | import java.io.IOException;
 7 | import java.io.StringReader;
 8 | 
 9 | public class OpenKoreanTextStemmerTest  {
10 | 
11 |     @Test
12 |     public void testBasicUsage() throws IOException {
13 |         String query = "한국어를 처리하는 예시입니다ㅋㅋ";
14 |         String[] expectedCharTerms = new String[]{"한국어", "를", " ", "처리", "하다", " ", "예시", "이다", "ㅋㅋ"};
15 |         String[] expectedTypes = new String[]{"Noun", "Josa", "Space", "Noun", "Verb", "Space", "Noun", "Adjective", "KoreanParticle"};
16 |         int[] expectedStartOffsets = new int[]{0, 3, 4, 5, 7, 9, 10, 12, 15};
17 |         int[] expectedEndOffsets = new int[]{3, 4, 5, 7, 9, 10, 12, 15, 17};
18 | 
19 |         Tokenizer tokenizer = new OpenKoreanTextTokenizer();
20 |         tokenizer.setReader(new StringReader(query));
21 | 
22 |         OpenKoreanTextTokenFilter tokenFilter = new OpenKoreanTextStemmer(tokenizer);
23 | 
24 |         TokenStreamAssertions.assertTokenStream(tokenFilter, expectedCharTerms, expectedTypes, expectedStartOffsets, expectedEndOffsets);
25 |     }
26 | }


--------------------------------------------------------------------------------
/src/test/java/org/apache/lucene/analysis/ko/OpenKoreanTextTokenizerTest.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.junit.Test;
 4 | 
 5 | import java.io.File;
 6 | import java.io.IOException;
 7 | import java.io.StringReader;
 8 | import java.net.URL;
 9 | import java.util.ArrayList;
10 | import java.util.List;
11 | 
12 | public class OpenKoreanTextTokenizerTest {
13 | 
14 |     @Test
15 |     public void testTokenizer() throws IOException {
16 |         String text = "한국어를 처리하는 예시입니다ㅋㅋ";
17 | 
18 |         OpenKoreanTextTokenizer tokenizer = new OpenKoreanTextTokenizer();
19 |         tokenizer.setReader(new StringReader(text));
20 | 
21 |         String[] expectedCharTerms = new String[]{"한국어", "를", " ", "처리", "하는", " ", "예시", "입니다", "ㅋㅋ"};
22 |         String[] expectedTypes = new String[]{"Noun", "Josa", "Space", "Noun", "Verb", "Space", "Noun", "Adjective", "KoreanParticle"};
23 |         int[] expectedStartOffsets = new int[]{0, 3, 4, 5, 7, 9, 10, 12, 15};
24 |         int[] expectedEndOffsets = new int[]{3, 4, 5, 7, 9, 10, 12, 15, 17};
25 | 
26 |         TokenStreamAssertions.assertTokenStream(tokenizer, expectedCharTerms, expectedTypes, expectedStartOffsets, expectedEndOffsets);
27 |     }
28 | 
29 |     @Test
30 |     public void testAddNounsToDictionary() throws IOException {
31 |         String text = "뷁충정식은 맛있다";
32 | 
33 |         OpenKoreanTextTokenizer tokenizer = new OpenKoreanTextTokenizer();
34 |         tokenizer.setReader(new StringReader(text));
35 | 
36 |         String[] expected = new String[]{"뷁충정식", "은", " ", "맛있다"};
37 | 
38 |         List<String> userDictionary = new ArrayList<>();
39 |         userDictionary.add("뷁충정식");
40 | 
41 |         UserDictionaryLoader.addUserDictionary(userDictionary);
42 | 
43 |         tokenizer.setReader(new StringReader(text));
44 | 
45 |         TokenStreamAssertions.assertTokenStream(tokenizer, expected, null, null, null);
46 |     }
47 | 
48 |     @Test
49 |     public void testUserDictionaryFromFile() throws IOException {
50 |         String text = "퀠후푸룩커피는 맛있다";
51 |         String[] expected = new String[]{"퀠후푸룩커피", "는", " ", "맛있다"};
52 | 
53 |         OpenKoreanTextTokenizer tokenizer = new OpenKoreanTextTokenizer();
54 | 
55 |         File dic = new File(getClass().getClassLoader().getResource("dictionary").getFile());
56 |         String path = dic.getAbsolutePath();
57 |         UserDictionaryLoader.addUserDictionary(new File(path));
58 | 
59 |         tokenizer.setReader(new StringReader(text));
60 |         TokenStreamAssertions.assertTokenStream(tokenizer, expected, null, null, null);
61 |     }
62 | 
63 |     @Test
64 |     public void testUserDictionaryFromURL() throws IOException {
65 |         String text = "안비빈비빔밥은 맛있다";
66 |         String[] expected = new String[]{"안비빈비빔밥", "은", " ", "맛있다"};
67 | 
68 |         OpenKoreanTextTokenizer tokenizer = new OpenKoreanTextTokenizer();
69 | 
70 |         URL url = new URL("https://raw.githubusercontent.com/open-korean-text/elasticsearch-analysis-openkoreantext/master/src/test/resources/httpdictionary");
71 |         UserDictionaryLoader.addUserDictionary(url);
72 | 
73 |         tokenizer.setReader(new StringReader(text));
74 | 
75 |         TokenStreamAssertions.assertTokenStream(tokenizer, expected, null, null, null);
76 |     }
77 | }


--------------------------------------------------------------------------------
/src/test/java/org/apache/lucene/analysis/ko/TokenStreamAssertions.java:
--------------------------------------------------------------------------------
 1 | package org.apache.lucene.analysis.ko;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 5 | import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 6 | import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 7 | 
 8 | import java.io.IOException;
 9 | 
10 | import static org.junit.Assert.assertEquals;
11 | 
12 | public class TokenStreamAssertions {
13 |     public static void assertTokenStream(TokenStream tokenStream, String[] expectedCharTerms, String[] expectedTypes, int[] expectedStartOffsets, int[] expectedEndOffsets) throws IOException {
14 |         tokenStream.reset();
15 |         int index = 0;
16 |         while (tokenStream.incrementToken() == true) {
17 |             assertEquals(expectedCharTerms[index], tokenStream.getAttribute(CharTermAttribute.class).toString());
18 | 
19 |             if(expectedTypes != null) {
20 |                 assertEquals(expectedTypes[index], tokenStream.getAttribute(TypeAttribute.class).type());
21 |             }
22 | 
23 |             OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);
24 | 
25 |             if(expectedStartOffsets != null) {
26 |                 assertEquals(expectedStartOffsets[index], offsets.startOffset());
27 |             }
28 | 
29 |             if(expectedEndOffsets != null) {
30 |                 assertEquals(expectedEndOffsets[index], offsets.endOffset());
31 |             }
32 | 
33 |             index++;
34 |         }
35 |         tokenStream.end();
36 |     }
37 | }
38 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsesarch/plugin/analysis/openkoreantext/AnalysisOpenKoreanTextPluginTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsesarch.plugin.analysis.openkoreantext;
 2 | 
 3 | import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
 4 | import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
 5 | import org.elasticsearch.plugin.analysis.openkoreantext.AnalysisOpenKoreanTextPlugin;
 6 | import org.elasticsearch.plugins.Plugin;
 7 | import org.elasticsearch.plugins.PluginInfo;
 8 | import org.elasticsearch.test.ESIntegTestCase;
 9 | import org.junit.Assert;
10 | 
11 | import java.util.Collection;
12 | import java.util.Collections;
13 | 
14 | public class AnalysisOpenKoreanTextPluginTest extends ESIntegTestCase {
15 |     @Override
16 |     protected Collection<Class<? extends Plugin>> nodePlugins() {
17 |         return Collections.singleton(AnalysisOpenKoreanTextPlugin.class);
18 |     }
19 | 
20 |     public void testPluginIsLoaded() {
21 |         NodesInfoResponse response = client().admin().cluster().prepareNodesInfo().setPlugins(true).get();
22 |         for (NodeInfo node : response.getNodes()) {
23 |             boolean founded = false;
24 |             for (PluginInfo pluginInfo : node.getPlugins().getPluginInfos()) {
25 |                 if (pluginInfo.getName().equals(AnalysisOpenKoreanTextPlugin.class.getName())) {
26 |                     founded = true;
27 |                 }
28 |             }
29 |             Assert.assertTrue(founded);
30 |         }
31 |     }
32 | }


--------------------------------------------------------------------------------
/src/test/resources/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-korean-text/elasticsearch-analysis-openkoreantext/a37dffab4cc64c5b478eded7bbb028fcfbdc4dd4/src/test/resources/.DS_Store


--------------------------------------------------------------------------------
/src/test/resources/dic/sample-dictionary:
--------------------------------------------------------------------------------
1 | 샘플사전
2 | 엘라스틱서치
3 | 
4 | 


--------------------------------------------------------------------------------
/src/test/resources/dictionary:
--------------------------------------------------------------------------------
1 | 퀠후푸룩커피
2 | 후르륵짭짭커피


--------------------------------------------------------------------------------
/src/test/resources/httpdictionary:
--------------------------------------------------------------------------------
1 | # URL로제공되는 딕셔너리
2 | 뽀로록륵김치
3 | 안비빈비빔밥


--------------------------------------------------------------------------------
/src/test/resources/plugin-descriptor.properties:
--------------------------------------------------------------------------------
1 | classname=org.elasticsearch.plugin.analysis.openkoreantext.AnalysisOpenKoreanTextPlugin
2 | name=elasticserach-analysis-openkoreantext
3 | description=Korean analysis plugin integrates open-korean-text module into elasticsearch.
4 | version=1.0.0
5 | java.version=1.8
6 | elasticsearch.version=5.0.0


--------------------------------------------------------------------------------