├── .gitignore
├── README.md
├── pom.xml
├── release.xml
└── src
├── main
├── java
│ └── org
│ │ ├── apache
│ │ └── solr
│ │ │ └── index
│ │ │ └── analysis
│ │ │ ├── chosung
│ │ │ └── JavacafeChosungTokenFilterFactory.java
│ │ │ ├── eng2kor
│ │ │ └── JavacafeEng2KorConvertFilterFactory.java
│ │ │ ├── jamo
│ │ │ └── JavacafeJamoTokenFilterFactory.java
│ │ │ └── kor2eng
│ │ │ └── JavacafeKor2EngConvertFilterFactory.java
│ │ └── elasticsearch
│ │ ├── index
│ │ ├── analysis
│ │ │ ├── chosung
│ │ │ │ ├── JavacafeChosungTokenFilter.java
│ │ │ │ └── JavacafeChosungTokenFilterFactory.java
│ │ │ ├── eng2kor
│ │ │ │ ├── JavacafeEng2KorConvertFilter.java
│ │ │ │ ├── JavacafeEng2KorConvertFilter2.java
│ │ │ │ ├── JavacafeEng2KorConvertFilter3.java
│ │ │ │ └── JavacafeEng2KorConvertFilterFactory.java
│ │ │ ├── jamo
│ │ │ │ ├── JavacafeJamoTokenFilter.java
│ │ │ │ └── JavacafeJamoTokenFilterFactory.java
│ │ │ ├── kor2eng
│ │ │ │ ├── JavacafeKor2EngConvertFilter.java
│ │ │ │ └── JavacafeKor2EngConvertFilterFactory.java
│ │ │ └── spell
│ │ │ │ ├── JavacafeSpellFilter.java
│ │ │ │ └── JavacafeSpellFilterFactory.java
│ │ └── common
│ │ │ ├── converter
│ │ │ ├── EngToKorConverter.java
│ │ │ └── KorToEngConverter.java
│ │ │ ├── merger
│ │ │ └── KoreanMerger.java
│ │ │ ├── parser
│ │ │ ├── AbstractKoreanParser.java
│ │ │ ├── KoreanChosungParser.java
│ │ │ └── KoreanJamoParser.java
│ │ │ ├── type
│ │ │ └── CodeType.java
│ │ │ └── util
│ │ │ ├── HangulUtil.java
│ │ │ ├── JamoUtil.java
│ │ │ └── KeyboardUtil.java
│ │ └── plugin
│ │ └── analysis
│ │ └── JavacafePlugin.java
└── resources
│ └── plugin-descriptor.properties
└── test
└── java
└── org
└── elasticsearch
└── plugin
├── esTest
├── AbstractPluginTest.java
├── JavacafeChosungTest.java
├── JavacafeEng2KorTest.java
├── JavacafeJamoTest.java
├── JavacafeKor2EngTest.java
└── JavacafeSpellTest.java
├── luceneTest
└── TokenTest.java
└── utilTest
├── ConverterE2KTest.java
├── ConverterK2ETest.java
├── MergerTest.java
├── ParserChosungTest.java
├── ParserJamoTest.java
└── SpellCheckTest.java
/.gitignore:
--------------------------------------------------------------------------------
1 | ### Eclipse ###
2 | *.pydevproject
3 | .metadata
4 | .gradle
5 | bin/
6 | tmp/
7 | target/
8 | *.tmp
9 | *.bak
10 | *.swp
11 | *~.nib
12 | local.properties
13 | .settings/
14 | .loadpath
15 | .factorypath
16 | .classpath
17 | .project
18 | logs/
19 | .idea
20 | work/Tomcat/
21 |
22 | # Spring
23 | .springBeans
24 |
25 | # External tool builders
26 | .externalToolBuilders/
27 |
28 | # Locally stored "Eclipse launch configurations"
29 | *.launch
30 |
31 | # CDT-specific
32 | .cproject
33 |
34 | # PDT-specific
35 | .buildpath
36 |
37 | # sbteclipse plugin
38 | .target
39 |
40 | # TeXlipse plugin
41 | .texlipse
42 |
43 | ### Maven ###
44 | pom.xml.tag
45 | pom.xml.releaseBackup
46 | pom.xml.versionsBackup
47 | pom.xml.next
48 | release.properties
49 |
50 | ### Java ###
51 | *.class
52 |
53 | # Mobile Tools for Java (J2ME)
54 | .mtj.tmp/
55 |
56 | # Package Files #
57 | *.jar
58 | *.war
59 | *.ear
60 |
61 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
62 | hs_err_pid*
63 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # elasticsearch-plugin
2 | 자바카페 Elasticsearch 플러그인
3 |
4 | [elasticsearch-plugin](https://github.com/javacafe-project/elasticsearch-plugin)은 사용자가 한글을 쉽게 검색할 수 있도록 만든 플러그인입니다.
5 |
6 | > 링크 다운로드
7 | >
8 | >[7.0.0](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v7.0.0)
9 | >
10 | >[6.7.0](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.7.0)
11 | >
12 | >[6.6.2](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.6.2)
13 | >
14 | >[6.6.1](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.6.1)
15 | >
16 | >[6.6.0](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.6.0)
17 | >
18 | >[6.5.4](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.5.4)
19 |
20 | # 설치방법
21 | >~~~~
22 | >elasticsearch-plugin install https://github.com/javacafe-project/elastic-book-etc/raw/master/plugin/javacafe-analyzer-6.4.3.zip
23 | >~~~~
24 |
25 | # 제공기능
26 |
27 | 엘라스틱서치 혹은 솔라의 최신 버전에서 사용 가능한 플러그인으로, 한글 기반의 자동완성/검색 결과를 더욱 효율적으로 사용하기 위해 개발되었으며 아래와 같은 기능을 제공합니다.
28 |
29 | ## 초성추출
30 | 검색어로 들어오는 단어가 초성인 경우 검색 결과 혹은 자동완성의 결과를 초성으로 매칭하여 검색되게 하는 플러그인입니다.
31 |
32 | ### 사용방법
33 |
34 |
35 | ## 자소분해
36 | 자동완성에서 한글을 검색 가능한 형태로 변형하는 플러그인입니다. 예를 들어 삼성전자의 경우 삼ㅅ만 검색하여도 삼성전자가 검색될 수 있도록 한글의 자소를 분해하여 검색할 수 있도록 합니다.
37 |
38 | ### 사용방법
39 |
40 |
41 | ## 한영/영한 오타교정
42 | 한글을 영문 자판으로, 영문을 한글 자판으로 잘못 입력한 검색어를 보정해 주는 플러그인입니다. 예를 들어 삼성전자를 tkatjdwjswk라고 검색하거나 iphone을 ㅑㅔㅙㅜㄷ와 같이 잘못 검색한 경우에도 검색 결과를 도출할 수 있도록 도와줍니다.
43 |
44 | ### 사용방법
45 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 | 4.0.0
6 |
7 | org.elasticsearch.plugin
8 | javacafe-analyzer
9 | 1.0
10 |
11 | jar
12 |
13 |
14 |
15 | 6.4.3
16 |
17 | 7.2.1
18 |
19 |
20 |
21 |
22 |
23 |
24 | org.elasticsearch
25 | elasticsearch
26 | ${elasticsearch.version}
27 | provided
28 |
29 |
30 | org.apache.solr
31 | solr-core
32 | ${lucene.version}
33 | jar
34 | provided
35 |
36 |
37 | org.apache.lucene
38 | lucene-core
39 | ${lucene.version}
40 | provided
41 |
42 |
43 |
44 | org.apache.commons
45 | commons-lang3
46 | 3.5
47 |
48 |
49 |
50 | org.apache.logging.log4j
51 | log4j-core
52 | 2.16.0
53 | provided
54 |
55 |
56 |
57 |
58 |
59 | org.elasticsearch.test
60 | framework
61 | ${elasticsearch.version}
62 | provided
63 |
64 |
65 | junit
66 | junit
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 | junit
75 | junit
76 | 4.11
77 | provided
78 |
79 |
80 | org.hamcrest
81 | hamcrest-core
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 | javacafe-analyzer-${elasticsearch.version}
92 |
93 |
94 | src/main/resources
95 | false
96 |
97 | *.properties
98 |
99 |
100 |
101 |
102 |
103 | org.apache.maven.plugins
104 | maven-compiler-plugin
105 |
106 | 1.8
107 | 1.8
108 | UTF-8
109 |
110 |
111 |
112 | org.apache.maven.plugins
113 | maven-dependency-plugin
114 | 3.0.0
115 |
116 |
117 | copy-dependencies
118 | package
119 |
120 | copy-dependencies
121 |
122 |
123 | ${project.build.directory}/lib
124 |
125 |
126 |
127 |
128 |
129 | org.apache.maven.plugins
130 | maven-surefire-plugin
131 | 2.12.1
132 |
133 | -Dtests.security.manager=false
134 | true
135 |
136 |
137 |
138 | org.apache.maven.plugins
139 | maven-assembly-plugin
140 | 3.0.0
141 |
142 | false
143 | ${project.build.directory}/releases
144 |
145 | release.xml
146 |
147 |
148 |
149 |
150 | zip-with-dependencies
151 | package
152 |
153 | single
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
--------------------------------------------------------------------------------
/release.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 | zip-with-dependencies
6 |
7 | zip
8 |
9 | false
10 |
11 |
12 | ${project.basedir}/src/main/resources/plugin-descriptor.properties
13 |
14 | true
15 |
16 |
17 |
18 |
19 |
20 | true
21 | true
22 |
23 |
24 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/solr/index/analysis/chosung/JavacafeChosungTokenFilterFactory.java:
--------------------------------------------------------------------------------
1 | package org.apache.solr.index.analysis.chosung;
2 |
3 | import org.apache.lucene.analysis.TokenStream;
4 | import org.apache.lucene.analysis.util.TokenFilterFactory;
5 | import org.elasticsearch.common.settings.Settings;
6 | import org.elasticsearch.env.Environment;
7 | import org.elasticsearch.index.IndexSettings;
8 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
9 | import org.elasticsearch.index.analysis.chosung.JavacafeChosungTokenFilter;
10 |
11 | import java.util.Map;
12 | /**
13 | *
14 | *
15 | *
16 | *
17 | *
18 | *
19 | *
20 | *
21 | *
22 | *
23 | *
24 | *
25 | *
26 | *
27 | *
28 | *
29 | *
30 | *
31 | *
32 | * */
33 | public class JavacafeChosungTokenFilterFactory extends TokenFilterFactory {
34 |
35 |
36 | public JavacafeChosungTokenFilterFactory(Map args) {
37 | super(args);
38 | }
39 |
40 | @Override
41 | public TokenStream create(TokenStream stream) {
42 | return new JavacafeChosungTokenFilter(stream);
43 | }
44 |
45 |
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/solr/index/analysis/eng2kor/JavacafeEng2KorConvertFilterFactory.java:
--------------------------------------------------------------------------------
1 | package org.apache.solr.index.analysis.eng2kor;
2 |
3 | import org.apache.lucene.analysis.TokenStream;
4 | import org.apache.lucene.analysis.util.TokenFilterFactory;
5 | import org.elasticsearch.index.analysis.eng2kor.JavacafeEng2KorConvertFilter;
6 |
7 | import java.util.Map;
8 | /**
9 | *
10 | *
11 | *
12 | *
13 | *
14 | *
15 | *
16 | *
17 | *
18 | *
19 | *
20 | *
21 | *
22 | *
23 | *
24 | *
25 | *
26 | * */
27 | public class JavacafeEng2KorConvertFilterFactory extends TokenFilterFactory {
28 |
29 |
30 | public JavacafeEng2KorConvertFilterFactory(Map args) {
31 | super(args);
32 | }
33 |
34 |
35 | @Override
36 | public TokenStream create(TokenStream tokenStream) {
37 | return new JavacafeEng2KorConvertFilter(tokenStream);
38 | }
39 |
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/solr/index/analysis/jamo/JavacafeJamoTokenFilterFactory.java:
--------------------------------------------------------------------------------
1 | package org.apache.solr.index.analysis.jamo;
2 |
3 | import org.apache.lucene.analysis.TokenStream;
4 | import org.apache.lucene.analysis.util.TokenFilterFactory;
5 | import org.elasticsearch.index.analysis.jamo.JavacafeJamoTokenFilter;
6 |
7 | import java.util.Map;
8 |
9 | public class JavacafeJamoTokenFilterFactory extends TokenFilterFactory {
10 |
11 |
12 | public JavacafeJamoTokenFilterFactory(Map args) {
13 | super(args);
14 | }
15 |
16 |
17 | @Override
18 | public TokenStream create(TokenStream stream) {
19 | return new JavacafeJamoTokenFilter(stream);
20 | }
21 |
22 |
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/solr/index/analysis/kor2eng/JavacafeKor2EngConvertFilterFactory.java:
--------------------------------------------------------------------------------
1 | package org.apache.solr.index.analysis.kor2eng;
2 |
3 | import org.apache.lucene.analysis.TokenStream;
4 | import org.apache.lucene.analysis.util.TokenFilterFactory;
5 | import org.elasticsearch.index.analysis.kor2eng.JavacafeKor2EngConvertFilter;
6 |
7 | import java.util.Map;
8 |
9 | /**
10 | *
11 | *
12 | *
13 | *
14 | *
15 | *
16 | *
17 | *
18 | *
19 | *
20 | *
21 | *
22 | *
23 | *
24 | *
25 | *
26 | *
27 | * */
28 | public class JavacafeKor2EngConvertFilterFactory extends TokenFilterFactory {
29 |
30 |
31 | public JavacafeKor2EngConvertFilterFactory(Map args) {
32 | super(args);
33 | }
34 |
35 |
36 | @Override
37 | public TokenStream create(TokenStream tokenStream) {
38 | return new JavacafeKor2EngConvertFilter(tokenStream);
39 | }
40 |
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/chosung/JavacafeChosungTokenFilter.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.analysis.chosung;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.lucene.analysis.TokenFilter;
6 | import org.apache.lucene.analysis.TokenStream;
7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
8 | import org.elasticsearch.index.common.parser.KoreanChosungParser;
9 |
10 | /**
11 | * 한글 초성 분석 필터
12 | *
13 | * @author hrkim
14 | *
15 | */
16 | public final class JavacafeChosungTokenFilter extends TokenFilter {
17 |
18 | private KoreanChosungParser parser;
19 | private CharTermAttribute termAtt;
20 |
21 |
22 | public JavacafeChosungTokenFilter(TokenStream stream) {
23 | super(stream);
24 | this.parser = new KoreanChosungParser();
25 | this.termAtt = addAttribute(CharTermAttribute.class);
26 | }
27 |
28 |
29 | /**
30 | * 한글 초성 Parser를 이용하여 토큰을 파싱하고 Term을 구한다.
31 | */
32 | @Override
33 | public boolean incrementToken() throws IOException {
34 |
35 | if (input.incrementToken()) {
36 | CharSequence parserdData = parser.parse(termAtt.toString());
37 | termAtt.setEmpty();
38 | termAtt.append(parserdData);
39 |
40 | return true;
41 | }
42 |
43 | return false;
44 | }
45 |
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/chosung/JavacafeChosungTokenFilterFactory.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.analysis.chosung;
2 |
3 | import org.apache.lucene.analysis.TokenStream;
4 | import org.elasticsearch.common.settings.Settings;
5 | import org.elasticsearch.env.Environment;
6 | import org.elasticsearch.index.IndexSettings;
7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
8 |
9 | public class JavacafeChosungTokenFilterFactory extends AbstractTokenFilterFactory {
10 |
11 |
12 | public JavacafeChosungTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
13 | super(indexSettings, name, settings);
14 | }
15 |
16 |
17 | @Override
18 | public TokenStream create(TokenStream stream) {
19 | return new JavacafeChosungTokenFilter(stream);
20 | }
21 |
22 |
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/eng2kor/JavacafeEng2KorConvertFilter.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.analysis.eng2kor;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.lucene.analysis.TokenFilter;
6 | import org.apache.lucene.analysis.TokenStream;
7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
8 | import org.elasticsearch.index.common.converter.EngToKorConverter;
9 |
10 | /**
11 | * 영한 변환 필터
12 | *
13 | * @author hrkim
14 | *
15 | */
16 | public final class JavacafeEng2KorConvertFilter extends TokenFilter {
17 |
18 | private EngToKorConverter converter;
19 | private CharTermAttribute termAtt;
20 |
21 |
22 | public JavacafeEng2KorConvertFilter(TokenStream stream) {
23 | super(stream);
24 | this.converter = new EngToKorConverter();
25 | this.termAtt = addAttribute(CharTermAttribute.class);
26 | }
27 |
28 |
29 | @Override
30 | public boolean incrementToken() throws IOException {
31 |
32 | if (input.incrementToken()) {
33 | CharSequence parserdData = converter.convert(termAtt.toString());
34 | termAtt.setEmpty();
35 | termAtt.append(parserdData);
36 |
37 | return true;
38 | }
39 |
40 | return false;
41 | }
42 |
43 |
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/eng2kor/JavacafeEng2KorConvertFilter2.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.analysis.eng2kor;
2 |
3 | import org.apache.lucene.analysis.TokenFilter;
4 | import org.apache.lucene.analysis.TokenStream;
5 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
6 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
7 | import org.elasticsearch.index.common.converter.EngToKorConverter;
8 |
9 | import java.io.IOException;
10 | import java.util.LinkedList;
11 | import java.util.Queue;
12 |
13 | /**
14 | * 영한 변환 필터
15 | *
16 | * @author hrkim
17 | *
18 | */
19 | public final class JavacafeEng2KorConvertFilter2 extends TokenFilter {
20 |
21 | private EngToKorConverter converter;
22 | private CharTermAttribute termAtt;
23 | private PositionIncrementAttribute positionIncrementAttribute;
24 |
25 | private Queue simpleQueue;
26 |
27 |
28 | public JavacafeEng2KorConvertFilter2(TokenStream stream) {
29 | super(stream);
30 |
31 | this.converter = new EngToKorConverter();
32 | this.termAtt = addAttribute(CharTermAttribute.class);
33 | this.positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
34 |
35 | this.simpleQueue = new LinkedList();
36 | }
37 |
38 |
39 | @Override
40 | public boolean incrementToken() throws IOException {
41 |
42 | if (!simpleQueue.isEmpty()) {
43 | char[] buffer = simpleQueue.poll();
44 | termAtt.setEmpty();
45 | termAtt.copyBuffer(buffer, 0, buffer.length);
46 | positionIncrementAttribute.setPositionIncrement(0);
47 |
48 | return true;
49 | }
50 |
51 | if (!input.incrementToken()) {
52 | return false;
53 |
54 | } else {
55 | String result = converter.convert(termAtt.toString());
56 | simpleQueue.add(result.toCharArray());
57 | return true;
58 | }
59 | }
60 |
61 |
62 |
63 | }
64 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/eng2kor/JavacafeEng2KorConvertFilter3.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.analysis.eng2kor;
2 |
3 | import org.apache.lucene.analysis.TokenFilter;
4 | import org.apache.lucene.analysis.TokenStream;
5 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
6 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
7 | import org.elasticsearch.index.common.converter.EngToKorConverter;
8 |
9 | import java.io.IOException;
10 | import java.util.LinkedList;
11 | import java.util.Queue;
12 |
13 | /**
14 | * 영한 변환 필터
15 | *
16 | * @author hrkim
17 | *
18 | */
19 | public final class JavacafeEng2KorConvertFilter3 extends TokenFilter {
20 |
21 | private EngToKorConverter converter;
22 | private CharTermAttribute termAtt;
23 |
24 | private PositionIncrementAttribute positionIncrementAttribute;
25 | private Queue simpleQueue;
26 |
27 |
28 | public JavacafeEng2KorConvertFilter3(TokenStream stream) {
29 | super(stream);
30 | this.converter = new EngToKorConverter();
31 | this.termAtt = addAttribute(CharTermAttribute.class);
32 |
33 | this.positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
34 |
35 | this.simpleQueue = new LinkedList();
36 | }
37 |
38 |
39 | @Override
40 | public boolean incrementToken() throws IOException {
41 |
42 | if (!simpleQueue.isEmpty()) {
43 | char[] buffer = simpleQueue.poll();
44 | termAtt.setEmpty();
45 | termAtt.copyBuffer(buffer, 0, buffer.length);
46 |
47 | positionIncrementAttribute.setPositionIncrement(0);
48 |
49 | return true;
50 | }
51 |
52 | if (input.incrementToken()) {
53 | String result = converter.convert(termAtt.toString());
54 | simpleQueue.add(result.toCharArray());
55 |
56 | return true;
57 | }
58 |
59 | return false;
60 | }
61 |
62 |
63 |
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/eng2kor/JavacafeEng2KorConvertFilterFactory.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.analysis.eng2kor;
2 |
3 | import org.apache.lucene.analysis.TokenStream;
4 | import org.elasticsearch.common.settings.Settings;
5 | import org.elasticsearch.env.Environment;
6 | import org.elasticsearch.index.IndexSettings;
7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
8 |
9 |
10 | public class JavacafeEng2KorConvertFilterFactory extends AbstractTokenFilterFactory {
11 |
12 |
13 | public JavacafeEng2KorConvertFilterFactory(IndexSettings indexSettings, Environment env , String name, Settings settings) {
14 | super(indexSettings, name, settings);
15 | }
16 |
17 |
18 | @Override
19 | public TokenStream create(TokenStream tokenStream) {
20 | return new JavacafeEng2KorConvertFilter(tokenStream);
21 | }
22 |
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/jamo/JavacafeJamoTokenFilter.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.analysis.jamo;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.lucene.analysis.TokenFilter;
6 | import org.apache.lucene.analysis.TokenStream;
7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
8 | import org.elasticsearch.index.common.parser.KoreanJamoParser;
9 |
10 | /**
11 | * 한글 자모 분석 필터
12 | *
13 | * @author hrkim
14 | *
15 | */
16 | public final class JavacafeJamoTokenFilter extends TokenFilter {
17 |
18 | private KoreanJamoParser parser;
19 | private CharTermAttribute termAtt;
20 |
21 |
22 | public JavacafeJamoTokenFilter(TokenStream stream) {
23 | super(stream);
24 | this.parser = new KoreanJamoParser();
25 | this.termAtt = addAttribute(CharTermAttribute.class);
26 | }
27 |
28 |
29 | /**
30 | * 한글 자모 Parser를 이용하여 토큰을 파싱하고 Term을 구한다.
31 | */
32 | @Override
33 | public boolean incrementToken() throws IOException {
34 |
35 | if (input.incrementToken()) {
36 | CharSequence parserdData = parser.parse(termAtt.toString());
37 | termAtt.setEmpty();
38 | termAtt.append(parserdData);
39 |
40 | return true;
41 | }
42 |
43 | return false;
44 | }
45 |
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/jamo/JavacafeJamoTokenFilterFactory.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.analysis.jamo;
2 |
3 | import org.apache.lucene.analysis.TokenStream;
4 | import org.elasticsearch.common.settings.Settings;
5 | import org.elasticsearch.env.Environment;
6 | import org.elasticsearch.index.IndexSettings;
7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
8 |
9 | public class JavacafeJamoTokenFilterFactory extends AbstractTokenFilterFactory {
10 |
11 |
12 | public JavacafeJamoTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
13 | super(indexSettings, name, settings);
14 | }
15 |
16 |
17 | @Override
18 | public TokenStream create(TokenStream stream) {
19 | return new JavacafeJamoTokenFilter(stream);
20 | }
21 |
22 |
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/kor2eng/JavacafeKor2EngConvertFilter.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.analysis.kor2eng;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.lucene.analysis.TokenFilter;
6 | import org.apache.lucene.analysis.TokenStream;
7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
8 | import org.elasticsearch.index.common.converter.KorToEngConverter;
9 |
10 | /**
11 | * 한영 변환 필터
12 | *
13 | * @author hrkim
14 | *
15 | */
16 | public final class JavacafeKor2EngConvertFilter extends TokenFilter {
17 |
18 | private KorToEngConverter converter;
19 | private CharTermAttribute termAtt;
20 |
21 |
22 | public JavacafeKor2EngConvertFilter(TokenStream stream) {
23 | super(stream);
24 | this.converter = new KorToEngConverter();
25 | this.termAtt = addAttribute(CharTermAttribute.class);
26 | }
27 |
28 |
29 | @Override
30 | public boolean incrementToken() throws IOException {
31 |
32 | if (input.incrementToken()) {
33 | CharSequence parserdData = converter.convert(termAtt.toString());
34 | termAtt.setEmpty();
35 | termAtt.append(parserdData);
36 |
37 | return true;
38 | }
39 |
40 | return false;
41 | }
42 |
43 |
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/kor2eng/JavacafeKor2EngConvertFilterFactory.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.analysis.kor2eng;
2 |
3 | import org.apache.lucene.analysis.TokenStream;
4 | import org.elasticsearch.common.settings.Settings;
5 | import org.elasticsearch.env.Environment;
6 | import org.elasticsearch.index.IndexSettings;
7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
8 |
9 |
10 | public class JavacafeKor2EngConvertFilterFactory extends AbstractTokenFilterFactory {
11 |
12 |
13 | public JavacafeKor2EngConvertFilterFactory(IndexSettings indexSettings, Environment env , String name, Settings settings) {
14 | super(indexSettings, name, settings);
15 | }
16 |
17 |
18 | @Override
19 | public TokenStream create(TokenStream tokenStream) {
20 | return new JavacafeKor2EngConvertFilter(tokenStream);
21 | }
22 |
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/spell/JavacafeSpellFilter.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.analysis.spell;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.lucene.analysis.TokenFilter;
6 | import org.apache.lucene.analysis.TokenStream;
7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
8 | import org.elasticsearch.index.common.parser.KoreanJamoParser;
9 |
10 | /**
11 | * 스펠링 체크 필터
12 | *
13 | * @author hrkim
14 | *
15 | */
16 | public final class JavacafeSpellFilter extends TokenFilter {
17 |
18 | private KoreanJamoParser parser;
19 | private CharTermAttribute termAtt;
20 |
21 |
22 | public JavacafeSpellFilter(TokenStream stream) {
23 | super(stream);
24 | this.parser = new KoreanJamoParser();
25 | this.termAtt = addAttribute(CharTermAttribute.class);
26 | }
27 |
28 |
29 | /**
30 | * 한글 자모 Parser를 이용하여 토큰을 파싱하고 Term을 구한다.
31 | */
32 | @Override
33 | public boolean incrementToken() throws IOException {
34 |
35 | if (input.incrementToken()) {
36 | CharSequence parserdData = parser.parse(termAtt.toString());
37 | termAtt.setEmpty();
38 | termAtt.append(parserdData);
39 |
40 | return true;
41 | }
42 |
43 | return false;
44 | }
45 |
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/spell/JavacafeSpellFilterFactory.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.analysis.spell;
2 |
3 | import org.apache.lucene.analysis.TokenStream;
4 | import org.elasticsearch.common.settings.Settings;
5 | import org.elasticsearch.env.Environment;
6 | import org.elasticsearch.index.IndexSettings;
7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
8 |
9 | public class JavacafeSpellFilterFactory extends AbstractTokenFilterFactory {
10 |
11 |
12 | public JavacafeSpellFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
13 | super(indexSettings, name, settings);
14 | }
15 |
16 |
17 | @Override
18 | public TokenStream create(TokenStream stream) {
19 | return new JavacafeSpellFilter(stream);
20 | }
21 |
22 |
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/converter/EngToKorConverter.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.common.converter;
2 |
3 | import java.util.Map;
4 |
5 | import org.elasticsearch.index.common.util.JamoUtil;
6 | import org.elasticsearch.index.common.util.KeyboardUtil;
7 |
8 | /**
9 | * 영한 오타 변환기 (Eng -> Kor)
10 | *
11 | * @author hrkim
12 | *
13 | */
14 | public class EngToKorConverter {
15 |
16 |
17 | /**
18 | * 토큰을 영문 키보드 기준으로 변환한다.
19 | *
20 | * @param token
21 | * @return
22 | */
23 | public String convert(String token) {
24 | StringBuilder sb = new StringBuilder();
25 |
26 | // 문자열을 한글자씩 잘라서 처리한다.
27 | String word = token.trim();
28 | for (int index = 0; index < word.length(); index++) {
29 |
30 | // 처리 불가능한 글자는 그냥 넘긴다.
31 | if (KeyboardUtil.IGNORE_CHAR.indexOf(word.substring(index, index + 1)) > -1) {
32 | sb.append(word.substring(index, index + 1));
33 | index++;
34 | }
35 | if (index >= word.length()) {
36 | break;
37 | }
38 |
39 | try {
40 | // 초성 정보를 구한다.
41 | Map mChoSung = KeyboardUtil.getInfoForChoSung(index, word);
42 | int cho = mChoSung.get("code");
43 | index = mChoSung.get("idx");
44 |
45 | // 중성 정보를 구한다.
46 | Map mJungSung = KeyboardUtil.getInfoForJungSung(index, word);
47 | int jung = mJungSung.get("code");
48 | index = mJungSung.get("idx");
49 |
50 | // 종성 정보를 구한다.
51 | Map mJongSung = KeyboardUtil.getInfoForJongSung(index, word);
52 | int jong = mJongSung.get("code");
53 | index = mJongSung.get("idx");
54 |
55 | // 한글 유니코드를 생성한다.
56 | sb.append((char) (JamoUtil.START_KOREA_UNICODE + cho + jung + jong));
57 |
58 | } catch(Exception e) {}
59 | }
60 |
61 | return sb.toString();
62 | }
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 | }
71 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/converter/KorToEngConverter.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.common.converter;
2 |
3 | import org.apache.commons.lang3.StringUtils;
4 | import org.elasticsearch.index.common.type.CodeType;
5 | import org.elasticsearch.index.common.util.JamoUtil;
6 | import org.elasticsearch.index.common.util.KeyboardUtil;
7 |
8 | /**
9 | * 한영 오타 변환기 (Kor -> Eng)
10 | *
11 | * @author hrkim
12 | *
13 | */
14 | public class KorToEngConverter {
15 |
16 |
17 | /**
18 | * 토큰을 한글 키보드 기준으로 변환한다.
19 | *
20 | * @param token
21 | * @return
22 | */
23 | public String convert(String token) {
24 | StringBuilder sb = new StringBuilder();
25 |
26 | // 문자열을 한글자씩 잘라서 처리한다.
27 | String word = token.trim();
28 | for (int index = 0; index < word.length(); index++) {
29 |
30 | // 처리 불가능한 글자는 그냥 넘긴다.
31 | if (KeyboardUtil.IGNORE_CHAR.indexOf(word.substring(index, index + 1)) > -1) {
32 | sb.append(word.substring(index, index + 1));
33 | index++;
34 | }
35 | if (index >= word.length()) {
36 | break;
37 | }
38 |
39 | try {
40 | int init = word.charAt(index);
41 | int initUnicode = init - JamoUtil.START_KOREA_UNICODE;
42 |
43 | if (initUnicode > 0) {
44 | /**
45 | * 1글자로 조합형 한글이 들어올 경우 처리
46 | */
47 | int cho = initUnicode / 21 / 28; // 0 ~ 18
48 | String strCho = getSameEngChar(CodeType.CHOSUNG, cho);
49 | if (StringUtils.isNotEmpty(strCho)) {
50 | sb.append(strCho);
51 | }
52 |
53 |
54 | int jung = initUnicode / 28 % 21; // 0 ~ 20
55 | String strJung = getSameEngChar(CodeType.JUNGSUNG, jung);
56 | if (StringUtils.isNotEmpty(strJung)) {
57 | sb.append(strJung);
58 | }
59 |
60 | int jong = initUnicode % 28; // 0 ~ 27
61 | String strJong = getSameEngChar(CodeType.JONGSUNG, jong);
62 | if (StringUtils.isNotEmpty(strJong)) {
63 | sb.append(strJong);
64 | }
65 |
66 | } else {
67 | /**
68 | * 1글자로 자모가 들어올 경우 처리
69 | */
70 | String subStr = String.valueOf((char) init);
71 | sb.append(getSameEngCharForJamo(subStr, 0));
72 | }
73 | } catch(Exception e) {}
74 | }
75 |
76 | return sb.toString();
77 | }
78 |
79 |
80 |
81 |
82 | private String getSameEngChar(CodeType type, int pos) {
83 | switch (type) {
84 | case CHOSUNG:
85 | return KeyboardUtil.KEYBOARD_CHO_SUNG[pos];
86 |
87 | case JUNGSUNG:
88 | return KeyboardUtil.KEYBOARD_JUNG_SUNG[pos];
89 |
90 | case JONGSUNG:
91 | if ((pos - 1) > -1) {
92 | return KeyboardUtil.KEYBOARD_JONG_SUNG[pos - 1];
93 | }
94 | return "";
95 | }
96 |
97 | return "";
98 | }
99 |
100 |
101 | private String getSameEngCharForJamo(String key, int pos) {
102 | for (int i=0; i jamoList) throws Exception {
22 | String result = "";
23 |
24 | if (jamoList.size() == 0) {
25 | return "";
26 | }
27 |
28 | int jungSungSize = HangulUtil.JUNG_SUNG.length;
29 | int jongSungSize = HangulUtil.JONG_SUNG.length;
30 |
31 | int startIdx = 0;
32 | while (true) {
33 | if (startIdx >= jamoList.size()) {
34 | break;
35 | }
36 |
37 | // 자모 리스트에서 한글 한글자에 해당하는 사이즈를 구한다.
38 | int oneHangulJamoSize = HangulUtil.getOneHangulJamoSize(startIdx, jamoList);
39 | if (oneHangulJamoSize == -1) {
40 | throw new Exception("한글은 최소 2개 이상의 유니코드 조합으로 이루어져야 합니다.");
41 | }
42 |
43 | // 한글 유니코드가 시작되는 Decimal값을 구한다.
44 | int decimalCode = HangulUtil.START_KOREA_UNICODE_DECIMAL;
45 |
46 | // 초성에 해당하는 Decimal값을 더한다.
47 | int chosungIdx = HangulUtil.getChoSungIndex(startIdx, jamoList);
48 | if (chosungIdx >= 0) {
49 | decimalCode = decimalCode + (jongSungSize * jungSungSize * chosungIdx);
50 | }
51 |
52 | // 중성에 해당하는 Decimal값을 더한다.
53 | int jungsungIdx = HangulUtil.getJungSungIndex(startIdx, jamoList);
54 | if (jungsungIdx >= 0) {
55 | decimalCode = decimalCode + (jongSungSize * jungsungIdx);
56 | }
57 |
58 | // 종성에 해당하는 Decimal값을 더한다.
59 | if (oneHangulJamoSize > 2) {
60 | int jongsungIdx = HangulUtil.getJongSungIndex(startIdx, jamoList);
61 | if (jongsungIdx >= 0) {
62 | decimalCode = decimalCode + jongsungIdx;
63 | }
64 | }
65 |
66 | // Decimal값을 String으로 변환한다.
67 | String hangul = Character.toString((char)decimalCode);
68 | result = result + hangul;
69 |
70 | startIdx = startIdx + oneHangulJamoSize;
71 | }
72 |
73 | return result;
74 | }
75 |
76 |
77 |
78 | }
79 |
80 |
81 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/parser/AbstractKoreanParser.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.common.parser;
2 |
3 | import org.apache.commons.lang3.StringUtils;
4 | import org.elasticsearch.index.common.util.JamoUtil;
5 |
6 | /**
7 | * 한글 기본 Parser
8 | *
9 | * @author hrkim
10 | *
11 | */
12 | public abstract class AbstractKoreanParser {
13 |
14 |
15 | /**
16 | * 토큰을 자음과 모음으로 파싱한다.
17 | *
18 | * @param token
19 | * @return
20 | */
21 | public String parse(String token) {
22 | if (StringUtils.isBlank(token)) {
23 | return "";
24 | }
25 |
26 | StringBuilder result = new StringBuilder();
27 |
28 | // 토큰을 한글자씩 잘라서 처리한다.
29 | char[] arrCh = token.toCharArray();
30 | for(char ch : arrCh) {
31 |
32 | // 처리 할 char의 유니코드 인덱스를 구한다.
33 | char unicodeIndex = (char)(ch - JamoUtil.START_KOREA_UNICODE);
34 |
35 | // 한글 유니코드 범위 : 0xAC00 ~ 0xD7AF (11184개)
36 | // 한글 유니코드인지 검사한다.
37 | if(unicodeIndex >= 0 && unicodeIndex <= 11184) {
38 |
39 | // 초성 유니코드
40 | int idxChoSung = unicodeIndex / (28 * 21);
41 | char chosung = JamoUtil.UNICODE_CHO_SUNG[idxChoSung];
42 |
43 | // 중성 유니코드
44 | int idxJungSung = unicodeIndex % (28 * 21) / 28;
45 | char jungsung = JamoUtil.UNICODE_JUNG_SUNG[idxJungSung];
46 |
47 | // 종성 유니코드
48 | int idxJongSung = unicodeIndex % (28 * 21) % 28;
49 | char jongsung = JamoUtil.UNICODE_JONG_SUNG[idxJongSung];
50 |
51 | // 한글 한글자를 처리한다.
52 | processForKoreanChar(result, chosung, jungsung, jongsung);
53 |
54 | } else {
55 |
56 | // 한글이 아닌 한글자를 처리한다.
57 | processForOther(result, ch);
58 | }
59 | }
60 |
61 | // 토큰을 분석한 최종 결과를 리턴한다.
62 | return result.toString();
63 | }
64 |
65 |
66 | /**
67 | * 한글 문자를 처리한다.
68 | *
69 | * @param sb
70 | * @param chosung
71 | * @param jungsung
72 | * @param jongsung
73 | */
74 | protected abstract void processForKoreanChar(StringBuilder sb, char chosung, char jungsung, char jongsung);
75 |
76 |
77 | /**
78 | * 한글 문자를 제외한 일반 문자를 처리한다.
79 | *
80 | * @param sb
81 | * @param eachToken
82 | */
83 | protected abstract void processForOther(StringBuilder sb, char eachToken);
84 |
85 |
86 |
87 | }
88 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/parser/KoreanChosungParser.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.common.parser;
2 |
3 | /**
4 | * 한글 초성 Parser
5 | *
6 | * @author hrkim
7 | *
8 | */
9 | public class KoreanChosungParser extends AbstractKoreanParser {
10 |
11 |
12 | @Override
13 | protected void processForKoreanChar(StringBuilder sb, char chosung, char jungsung, char jongsung) {
14 | sb.append(chosung);
15 | }
16 |
17 |
18 |
19 | @Override
20 | protected void processForOther(StringBuilder sb, char eachToken) {
21 | sb.append(eachToken);
22 | }
23 |
24 |
25 |
26 | }
27 |
28 |
29 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/parser/KoreanJamoParser.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.common.parser;
2 |
3 | import org.elasticsearch.index.common.util.JamoUtil;
4 |
5 | /**
6 | * 한글 자모 Parser
7 | *
8 | * @author hrkim
9 | *
10 | */
11 | public class KoreanJamoParser extends AbstractKoreanParser {
12 |
13 |
14 |
15 |
16 | @Override
17 | protected void processForKoreanChar(StringBuilder sb, char chosung, char jungsung, char jongsung) {
18 | sb.append(chosung).append(jungsung);
19 |
20 | if(jongsung != JamoUtil.UNICODE_JONG_SUNG_EMPTY) {
21 | sb.append(jongsung);
22 | }
23 | }
24 |
25 |
26 |
27 | @Override
28 | protected void processForOther(StringBuilder sb, char eachToken) {
29 | sb.append(eachToken);
30 | }
31 |
32 |
33 |
34 | }
35 |
36 |
37 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/type/CodeType.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.common.type;
2 |
/**
 * The three components a Hangul syllable is built from.
 *
 * @author hrkim
 */
public enum CodeType {

    /** Initial consonant. */
    CHOSUNG,

    /** Medial vowel. */
    JUNGSUNG,

    /** Final consonant. */
    JONGSUNG

}
27 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/util/HangulUtil.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.common.util;
2 |
3 | import java.util.List;
4 |
5 | /**
6 | * 한글 유니코드 유틸리티
7 | *
8 | * http://www.unicode.org/charts/PDF/UAC00.pdf
9 | *
10 | * @author hrkim
11 | *
12 | */
13 | public class HangulUtil {
14 |
15 |
16 | /**
17 | * 초성 (19자)
18 | */
19 | public static final char[] CHO_SUNG = {
20 | 'ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ', 'ㅅ',
21 | 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'
22 | };
23 |
24 |
25 | /**
26 | * 중성 (21자)
27 | */
28 | public static final char[] JUNG_SUNG = {
29 | 'ㅏ', 'ㅐ', 'ㅑ', 'ㅒ', 'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ', 'ㅗ', 'ㅘ',
30 | 'ㅙ', 'ㅚ', 'ㅛ', 'ㅜ', 'ㅝ', 'ㅞ', 'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ', 'ㅣ'
31 | };
32 |
33 |
34 | /**
35 | * 종성 (28자) - "빈값" 포함
36 | */
37 | public static final char[] JONG_SUNG = {
38 | ' ', 'ㄱ', 'ㄲ', 'ㄳ', 'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ', 'ㄹ', 'ㄺ',
39 | 'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ', 'ㅁ', 'ㅂ', 'ㅄ', 'ㅅ',
40 | 'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'
41 | };
42 |
43 |
44 | /**
45 | * 한글 유니코드의 시작값 (가)
46 | *
47 | * 16진수 : 0xAC00
48 | * 10진수 : 44032
49 | *
50 | */
51 | public static final int START_KOREA_UNICODE_DECIMAL = 44032;
52 |
53 |
54 |
55 |
56 |
57 |
58 | public static int getOneHangulJamoSize(int startIdx, List jamoList) {
59 | int remainJamoSize = jamoList.size() - startIdx;
60 |
61 | if (remainJamoSize == 1) {
62 | return -1;
63 | }
64 |
65 | if (remainJamoSize == 2 || remainJamoSize == 3) {
66 | return remainJamoSize;
67 | }
68 |
69 | // 초성이나 종성은 겹치는 문자가 존재하기 때문에
70 | // 다음 글자의 중성을 이용하여 한글자의 사이즈를 검사한다.
71 | String strJungSung = new String(JUNG_SUNG);
72 | String strChar = jamoList.get(startIdx + 3);
73 |
74 | if (strJungSung.contains(strChar)) {
75 | return 2;
76 | }
77 |
78 | return 3;
79 | }
80 |
81 |
82 | public static int getChoSungIndex(int startIdx, List jamoList) {
83 | String strChoSung = new String(CHO_SUNG);
84 | String strChoSungChar = jamoList.get(startIdx);
85 |
86 | return strChoSung.indexOf(strChoSungChar);
87 | }
88 |
89 |
90 | public static int getJungSungIndex(int startIdx, List jamoList) {
91 | String strJungSung = new String(HangulUtil.JUNG_SUNG);
92 | String strJungSungChar = jamoList.get(startIdx + 1);
93 |
94 | return strJungSung.indexOf(strJungSungChar);
95 | }
96 |
97 |
98 | public static int getJongSungIndex(int startIdx, List jamoList) {
99 | String strJongSung = new String(HangulUtil.JONG_SUNG);
100 | String strJongSungChar = jamoList.get(startIdx + 2);
101 |
102 | return strJongSung.indexOf(strJongSungChar);
103 | }
104 |
105 |
106 |
107 | }
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/util/JamoUtil.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.common.util;
2 |
/**
 * Code point tables for the jamo that make up Hangul syllables.
 *
 * https://www.unicode.org/charts/PDF/U1100.pdf
 *
 * @author hrkim
 */
public class JamoUtil {


    /**
     * Value used in the final-consonant table for "no final".
     */
    public static final char UNICODE_JONG_SUNG_EMPTY = 0x0000;


    /**
     * Code point of the first Hangul syllable (가).
     *
     * The precomposed syllables run from 0xAC00 (가) to 0xD7A3 (힣);
     * a code point outside that range is not a Hangul syllable.
     */
    public static final char START_KOREA_UNICODE = 0xAC00;


    /**
     * Initial consonants (19).
     *
     * ㄱ ㄲ ㄴ ㄷ ㄸ ㄹ ㅁ ㅂ ㅃ ㅅ
     * ㅆ ㅇ ㅈ ㅉ ㅊ ㅋ ㅌ ㅍ ㅎ
     */
    public static final char[] UNICODE_CHO_SUNG = {
        0x3131, 0x3132, 0x3134, 0x3137, 0x3138,
        0x3139, 0x3141, 0x3142, 0x3143, 0x3145,
        0x3146, 0x3147, 0x3148, 0x3149, 0x314A,
        0x314B, 0x314C, 0x314D, 0x314E
    };


    /**
     * Medial vowels (21).
     *
     * ㅏ ㅐ ㅑ ㅒ ㅓ ㅔ ㅕ ㅖ ㅗ ㅘ
     * ㅙ ㅚ ㅛ ㅜ ㅝ ㅞ ㅟ ㅠ ㅡ ㅢ
     * ㅣ
     */
    public static final char[] UNICODE_JUNG_SUNG = {
        0x314F, 0x3150, 0x3151, 0x3152, 0x3153,
        0x3154, 0x3155, 0x3156, 0x3157, 0x3158,
        0x3159, 0x315A, 0x315B, 0x315C, 0x315D,
        0x315E, 0x315F, 0x3160, 0x3161, 0x3162,
        0x3163
    };


    /**
     * Final consonants (28): the "no final" marker followed by the 27 finals.
     *
     * (none) ㄱ ㄲ ㄳ ㄴ ㄵ ㄶ ㄷ ㄹ ㄺ
     * ㄻ ㄼ ㄽ ㄾ ㄿ ㅀ ㅁ ㅂ ㅄ ㅅ
     * ㅆ ㅇ ㅈ ㅊ ㅋ ㅌ ㅍ ㅎ
     */
    public static final char[] UNICODE_JONG_SUNG = {
        UNICODE_JONG_SUNG_EMPTY,
        0x3131, 0x3132, 0x3133, 0x3134, 0x3135,
        0x3136, 0x3137, 0x3139, 0x313A, 0x313B,
        0x313C, 0x313D, 0x313E, 0x313F, 0x3140,
        0x3141, 0x3142, 0x3144, 0x3145, 0x3146,
        0x3147, 0x3148, 0x314A, 0x314B, 0x314C,
        0x314D, 0x314E
    };

}
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/util/KeyboardUtil.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.index.common.util;
2 |
3 | import java.util.HashMap;
4 | import java.util.Map;
5 |
6 | import org.elasticsearch.index.common.type.CodeType;
7 |
8 | /**
9 | * 한글 키보드 유틸리티
10 | *
11 | *
12 | * @author hrkim
13 | *
14 | */
15 | public class KeyboardUtil {
16 |
17 |
/**
 * Characters the converters pass over without converting.
 */
public static final String IGNORE_CHAR = "`1234567890-=[]\\;',./~!@#$%^&*()_+{}|:\"<>?\' \' ";



/**
 * Latin keyboard keys for the initial consonants (19 entries).
 */
public static final String[] KEYBOARD_CHO_SUNG = {
"r", "R", "s", "e", "E", "f", "a", "q", "Q", "t",
"T", "d", "w", "W", "c", "z", "x", "v", "g"
};

/**
 * Latin keyboard keys for the medial vowels (21 entries);
 * compound vowels take two keys.
 */
public static final String[] KEYBOARD_JUNG_SUNG = {
"k", "o", "i", "O", "j", "p", "u", "P", "h", "hk",
"ho", "hl", "y", "n", "nj", "np", "nl", "b", "m", "ml", "l"
};

/**
 * Latin keyboard keys for the final consonants (27 entries);
 * the "no final" slot is not included.
 */
public static final String[] KEYBOARD_JONG_SUNG = {
"r", "R", "rt", "s", "sw", "sg", "e", "f", "fr", "fa",
"fq", "ft", "fx", "fv", "fg", "a", "q", "qt", "t", "T",
"d", "w", "c", "z", "x", "v", "g"
};



/**
 * Latin keys that need correcting when the Korean/English toggle was in the
 * wrong state (33 entries); index-aligned with KEYBOARD_KEY_KOR.
 */
public static final String[] KEYBOARD_KEY_ENG = {
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
"k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
"u", "v", "w", "x", "y", "z", "Q", "W", "E", "R",
"T", "O", "P"
};

/**
 * Korean jamo found on the same keys as KEYBOARD_KEY_ENG (33 entries).
 */
public static final String[] KEYBOARD_KEY_KOR = {
"ㅁ", "ㅠ", "ㅊ", "ㅇ", "ㄷ", "ㄹ", "ㅎ", "ㅗ", "ㅑ", "ㅓ",
"ㅏ", "ㅣ", "ㅡ", "ㅜ", "ㅐ", "ㅔ", "ㅂ", "ㄱ", "ㄴ", "ㅅ",
"ㅕ", "ㅍ", "ㅈ", "ㅌ", "ㅛ", "ㅋ", "ㅃ", "ㅉ", "ㄸ", "ㄲ",
"ㅆ", "ㅒ", "ㅖ"
};
71 |
72 |
73 |
74 |
75 |
76 | /**
77 | * 초성 정보를 제공한다.
78 | *
79 | * - 초성과 매칭된 코드 조회
80 | * - 한 자로 이루어진 초성코드만 존재한다.
81 | *
82 | * @param index
83 | * @param word
84 | * @return
85 | */
86 | public static Map getInfoForChoSung(int index, String word) {
87 | int code = KeyboardUtil.makeUnicodeIndex(CodeType.CHOSUNG, word.substring(index, index + 1));
88 | int idx = index + 1;
89 |
90 | Map m = new HashMap<>();
91 | m.put("code", code);
92 | m.put("idx", idx);
93 |
94 | return m;
95 | }
96 |
97 |
98 | /**
99 | * 중성 정보를 제공한다.
100 | *
101 | * - 중성과 매칭된 코드 조회
102 | * - 두 자로 이루어진 중성코드가 존재한다.
103 | *
104 | * @param index
105 | * @param word
106 | * @return
107 | */
108 | public static Map getInfoForJungSung(int index, String word) {
109 | int code = KeyboardUtil.getDoubleMedial(index, word);
110 | int idx = index + 2;
111 |
112 | if (-1 == code) {
113 | code = KeyboardUtil.getSingleMedial(index, word);
114 | idx = index + 1;
115 | }
116 |
117 | Map m = new HashMap<>();
118 | m.put("code", code);
119 | m.put("idx", idx);
120 |
121 | return m;
122 | }
123 |
124 |
125 | /**
126 | * 종성 정보를 제공한다.
127 | *
128 | * - 종성과 매칭된 코드 조회
129 | * - 두 자로 이루어진 종성코드가 존재한다.
130 | *
131 | * @param index
132 | * @param word
133 | * @return
134 | */
135 | public static Map getInfoForJongSung(int index, String word) {
136 | int code;
137 | int idx = index;
138 |
139 | int temp = KeyboardUtil.getDoubleFinal(idx, word);
140 | if (-1 == temp) {
141 | temp = KeyboardUtil.getSingleMedial(idx + 1, word);
142 | if (temp != -1) {
143 | code = 0;
144 | idx--;
145 | } else {
146 | code = KeyboardUtil.getSingleFinal(idx, word);
147 | if (code == -1) {
148 | code = 0;
149 | idx--;
150 | }
151 | }
152 |
153 | } else {
154 | code = temp;
155 | temp = KeyboardUtil.getSingleMedial(idx + 2, word);
156 | if (temp != -1) {
157 | code = KeyboardUtil.getSingleFinal(idx, word);
158 | } else {
159 | idx++;
160 | }
161 |
162 | }
163 |
164 | Map m = new HashMap<>();
165 | m.put("code", code);
166 | m.put("idx", idx);
167 |
168 | return m;
169 | }
170 |
171 |
172 |
173 |
174 | /**
175 | * 1자로 구성된 중성 유니코드 Index를 리턴한다.
176 | *
177 | * @param index
178 | * @param word
179 | * @return
180 | */
181 | private static int getSingleMedial(int index, String word) {
182 | if ((index + 1) <= word.length()) {
183 | return makeUnicodeIndex(CodeType.JUNGSUNG, word.substring(index, index+1));
184 | } else {
185 | return -1;
186 | }
187 | }
188 |
189 | /**
190 | * 2자로 구성된 중성 유니코드 Index를 리턴한다.
191 | *
192 | * @param index
193 | * @param word
194 | * @return
195 | */
196 | private static int getDoubleMedial(int index, String word) {
197 | if ((index + 2) > word.length()) {
198 | return -1;
199 | } else {
200 | return makeUnicodeIndex(CodeType.JUNGSUNG, word.substring(index, index+2));
201 | }
202 | }
203 |
204 | /**
205 | * 1자로 구성된 종성 유니코드 Index를 리턴한다.
206 | *
207 | * @param index
208 | * @param word
209 | * @return
210 | */
211 | private static int getSingleFinal(int index, String word) {
212 | if ((index + 1) <= word.length()) {
213 | return makeUnicodeIndex(CodeType.JONGSUNG, word.substring(index, index+1));
214 | } else {
215 | return -1;
216 | }
217 | }
218 |
219 | /**
220 | * 2자로 구성된 종성 유니코드 Index를 리턴한다.
221 | *
222 | * @param index
223 | * @param word
224 | * @return
225 | */
226 | private static int getDoubleFinal(int index, String word) {
227 | if ((index + 2) > word.length()) {
228 | return -1;
229 | } else {
230 | return makeUnicodeIndex(CodeType.JONGSUNG, word.substring(index, index+2));
231 | }
232 | }
233 |
234 |
235 | /**
236 | * 키보드상에 매칭된 유니코드값 Index를 리턴한다.
237 | *
238 | * @param type
239 | * @param sub_str
240 | * @return
241 | */
242 | private static int makeUnicodeIndex(CodeType type, String subStr) {
243 | switch (type) {
244 | case CHOSUNG:
245 | for (int i=0; i> getTokenFilters() {
26 | Map> extra = new HashMap<>();
27 |
28 | // (1) 한글 자모 분석 필터
29 | extra.put("javacafe_jamo", JavacafeJamoTokenFilterFactory::new);
30 |
31 | // (2) 한글 초성 분석 필터
32 | extra.put("javacafe_chosung", JavacafeChosungTokenFilterFactory::new);
33 |
34 | // (3) 영한 오타 변환 필터
35 | extra.put("javacafe_eng2kor", JavacafeEng2KorConvertFilterFactory::new);
36 |
37 | // (4) 한영 오타 변환 필터
38 | extra.put("javacafe_kor2eng", JavacafeKor2EngConvertFilterFactory::new);
39 |
40 | // (5) 한글 스펠링 체크 필터
41 | extra.put("javacafe_spell", JavacafeSpellFilterFactory::new);
42 |
43 | return extra;
44 | }
45 |
46 | }
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/src/main/resources/plugin-descriptor.properties:
--------------------------------------------------------------------------------
1 | name=${project.artifactId}
2 | description=Elasticsearch Javacafe Plugin.
3 | version=${project.version}
4 | classname=org.elasticsearch.plugin.analysis.JavacafePlugin
5 | elasticsearch.version=${elasticsearch.version}
6 | java.version=1.8
7 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/esTest/AbstractPluginTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.esTest;
2 |
3 | import java.io.IOException;
4 | import java.io.StringReader;
5 |
6 | import org.apache.lucene.analysis.TokenStream;
7 | import org.apache.lucene.analysis.Tokenizer;
8 | import org.apache.lucene.analysis.standard.StandardTokenizer;
9 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
10 | import org.elasticsearch.index.analysis.TokenFilterFactory;
11 | import org.elasticsearch.test.ESTestCase;
12 |
13 |
14 | public class AbstractPluginTest extends ESTestCase {
15 |
16 |
17 | public void runFilter(TokenFilterFactory myFilter, String source, String[] result) throws IOException {
18 | init();
19 |
20 | // StandardTokenizer 생성
21 | Tokenizer tokenizer = new StandardTokenizer();
22 | tokenizer.setReader(new StringReader(source));
23 |
24 |
25 | // 필터를 이용하여 tokenStream 생성
26 | TokenStream tokenStream = myFilter.create(tokenizer);
27 | tokenStream.reset();
28 |
29 | CharTermAttribute termAttr = tokenStream.getAttribute(CharTermAttribute.class);
30 |
31 |
32 | // 테스트 시작
33 | System.out.println("[소스] : " + source);
34 |
35 | int i = 0;
36 | while (tokenStream.incrementToken()) {
37 | String t = termAttr.toString();
38 |
39 | System.out.println("Token[" + i + "] => [예상결과] : " + result[i] + " , [실제결과] : " + t);
40 | //assertThat("Token 생성이 잘못되었습니다.", t, equalTo(result[i]));
41 |
42 | i++;
43 | }
44 |
45 | System.out.println("[결과] 생성된 Token 수 : " + i);
46 | //assertThat("Token 수가 일치하지 않습니다.", i, equalTo(result.length));
47 |
48 | destroy();
49 | }
50 |
51 |
52 | public void init() {
53 | System.out.println("-------------------------------");
54 | System.out.println("테스트를 시작합니다.");
55 | System.out.println("-------------------------------");
56 | }
57 |
58 |
59 | public void destroy() {
60 | System.out.println("-------------------------------");
61 | System.out.println("테스트를 종료합니다.");
62 | System.out.println("-------------------------------");
63 | }
64 |
65 | }
66 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/esTest/JavacafeChosungTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.esTest;
2 |
3 | import java.io.IOException;
4 |
5 | import org.elasticsearch.common.settings.Settings;
6 | import org.elasticsearch.index.Index;
7 | import org.elasticsearch.index.analysis.TokenFilterFactory;
8 | import org.elasticsearch.plugin.analysis.JavacafePlugin;
9 |
10 |
11 | public class JavacafeChosungTest extends AbstractPluginTest {
12 |
13 |
14 | /**
15 | * 초성 필터를 테스트한다.
16 | *
17 | * @throws IOException
18 | */
19 | public void testChosungFilter() throws Exception {
20 |
21 | String source = "자바카페 한글";
22 |
23 | String[] result = new String[]{
24 | "ㅈㅂㅋㅍ",
25 | "ㅎㄱ"
26 | };
27 |
28 | String filterName = "javacafe_chosung";
29 |
30 |
31 | // 실행
32 | TestAnalysis analysis = createTestAnalysis(
33 | new Index("test", ""), Settings.builder().build(), new JavacafePlugin()
34 | );
35 |
36 | TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName);
37 | runFilter(myFilter, source, result);
38 | }
39 |
40 |
41 |
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/esTest/JavacafeEng2KorTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.esTest;
2 |
3 | import java.io.IOException;
4 |
5 | import org.elasticsearch.common.settings.Settings;
6 | import org.elasticsearch.index.Index;
7 | import org.elasticsearch.index.analysis.TokenFilterFactory;
8 | import org.elasticsearch.plugin.analysis.JavacafePlugin;
9 |
10 |
11 | public class JavacafeEng2KorTest extends AbstractPluginTest {
12 |
13 |
14 | /**
15 | * 영한 오타 변환기를 테스트한다.
16 | *
17 | * @throws IOException
18 | */
19 | public void test() throws Exception {
20 |
21 | String source = "wkqkzkvp gksrmf";
22 |
23 | String[] result = new String[]{
24 | "자바카페",
25 | "한글"
26 | };
27 |
28 | String filterName = "javacafe_eng2kor";
29 |
30 |
31 | // 실행
32 | TestAnalysis analysis = createTestAnalysis(
33 | new Index("test", ""), Settings.builder().build(), new JavacafePlugin()
34 | );
35 |
36 | TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName);
37 | runFilter(myFilter, source, result);
38 | }
39 |
40 |
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/esTest/JavacafeJamoTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.esTest;
2 |
3 | import java.io.IOException;
4 |
5 | import org.elasticsearch.common.settings.Settings;
6 | import org.elasticsearch.index.Index;
7 | import org.elasticsearch.index.analysis.TokenFilterFactory;
8 | import org.elasticsearch.plugin.analysis.JavacafePlugin;
9 |
10 |
11 | public class JavacafeJamoTest extends AbstractPluginTest {
12 |
13 |
14 | /**
15 | * 자모 필터를 테스트한다.
16 | *
17 | * @throws IOException
18 | */
19 | public void testJamoFilter() throws Exception {
20 |
21 | String source = "자바카페 한글";
22 |
23 | String[] result = new String[]{
24 | "ㅈㅏㅂㅏㅋㅏㅍㅔ",
25 | "ㅎㅏㄴㄱㅡㄹ"
26 | };
27 |
28 | String filterName = "javacafe_jamo";
29 |
30 |
31 | // 실행
32 | TestAnalysis analysis = createTestAnalysis(
33 | new Index("test", ""), Settings.builder().build(), new JavacafePlugin()
34 | );
35 |
36 | TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName);
37 | runFilter(myFilter, source, result);
38 | }
39 |
40 |
41 |
42 |
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/esTest/JavacafeKor2EngTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.esTest;
2 |
3 | import java.io.IOException;
4 |
5 | import org.elasticsearch.common.settings.Settings;
6 | import org.elasticsearch.index.Index;
7 | import org.elasticsearch.index.analysis.TokenFilterFactory;
8 | import org.elasticsearch.plugin.analysis.JavacafePlugin;
9 |
10 |
11 | public class JavacafeKor2EngTest extends AbstractPluginTest {
12 |
13 |
14 | /**
15 | * 한영 오타 변환기를 테스트한다.
16 | *
17 | * @throws IOException
18 | */
19 | public void test() throws Exception {
20 |
21 | String source = "ㅓㅁㅍㅁㅊㅁㄹㄷ ㅑㅔㅗㅐㅜㄷ";
22 |
23 | String[] result = new String[]{
24 | "javacafe",
25 | "iphone"
26 | };
27 |
28 | String filterName = "javacafe_kor2eng";
29 |
30 |
31 | // 실행
32 | TestAnalysis analysis = createTestAnalysis(
33 | new Index("test", ""), Settings.builder().build(), new JavacafePlugin()
34 | );
35 |
36 | TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName);
37 | runFilter(myFilter, source, result);
38 | }
39 |
40 |
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/esTest/JavacafeSpellTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.esTest;
2 |
3 | import org.elasticsearch.common.settings.Settings;
4 | import org.elasticsearch.index.Index;
5 | import org.elasticsearch.index.analysis.TokenFilterFactory;
6 | import org.elasticsearch.plugin.analysis.JavacafePlugin;
7 |
8 |
9 | public class JavacafeSpellTest extends AbstractPluginTest {
10 |
11 |
12 | public void test1() throws Exception {
13 |
14 | String source = "자바카페 한글";
15 |
16 | String[] result = new String[]{
17 | "ㅈㅏㅂㅏㅋㅏㅍㅔ",
18 | "ㅎㅏㄴㄱㅡㄹ"
19 | };
20 |
21 | String filterName = "javacafe_spell";
22 |
23 |
24 | // 실행
25 | TestAnalysis analysis = createTestAnalysis(
26 | new Index("test", ""), Settings.builder().build(), new JavacafePlugin()
27 | );
28 |
29 | TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName);
30 | runFilter(myFilter, source, result);
31 | }
32 |
33 |
34 | public void test2() throws Exception {
35 |
36 | String source = "삼성전자";
37 |
38 | String[] result = new String[] {
39 | "",
40 | "ㅅㅏㅁㅅㅓㅇㅈㅓㄴㅈㅏ"
41 | };
42 |
43 | String filterName = "javacafe_spell";
44 |
45 |
46 | // 실행
47 | TestAnalysis analysis = createTestAnalysis(
48 | new Index("test", ""), Settings.builder().build(), new JavacafePlugin()
49 | );
50 |
51 | TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName);
52 | runFilter(myFilter, source, result);
53 | }
54 |
55 |
56 |
57 |
58 |
59 | }
60 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/luceneTest/TokenTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.luceneTest;
2 |
3 | import static org.junit.Assert.assertTrue;
4 |
5 | import java.io.IOException;
6 | import java.io.Reader;
7 | import java.io.StringReader;
8 |
9 | import org.apache.lucene.analysis.TokenStream;
10 | import org.apache.lucene.analysis.standard.StandardAnalyzer;
11 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
12 | import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
13 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
14 | import org.junit.Test;
15 |
16 | public class TokenTest {
17 |
18 |
19 | String text =
20 | "Every mammal on this planet instinctively develops a natural " +
21 | "equilibrium with the surrounding environment; " +
22 | "but you humans do not. Instead you multiply, " +
23 | "and multiply, until every resource is consumed." +
24 |
25 | "The only way for you to survive is to spread to another area. " +
26 |
27 | "There is another organism on this planet that follows the same pattern... a virus.";
28 |
29 | String fieldName = "content";
30 |
31 |
32 | @Test
33 | public void test() throws IOException {
34 |
35 | Reader textReader = new StringReader(text);
36 |
37 | // 필드명과 텍스트 값을 위한 TokenStream 생성
38 | StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
39 | TokenStream tokenStream = standardAnalyzer.tokenStream(fieldName, textReader);
40 |
41 | CharTermAttribute terms = tokenStream.addAttribute(CharTermAttribute.class);
42 | OffsetAttribute offsets = tokenStream.addAttribute(OffsetAttribute.class);
43 | PositionIncrementAttribute positions = tokenStream.addAttribute(PositionIncrementAttribute.class);
44 |
45 | System.out.println("INCR\t(START,\tEND)\tTERM");
46 | System.out.println();
47 |
48 | tokenStream.reset();
49 | while (tokenStream.incrementToken()) {
50 |
51 | // 다음 토큰을 읽을때마다 attribute 값이 새롭게 세팅되어 제공된다.
52 | String term = terms.toString();
53 |
54 | int increment = positions.getPositionIncrement();
55 |
56 | int start = offsets.startOffset();
57 | int end = offsets.endOffset();
58 |
59 | System.out.print(increment + "\t" + "(" + start + ",\t" + end + ")\t" + term);
60 | System.out.println();
61 | }
62 |
63 | standardAnalyzer.close();
64 |
65 |
66 | assertTrue(true);
67 | }
68 |
69 |
70 |
71 |
72 | }
73 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/utilTest/ConverterE2KTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.utilTest;
2 |
3 | import static org.junit.Assert.assertEquals;
4 |
5 | import org.elasticsearch.index.common.converter.EngToKorConverter;
6 | import org.junit.Test;
7 |
8 | public class ConverterE2KTest {
9 |
10 |
11 | @Test
12 | public void test1() {
13 | String token = "wkqkzkvp";
14 |
15 | EngToKorConverter convert = new EngToKorConverter();
16 | String result = convert.convert(token);
17 |
18 | System.out.println(result);
19 | assertEquals("자바카페", result);
20 | }
21 |
22 |
23 | @Test
24 | public void test2() {
25 | String token = "tkatjdwjswk";
26 |
27 | EngToKorConverter convert = new EngToKorConverter();
28 | String result = convert.convert(token);
29 |
30 | System.out.println(result);
31 | assertEquals("삼성전자", result);
32 | }
33 |
34 |
35 | @Test
36 | public void test3() {
37 | String token = "gksrmf";
38 |
39 | EngToKorConverter convert = new EngToKorConverter();
40 | String result = convert.convert(token);
41 |
42 | System.out.println(result);
43 | assertEquals("한글", result);
44 | }
45 |
46 |
47 | @Test
48 | public void test4() {
49 | String token = "gksrmf1";
50 |
51 | EngToKorConverter convert = new EngToKorConverter();
52 | String result = convert.convert(token);
53 |
54 | System.out.println(result);
55 | assertEquals("한글1", result);
56 | }
57 |
58 |
59 |
60 | }
61 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/utilTest/ConverterK2ETest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.utilTest;
2 |
3 | import static org.junit.Assert.assertEquals;
4 |
5 | import org.elasticsearch.index.common.converter.KorToEngConverter;
6 | import org.junit.Test;
7 |
8 | public class ConverterK2ETest {
9 |
10 |
11 |
12 | @Test
13 | public void test1() {
14 | String token = "ㅓㅁㅍㅁㅊㅁㄹㄷ";
15 |
16 | KorToEngConverter convert = new KorToEngConverter();
17 | String result = convert.convert(token);
18 |
19 | System.out.println(result);
20 | assertEquals("javacafe", result);
21 | }
22 |
23 |
24 | @Test
25 | public void test2() {
26 | String token = "ㅑㅔㅗㅐㅜㄷ";
27 |
28 | KorToEngConverter convert = new KorToEngConverter();
29 | String result = convert.convert(token);
30 |
31 | System.out.println(result);
32 | assertEquals("iphone", result);
33 | }
34 |
35 |
36 | @Test
37 | public void test3() {
38 | String token = "재ㅡ무";
39 |
40 | KorToEngConverter convert = new KorToEngConverter();
41 | String result = convert.convert(token);
42 |
43 | System.out.println(result);
44 | assertEquals("woman", result);
45 | }
46 |
47 |
48 | @Test
49 | public void test4() {
50 | String token = "ㄴ므녀ㅜㅎ";
51 |
52 | KorToEngConverter convert = new KorToEngConverter();
53 | String result = convert.convert(token);
54 |
55 | System.out.println(result);
56 | assertEquals("samsung", result);
57 | }
58 |
59 |
60 | @Test
61 | public void test5() {
62 | String token = "ㄴ므녀ㅜㅎ1";
63 |
64 | KorToEngConverter convert = new KorToEngConverter();
65 | String result = convert.convert(token);
66 |
67 | System.out.println(result);
68 | assertEquals("samsung1", result);
69 | }
70 |
71 |
72 | @Test
73 | public void test6() {
74 | String token = "신혼여행(身魂旅行)";
75 |
76 | KorToEngConverter convert = new KorToEngConverter();
77 | String result = convert.convert(token);
78 |
79 | System.out.println(result);
80 | assertEquals("tlsghsdugod()", result);
81 | }
82 |
83 | }
84 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/utilTest/MergerTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.utilTest;
2 |
3 | import static org.junit.Assert.assertEquals;
4 |
5 | import java.util.ArrayList;
6 | import java.util.List;
7 |
8 | import org.elasticsearch.index.common.merger.KoreanMerger;
9 | import org.junit.Test;
10 |
11 | public class MergerTest {
12 |
13 |
14 | @Test
15 | public void mergerTest() throws Exception {
16 | List jasoList = new ArrayList<>();
17 | jasoList.add("ㅎ");
18 | jasoList.add("ㅏ");
19 | jasoList.add("ㄴ");
20 | jasoList.add("ㄱ");
21 | jasoList.add("ㅡ");
22 | jasoList.add("ㄹ");
23 |
24 | KoreanMerger merger = new KoreanMerger();
25 | String word = merger.merge(jasoList);
26 |
27 | System.out.println("결과 : " + word);
28 | assertEquals("한글", word);
29 | }
30 |
31 |
32 | @Test
33 | public void mergerTest2() throws Exception {
34 | List jasoList = new ArrayList<>();
35 | jasoList.add("ㅈ");
36 | jasoList.add("ㅏ");
37 | jasoList.add("ㅂ");
38 | jasoList.add("ㅏ");
39 | jasoList.add("ㅋ");
40 | jasoList.add("ㅏ");
41 | jasoList.add("ㅍ");
42 | jasoList.add("ㅔ");
43 |
44 | KoreanMerger merger = new KoreanMerger();
45 | String word = merger.merge(jasoList);
46 |
47 | System.out.println("결과 : " + word);
48 | assertEquals("자바카페", word);
49 | }
50 |
51 |
52 | @Test
53 | public void mergerTest3() throws Exception {
54 | List jasoList = new ArrayList<>();
55 | jasoList.add("ㅅ");
56 | jasoList.add("ㅏ");
57 | jasoList.add("ㅁ");
58 | jasoList.add("ㅅ");
59 | jasoList.add("ㅓ");
60 | jasoList.add("ㅇ");
61 | jasoList.add("ㅈ");
62 | jasoList.add("ㅓ");
63 | jasoList.add("ㄴ");
64 | jasoList.add("ㅈ");
65 | jasoList.add("ㅏ");
66 |
67 | KoreanMerger merger = new KoreanMerger();
68 | String word = merger.merge(jasoList);
69 |
70 | System.out.println("결과 : " + word);
71 | assertEquals("삼성전자", word);
72 | }
73 |
74 | }
75 |
76 |
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/utilTest/ParserChosungTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.utilTest;
2 |
3 | import static org.junit.Assert.assertEquals;
4 |
5 | import org.elasticsearch.index.common.parser.KoreanChosungParser;
6 | import org.junit.Test;
7 |
8 | public class ParserChosungTest {
9 |
10 |
11 | @Test
12 | public void chosungTest() {
13 | String token = "자바카페";
14 | KoreanChosungParser parser = new KoreanChosungParser();
15 | String result = parser.parse(token);
16 |
17 | System.out.println(result);
18 | assertEquals("ㅈㅂㅋㅍ", result);
19 | }
20 |
21 |
22 | @Test
23 | public void chosungTest2() {
24 | String token = "삼성전자";
25 | KoreanChosungParser parser = new KoreanChosungParser();
26 | String result = parser.parse(token);
27 |
28 | System.out.println(result);
29 | assertEquals("ㅅㅅㅈㅈ", result);
30 | }
31 |
32 |
33 |
34 |
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/utilTest/ParserJamoTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.utilTest;
2 |
3 | import static org.junit.Assert.assertEquals;
4 |
5 | import org.elasticsearch.index.common.parser.KoreanJamoParser;
6 | import org.junit.Test;
7 |
8 | public class ParserJamoTest {
9 |
10 |
11 | @Test
12 | public void jamoTest() {
13 | String token = "자바카페";
14 | KoreanJamoParser parser = new KoreanJamoParser();
15 | String result = parser.parse(token);
16 |
17 | System.out.println(result);
18 | assertEquals("ㅈㅏㅂㅏㅋㅏㅍㅔ", result);
19 | }
20 |
21 |
22 | @Test
23 | public void jamoTest2() {
24 | String token = "삼성전자";
25 | KoreanJamoParser parser = new KoreanJamoParser();
26 | String result = parser.parse(token);
27 |
28 | System.out.println(result);
29 | assertEquals("ㅅㅏㅁㅅㅓㅇㅈㅓㄴㅈㅏ", result);
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/utilTest/SpellCheckTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.plugin.utilTest;
2 |
3 | import static org.junit.Assert.assertEquals;
4 |
5 | import org.elasticsearch.index.common.parser.KoreanJamoParser;
6 | import org.junit.Test;
7 |
8 | public class SpellCheckTest {
9 |
10 |
11 | @Test
12 | public void spellTest() {
13 | String token = "자바카페";
14 | KoreanJamoParser parser = new KoreanJamoParser();
15 | String result = parser.parse(token);
16 |
17 | System.out.println(result);
18 | assertEquals("ㅈㅏㅂㅏㅋㅏㅍㅔ", result);
19 | }
20 |
21 |
22 |
23 | }
24 |
25 |
--------------------------------------------------------------------------------