├── .gitignore
├── README.md
├── pom.xml
├── release.xml
└── src
    ├── main
        ├── java
        │   └── org
        │   │   ├── apache
        │   │       └── solr
        │   │       │   └── index
        │   │       │       └── analysis
        │   │       │           ├── chosung
        │   │       │               └── JavacafeChosungTokenFilterFactory.java
        │   │       │           ├── eng2kor
        │   │       │               └── JavacafeEng2KorConvertFilterFactory.java
        │   │       │           ├── jamo
        │   │       │               └── JavacafeJamoTokenFilterFactory.java
        │   │       │           └── kor2eng
        │   │       │               └── JavacafeKor2EngConvertFilterFactory.java
        │   │   └── elasticsearch
        │   │       ├── index
        │   │           ├── analysis
        │   │           │   ├── chosung
        │   │           │   │   ├── JavacafeChosungTokenFilter.java
        │   │           │   │   └── JavacafeChosungTokenFilterFactory.java
        │   │           │   ├── eng2kor
        │   │           │   │   ├── JavacafeEng2KorConvertFilter.java
        │   │           │   │   ├── JavacafeEng2KorConvertFilter2.java
        │   │           │   │   ├── JavacafeEng2KorConvertFilter3.java
        │   │           │   │   └── JavacafeEng2KorConvertFilterFactory.java
        │   │           │   ├── jamo
        │   │           │   │   ├── JavacafeJamoTokenFilter.java
        │   │           │   │   └── JavacafeJamoTokenFilterFactory.java
        │   │           │   ├── kor2eng
        │   │           │   │   ├── JavacafeKor2EngConvertFilter.java
        │   │           │   │   └── JavacafeKor2EngConvertFilterFactory.java
        │   │           │   └── spell
        │   │           │   │   ├── JavacafeSpellFilter.java
        │   │           │   │   └── JavacafeSpellFilterFactory.java
        │   │           └── common
        │   │           │   ├── converter
        │   │           │       ├── EngToKorConverter.java
        │   │           │       └── KorToEngConverter.java
        │   │           │   ├── merger
        │   │           │       └── KoreanMerger.java
        │   │           │   ├── parser
        │   │           │       ├── AbstractKoreanParser.java
        │   │           │       ├── KoreanChosungParser.java
        │   │           │       └── KoreanJamoParser.java
        │   │           │   ├── type
        │   │           │       └── CodeType.java
        │   │           │   └── util
        │   │           │       ├── HangulUtil.java
        │   │           │       ├── JamoUtil.java
        │   │           │       └── KeyboardUtil.java
        │   │       └── plugin
        │   │           └── analysis
        │   │               └── JavacafePlugin.java
        └── resources
        │   └── plugin-descriptor.properties
    └── test
        └── java
            └── org
                └── elasticsearch
                    └── plugin
                        ├── esTest
                            ├── AbstractPluginTest.java
                            ├── JavacafeChosungTest.java
                            ├── JavacafeEng2KorTest.java
                            ├── JavacafeJamoTest.java
                            ├── JavacafeKor2EngTest.java
                            └── JavacafeSpellTest.java
                        ├── luceneTest
                            └── TokenTest.java
                        └── utilTest
                            ├── ConverterE2KTest.java
                            ├── ConverterK2ETest.java
                            ├── MergerTest.java
                            ├── ParserChosungTest.java
                            ├── ParserJamoTest.java
                            └── SpellCheckTest.java


/.gitignore:
--------------------------------------------------------------------------------
 1 | ﻿### Eclipse ###
 2 | *.pydevproject
 3 | .metadata
 4 | .gradle
 5 | bin/
 6 | tmp/
 7 | target/
 8 | *.tmp
 9 | *.bak
10 | *.swp
11 | *~.nib
12 | local.properties
13 | .settings/
14 | .loadpath
15 | .factorypath
16 | .classpath
17 | .project
18 | logs/
19 | .idea
20 | work/Tomcat/
21 | 
22 | # Spring
23 | .springBeans
24 | 
25 | # External tool builders
26 | .externalToolBuilders/
27 | 
28 | # Locally stored "Eclipse launch configurations"
29 | *.launch
30 | 
31 | # CDT-specific
32 | .cproject
33 | 
34 | # PDT-specific
35 | .buildpath
36 | 
37 | # sbteclipse plugin 
38 | .target
39 | 
40 | # TeXlipse plugin
41 | .texlipse
42 | 
43 | ### Maven ###
44 | pom.xml.tag
45 | pom.xml.releaseBackup
46 | pom.xml.versionsBackup
47 | pom.xml.next
48 | release.properties
49 | 
50 | ### Java ###
51 | *.class
52 | 
53 | # Mobile Tools for Java (J2ME)
54 | .mtj.tmp/
55 | 
56 | # Package Files #
57 | *.jar
58 | *.war
59 | *.ear
60 | 
61 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 
62 | hs_err_pid*
63 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # elasticsearch-plugin
 2 | 자바카페 Elasticsearch 플러그인
 3 | 
 4 | [elasticsearch-plugin](https://github.com/javacafe-project/elasticsearch-plugin)은 사용자가 한글을 쉽게 검색할 수 있도록 만들어진 플러그인입니다.
 5 | 
 6 | > 링크 다운로드
 7 | >
 8 | >[7.0.0](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v7.0.0)
 9 | >
10 | >[6.7.0](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.7.0)
11 | >
12 | >[6.6.2](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.6.2)
13 | >
14 | >[6.6.1](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.6.1)
15 | >
16 | >[6.6.0](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.6.0)
17 | >
18 | >[6.5.4](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.5.4)
19 | 
20 | # 설치방법
21 | >~~~~
22 | >elasticsearch-plugin install https://github.com/javacafe-project/elastic-book-etc/raw/master/plugin/javacafe-analyzer-6.4.3.zip
23 | >~~~~
24 | 
25 | # 제공기능
26 | 
27 | 엘라스틱서치 혹은 솔라의 최신 버전에서 사용 가능하며, 한글 기반의 자동완성/검색 결과를 더욱 효율적으로 사용하기 위해 개발된 플러그인입니다. 아래와 같은 기능을 제공합니다.
28 | 
29 | ## 초성추출
30 | 검색어로 들어오는 단어가 초성인 경우 검색 결과 혹은 자동완성의 결과를 초성으로 매칭하여 검색되게 하는 플러그인 입니다. 
31 | 
32 | ### 사용방법
33 | 
34 | 
35 | ## 자소분해
36 | 자동완성에서 한글을 검색 가능한 형태로 변형하는 플러그인입니다. 예를 들어 삼성전자의 경우 삼ㅅ만 검색하여도 삼성전자가 검색될 수 있도록 한글의 자소를 분해합니다.
37 | 
38 | ### 사용방법
39 | 
40 | 
41 | ## 한영/영한 오타교정
42 | 한글을 영문으로, 영문을 한글로 잘못 입력한 검색어를 보정해주는 플러그인입니다. 예를 들어 삼성전자를 tkatjdwjswk 라고 검색하거나 iphone 을 ㅑㅔㅙㅜㄷ와 같이 잘못 검색한 경우에도 검색 결과를 도출할 수 있도록 도와줍니다.
43 | 
44 | ### 사용방법
45 | 


--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  3 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  4 | 
  5 |     <modelVersion>4.0.0</modelVersion>
  6 | 
  7 |     <groupId>org.elasticsearch.plugin</groupId>
  8 |     <artifactId>javacafe-analyzer</artifactId>
  9 |     <version>1.0</version>
 10 | 
 11 |     <packaging>jar</packaging>
 12 | 
 13 |     <!-- 로컬에서는 테스트 문제로 6.2.2를 사용하자 -->
 14 |     <properties>
 15 |         <elasticsearch.version>6.4.3</elasticsearch.version>
 16 |         <!-- 		<elasticsearch.version>6.2.2</elasticsearch.version> -->
 17 |         <lucene.version>7.2.1</lucene.version>
 18 |     </properties>
 19 | 
 20 | 
 21 | 
 22 |     <dependencies>
 23 |         <dependency>
 24 |             <groupId>org.elasticsearch</groupId>
 25 |             <artifactId>elasticsearch</artifactId>
 26 |             <version>${elasticsearch.version}</version>
 27 |             <scope>provided</scope>
 28 |         </dependency>
 29 |         <dependency>
 30 |             <groupId>org.apache.solr</groupId>
 31 |             <artifactId>solr-core</artifactId>
 32 |             <version>${lucene.version}</version>
 33 |             <type>jar</type>
 34 |             <scope>provided</scope>
 35 |         </dependency>
 36 |         <dependency>
 37 |             <groupId>org.apache.lucene</groupId>
 38 |             <artifactId>lucene-core</artifactId>
 39 |             <version>${lucene.version}</version>
 40 |             <scope>provided</scope>
 41 |         </dependency>
 42 | 
 43 |         <dependency>
 44 |             <groupId>org.apache.commons</groupId>
 45 |             <artifactId>commons-lang3</artifactId>
 46 |             <version>3.5</version>
 47 |         </dependency>
 48 | 
 49 |         <dependency>
 50 |             <groupId>org.apache.logging.log4j</groupId>
 51 |             <artifactId>log4j-core</artifactId>
 52 |             <version>2.16.0</version>
 53 |             <scope>provided</scope>
 54 |         </dependency>
 55 | 
 56 | 
 57 |         <!-- 추후 scope를 test로 변경 -->
 58 |         <dependency>
 59 |             <groupId>org.elasticsearch.test</groupId>
 60 |             <artifactId>framework</artifactId>
 61 |             <version>${elasticsearch.version}</version>
 62 |             <scope>provided</scope>
 63 |             <exclusions>
 64 |                 <exclusion>
 65 |                     <groupId>junit</groupId>
 66 |                     <artifactId>junit</artifactId>
 67 |                 </exclusion>
 68 |             </exclusions>
 69 |         </dependency>
 70 | 
 71 | 
 72 |         <!-- 추후 scope를 test로 변경 -->
 73 |         <dependency>
 74 |             <groupId>junit</groupId>
 75 |             <artifactId>junit</artifactId>
 76 |             <version>4.11</version>
 77 |             <scope>provided</scope>
 78 |             <exclusions>
 79 |                 <exclusion>
 80 |                     <groupId>org.hamcrest</groupId>
 81 |                     <artifactId>hamcrest-core</artifactId>
 82 |                 </exclusion>
 83 |             </exclusions>
 84 |         </dependency>
 85 | 
 86 |     </dependencies>
 87 | 
 88 | 
 89 | 
 90 |     <build>
 91 |         <finalName>javacafe-analyzer-${elasticsearch.version}</finalName>
 92 |         <resources>
 93 |             <resource>
 94 |                 <directory>src/main/resources</directory>
 95 |                 <filtering>false</filtering>
 96 |                 <excludes>
 97 |                     <exclude>*.properties</exclude>
 98 |                 </excludes>
 99 |             </resource>
100 |         </resources>
101 |         <plugins>
102 |             <plugin>
103 |                 <groupId>org.apache.maven.plugins</groupId>
104 |                 <artifactId>maven-compiler-plugin</artifactId>
105 |                 <configuration>
106 |                     <source>1.8</source>
107 |                     <target>1.8</target>
108 |                     <encoding>UTF-8</encoding>
109 |                 </configuration>
110 |             </plugin>
111 |             <plugin>
112 |                 <groupId>org.apache.maven.plugins</groupId>
113 |                 <artifactId>maven-dependency-plugin</artifactId>
114 |                 <version>3.0.0</version>
115 |                 <executions>
116 |                     <execution>
117 |                         <id>copy-dependencies</id>
118 |                         <phase>package</phase>
119 |                         <goals>
120 |                             <goal>copy-dependencies</goal>
121 |                         </goals>
122 |                         <configuration>
123 |                             <outputDirectory>${project.build.directory}/lib</outputDirectory>
124 |                         </configuration>
125 |                     </execution>
126 |                 </executions>
127 |             </plugin>
128 |             <plugin>
129 |                 <groupId>org.apache.maven.plugins</groupId>
130 |                 <artifactId>maven-surefire-plugin</artifactId>
131 |                 <version>2.12.1</version>
132 |                 <configuration>
133 |                     <argLine>-Dtests.security.manager=false</argLine>
134 |                     <skipTests>true</skipTests>
135 |                 </configuration>
136 |             </plugin>
137 |             <plugin>
138 |                 <groupId>org.apache.maven.plugins</groupId>
139 |                 <artifactId>maven-assembly-plugin</artifactId>
140 |                 <version>3.0.0</version>
141 |                 <configuration>
142 |                     <appendAssemblyId>false</appendAssemblyId>
143 |                     <outputDirectory>${project.build.directory}/releases</outputDirectory>
144 |                     <descriptors>
145 |                         <descriptor>release.xml</descriptor>
146 |                     </descriptors>
147 |                 </configuration>
148 |                 <executions>
149 |                     <execution>
150 |                         <id>zip-with-dependencies</id>
151 |                         <phase>package</phase>
152 |                         <goals>
153 |                             <goal>single</goal>
154 |                         </goals>
155 |                     </execution>
156 |                 </executions>
157 |             </plugin>
158 |         </plugins>
159 |     </build>
160 | 
161 | </project>
162 | 


--------------------------------------------------------------------------------
/release.xml:
--------------------------------------------------------------------------------
 1 | <assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
 2 |           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 3 |           xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd">
 4 |     <!-- TODO: a jarjar format would be better -->
 5 |     <id>zip-with-dependencies</id>
 6 |     <formats>
 7 |         <format>zip</format>
 8 |     </formats>
 9 |     <includeBaseDirectory>false</includeBaseDirectory>
10 |     <files>
11 |         <file>
12 |             <source>${project.basedir}/src/main/resources/plugin-descriptor.properties</source>
13 |             <outputDirectory></outputDirectory>
14 |             <filtered>true</filtered>
15 |         </file>
16 |     </files>
17 |     <dependencySets>
18 |         <dependencySet>
19 |             <outputDirectory></outputDirectory>
20 |             <useProjectArtifact>true</useProjectArtifact>
21 |             <useTransitiveFiltering>true</useTransitiveFiltering>
22 |         </dependencySet>
23 |     </dependencySets>
24 | </assembly>


--------------------------------------------------------------------------------
/src/main/java/org/apache/solr/index/analysis/chosung/JavacafeChosungTokenFilterFactory.java:
--------------------------------------------------------------------------------
 1 | package org.apache.solr.index.analysis.chosung;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.apache.lucene.analysis.util.TokenFilterFactory;
 5 | import org.elasticsearch.common.settings.Settings;
 6 | import org.elasticsearch.env.Environment;
 7 | import org.elasticsearch.index.IndexSettings;
 8 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 9 | import org.elasticsearch.index.analysis.chosung.JavacafeChosungTokenFilter;
10 | 
11 | import java.util.Map;
12 | /**
13 |  * Solr {@link TokenFilterFactory} that wraps a token stream in a
14 |  * {@link JavacafeChosungTokenFilter} (Korean chosung, i.e. initial-consonant, analysis).
15 |  *
16 |  * <p>Example field type using this factory in both the index and query analyzers:
17 |  *
18 |  * <pre>{@code
19 |  * <fieldType name="text_auto_chosung" class="solr.TextField" positionIncrementGap="100">
20 |  *   <analyzer type="index">
21 |  *     <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
22 |  *     <filter class="solr.KoreanPartOfSpeechStopFilterFactory"/>
23 |  *     <filter class="solr.KoreanReadingFormFilterFactory"/>
24 |  *     <filter class="org.apache.solr.index.analysis.chosung.JavacafeChosungTokenFilterFactory"/>
25 |  *     <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="50"/>
26 |  *     <filter class="solr.LowerCaseFilterFactory"/>
27 |  *   </analyzer>
28 |  *   <analyzer type="query">
29 |  *     <tokenizer class="solr.StandardTokenizerFactory"/>
30 |  *     <filter class="org.apache.solr.index.analysis.chosung.JavacafeChosungTokenFilterFactory"/>
31 |  *     <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
32 |  *     <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
33 |  *     <filter class="solr.LowerCaseFilterFactory"/>
34 |  *   </analyzer>
35 |  * </fieldType>
36 |  * }</pre>
37 |  */
38 | public class JavacafeChosungTokenFilterFactory extends TokenFilterFactory {
39 | 
40 |     /**
41 |      * Creates the factory.
42 |      *
43 |      * @param args filter arguments, handed unchanged to the {@link TokenFilterFactory} constructor
44 |      */
45 |     public JavacafeChosungTokenFilterFactory(Map<String, String> args) {
46 |         super(args);
47 |     }
48 | 
49 |     /**
50 |      * Wraps the given stream in a new {@link JavacafeChosungTokenFilter}.
51 |      *
52 |      * @param stream upstream token stream
53 |      * @return a chosung filter reading from {@code stream}
54 |      */
55 |     @Override
56 |     public TokenStream create(TokenStream stream) {
57 |         final TokenStream chosungFilter = new JavacafeChosungTokenFilter(stream);
58 |         return chosungFilter;
59 |     }
60 | }
61 | 


--------------------------------------------------------------------------------
/src/main/java/org/apache/solr/index/analysis/eng2kor/JavacafeEng2KorConvertFilterFactory.java:
--------------------------------------------------------------------------------
 1 | package org.apache.solr.index.analysis.eng2kor;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.apache.lucene.analysis.util.TokenFilterFactory;
 5 | import org.elasticsearch.index.analysis.eng2kor.JavacafeEng2KorConvertFilter;
 6 | 
 7 | import java.util.Map;
 8 | /**
 9 |  * Solr {@link TokenFilterFactory} that wraps a token stream in a
10 |  * {@link JavacafeEng2KorConvertFilter} (English-to-Korean term conversion).
11 |  *
12 |  * <p>Example field type; only the query analyzer applies the conversion filter:
13 |  *
14 |  * <pre>{@code
15 |  * <fieldType name="text_auto_en2ko" class="solr.TextField" positionIncrementGap="100">
16 |  *   <analyzer type="index">
17 |  *     <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
18 |  *     <filter class="solr.KoreanPartOfSpeechStopFilterFactory"/>
19 |  *     <filter class="solr.KoreanReadingFormFilterFactory"/>
20 |  *     <filter class="solr.LowerCaseFilterFactory"/>
21 |  *   </analyzer>
22 |  *   <analyzer type="query">
23 |  *     <tokenizer class="solr.StandardTokenizerFactory"/>
24 |  *     <filter class="org.apache.solr.index.analysis.eng2kor.JavacafeEng2KorConvertFilterFactory"/>
25 |  *     <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
26 |  *     <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
27 |  *     <filter class="solr.LowerCaseFilterFactory"/>
28 |  *   </analyzer>
29 |  * </fieldType>
30 |  * }</pre>
31 |  */
32 | public class JavacafeEng2KorConvertFilterFactory extends TokenFilterFactory {
33 | 
34 |     /**
35 |      * Creates the factory.
36 |      *
37 |      * @param args filter arguments, handed unchanged to the {@link TokenFilterFactory} constructor
38 |      */
39 |     public JavacafeEng2KorConvertFilterFactory(Map<String, String> args) {
40 |         super(args);
41 |     }
42 | 
43 |     /**
44 |      * Wraps the given stream in a new {@link JavacafeEng2KorConvertFilter}.
45 |      *
46 |      * @param tokenStream upstream token stream
47 |      * @return a conversion filter reading from {@code tokenStream}
48 |      */
49 |     @Override
50 |     public TokenStream create(TokenStream tokenStream) {
51 |         final TokenStream convertFilter = new JavacafeEng2KorConvertFilter(tokenStream);
52 |         return convertFilter;
53 |     }
54 | }
55 | 


--------------------------------------------------------------------------------
/src/main/java/org/apache/solr/index/analysis/jamo/JavacafeJamoTokenFilterFactory.java:
--------------------------------------------------------------------------------
 1 | package org.apache.solr.index.analysis.jamo;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.apache.lucene.analysis.util.TokenFilterFactory;
 5 | import org.elasticsearch.index.analysis.jamo.JavacafeJamoTokenFilter;
 6 | 
 7 | import java.util.Map;
 8 | 
 9 | /**
10 |  * Solr {@link TokenFilterFactory} that wraps a token stream in a
11 |  * {@link JavacafeJamoTokenFilter} (Korean jamo analysis).
12 |  */
13 | public class JavacafeJamoTokenFilterFactory extends TokenFilterFactory {
14 | 
15 |     /**
16 |      * Creates the factory.
17 |      *
18 |      * @param args filter arguments, handed unchanged to the {@link TokenFilterFactory} constructor
19 |      */
20 |     public JavacafeJamoTokenFilterFactory(Map<String, String> args) {
21 |         super(args);
22 |     }
23 | 
24 |     /**
25 |      * Wraps the given stream in a new {@link JavacafeJamoTokenFilter}.
26 |      *
27 |      * @param stream upstream token stream
28 |      * @return a jamo filter reading from {@code stream}
29 |      */
30 |     @Override
31 |     public TokenStream create(TokenStream stream) {
32 |         final TokenStream jamoFilter = new JavacafeJamoTokenFilter(stream);
33 |         return jamoFilter;
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/main/java/org/apache/solr/index/analysis/kor2eng/JavacafeKor2EngConvertFilterFactory.java:
--------------------------------------------------------------------------------
 1 | package org.apache.solr.index.analysis.kor2eng;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.apache.lucene.analysis.util.TokenFilterFactory;
 5 | import org.elasticsearch.index.analysis.kor2eng.JavacafeKor2EngConvertFilter;
 6 | 
 7 | import java.util.Map;
 8 | 
 9 | /**
10 |  * Solr {@link TokenFilterFactory} that wraps a token stream in a
11 |  * {@link JavacafeKor2EngConvertFilter} (Korean-to-English term conversion).
12 |  *
13 |  * <p>Example field type; only the query analyzer applies the conversion filter:
14 |  *
15 |  * <pre>{@code
16 |  * <fieldType name="text_auto_ko2en" class="solr.TextField" positionIncrementGap="100">
17 |  *   <analyzer type="index">
18 |  *     <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
19 |  *     <filter class="solr.KoreanPartOfSpeechStopFilterFactory"/>
20 |  *     <filter class="solr.KoreanReadingFormFilterFactory"/>
21 |  *     <filter class="solr.LowerCaseFilterFactory"/>
22 |  *   </analyzer>
23 |  *   <analyzer type="query">
24 |  *     <tokenizer class="solr.StandardTokenizerFactory"/>
25 |  *     <filter class="org.apache.solr.index.analysis.kor2eng.JavacafeKor2EngConvertFilterFactory"/>
26 |  *     <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
27 |  *     <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
28 |  *     <filter class="solr.LowerCaseFilterFactory"/>
29 |  *   </analyzer>
30 |  * </fieldType>
31 |  * }</pre>
32 |  */
33 | public class JavacafeKor2EngConvertFilterFactory extends TokenFilterFactory {
34 | 
35 |     /**
36 |      * Creates the factory.
37 |      *
38 |      * @param args filter arguments, handed unchanged to the {@link TokenFilterFactory} constructor
39 |      */
40 |     public JavacafeKor2EngConvertFilterFactory(Map<String, String> args) {
41 |         super(args);
42 |     }
43 | 
44 |     /**
45 |      * Wraps the given stream in a new {@link JavacafeKor2EngConvertFilter}.
46 |      *
47 |      * @param tokenStream upstream token stream
48 |      * @return a conversion filter reading from {@code tokenStream}
49 |      */
50 |     @Override
51 |     public TokenStream create(TokenStream tokenStream) {
52 |         final TokenStream convertFilter = new JavacafeKor2EngConvertFilter(tokenStream);
53 |         return convertFilter;
54 |     }
55 | }
56 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/chosung/JavacafeChosungTokenFilter.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis.chosung;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.lucene.analysis.TokenFilter;
 6 | import org.apache.lucene.analysis.TokenStream;
 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 8 | import org.elasticsearch.index.common.parser.KoreanChosungParser;
 9 | 
10 | /**
11 |  * Token filter for Korean chosung (initial consonant) analysis: every incoming term is
12 |  * replaced by the value {@code KoreanChosungParser.parse} returns for it.
13 |  *
14 |  * @author hrkim
15 |  */
16 | public final class JavacafeChosungTokenFilter extends TokenFilter {
17 | 
18 |     /** Parser applied to each term. */
19 |     private final KoreanChosungParser parser = new KoreanChosungParser();
20 | 
21 |     /** Term text of the current token; read and then overwritten in place. */
22 |     private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
23 | 
24 |     /**
25 |      * Creates the filter.
26 |      *
27 |      * @param stream upstream token stream to read tokens from
28 |      */
29 |     public JavacafeChosungTokenFilter(TokenStream stream) {
30 |         super(stream);
31 |     }
32 | 
33 |     /**
34 |      * Advances the upstream stream and replaces the term with the chosung parser's output.
35 |      *
36 |      * @return {@code true} if a token was produced, {@code false} once the input is exhausted
37 |      * @throws IOException if the upstream stream fails
38 |      */
39 |     @Override
40 |     public boolean incrementToken() throws IOException {
41 |         if (!input.incrementToken()) {
42 |             return false;
43 |         }
44 | 
45 |         // The term must be read before it is cleared, so parse first.
46 |         CharSequence chosung = parser.parse(termAtt.toString());
47 |         termAtt.setEmpty().append(chosung);
48 |         return true;
49 |     }
50 | }
51 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/chosung/JavacafeChosungTokenFilterFactory.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis.chosung;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.elasticsearch.common.settings.Settings;
 5 | import org.elasticsearch.env.Environment;
 6 | import org.elasticsearch.index.IndexSettings;
 7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 8 | 
 9 | /**
10 |  * Elasticsearch token filter factory that wraps a token stream in a
11 |  * {@link JavacafeChosungTokenFilter}.
12 |  */
13 | public class JavacafeChosungTokenFilterFactory extends AbstractTokenFilterFactory {
14 | 
15 |     /**
16 |      * Creates the factory.
17 |      *
18 |      * @param indexSettings settings of the index this filter is configured for
19 |      * @param env           node environment; not used by this factory
20 |      * @param name          name of the filter
21 |      * @param settings      settings of the filter
22 |      */
23 |     public JavacafeChosungTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
24 |         super(indexSettings, name, settings);
25 |     }
26 | 
27 |     /**
28 |      * Wraps the given stream in a new {@link JavacafeChosungTokenFilter}.
29 |      *
30 |      * @param stream upstream token stream
31 |      * @return a chosung filter reading from {@code stream}
32 |      */
33 |     @Override
34 |     public TokenStream create(TokenStream stream) {
35 |         final TokenStream chosungFilter = new JavacafeChosungTokenFilter(stream);
36 |         return chosungFilter;
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/eng2kor/JavacafeEng2KorConvertFilter.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis.eng2kor;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.lucene.analysis.TokenFilter;
 6 | import org.apache.lucene.analysis.TokenStream;
 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 8 | import org.elasticsearch.index.common.converter.EngToKorConverter;
 9 | 
10 | /**
11 |  * English-to-Korean conversion filter: every incoming term is replaced by the value
12 |  * {@code EngToKorConverter.convert} returns for it.
13 |  *
14 |  * @author hrkim
15 |  */
16 | public final class JavacafeEng2KorConvertFilter extends TokenFilter {
17 | 
18 |     /** Converter applied to each term. */
19 |     private final EngToKorConverter converter = new EngToKorConverter();
20 | 
21 |     /** Term text of the current token; read and then overwritten in place. */
22 |     private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
23 | 
24 |     /**
25 |      * Creates the filter.
26 |      *
27 |      * @param stream upstream token stream to read tokens from
28 |      */
29 |     public JavacafeEng2KorConvertFilter(TokenStream stream) {
30 |         super(stream);
31 |     }
32 | 
33 |     /**
34 |      * Advances the upstream stream and replaces the term with the converter's output.
35 |      *
36 |      * @return {@code true} if a token was produced, {@code false} once the input is exhausted
37 |      * @throws IOException if the upstream stream fails
38 |      */
39 |     @Override
40 |     public boolean incrementToken() throws IOException {
41 |         if (!input.incrementToken()) {
42 |             return false;
43 |         }
44 | 
45 |         // The term must be read before it is cleared, so convert first.
46 |         CharSequence converted = converter.convert(termAtt.toString());
47 |         termAtt.setEmpty().append(converted);
48 |         return true;
49 |     }
50 | }
51 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/eng2kor/JavacafeEng2KorConvertFilter2.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis.eng2kor;
 2 | 
 3 | import org.apache.lucene.analysis.TokenFilter;
 4 | import org.apache.lucene.analysis.TokenStream;
 5 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 6 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 7 | import org.elasticsearch.index.common.converter.EngToKorConverter;
 8 | 
 9 | import java.io.IOException;
10 | import java.util.LinkedList;
11 | import java.util.Queue;
12 | 
13 | /**
14 |  * English-to-Korean conversion filter that keeps each original token and then emits the
15 |  * {@code EngToKorConverter.convert} result for it as an extra token at the same position
16 |  * (position increment 0).
17 |  *
18 |  * @author hrkim
19 |  */
20 | public final class JavacafeEng2KorConvertFilter2 extends TokenFilter {
21 | 
22 |     private final EngToKorConverter converter;
23 |     private final CharTermAttribute termAtt;
24 |     private final PositionIncrementAttribute positionIncrementAttribute;
25 | 
26 |     /** Converted terms waiting to be emitted after the token they were derived from. */
27 |     private final Queue<char[]> simpleQueue;
28 | 
29 |     /**
30 |      * Creates the filter.
31 |      *
32 |      * @param stream upstream token stream to read tokens from
33 |      */
34 |     public JavacafeEng2KorConvertFilter2(TokenStream stream) {
35 |         super(stream);
36 | 
37 |         this.converter = new EngToKorConverter();
38 |         this.termAtt = addAttribute(CharTermAttribute.class);
39 |         this.positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
40 | 
41 |         this.simpleQueue = new LinkedList<>();
42 |     }
43 | 
44 |     /**
45 |      * Emits a queued converted term if one is pending; otherwise advances the upstream
46 |      * stream, passes the original token through unchanged and queues its converted form.
47 |      *
48 |      * @return {@code true} if a token was produced, {@code false} once the input is exhausted
49 |      * @throws IOException if the upstream stream fails
50 |      */
51 |     @Override
52 |     public boolean incrementToken() throws IOException {
53 | 
54 |         if (!simpleQueue.isEmpty()) {
55 |             char[] buffer = simpleQueue.poll();
56 |             termAtt.setEmpty();
57 |             termAtt.copyBuffer(buffer, 0, buffer.length);
58 |             // Stack the converted term on the same position as the original token.
59 |             positionIncrementAttribute.setPositionIncrement(0);
60 | 
61 |             return true;
62 |         }
63 | 
64 |         if (!input.incrementToken()) {
65 |             return false;
66 | 
67 |         } else {
68 |             String result = converter.convert(termAtt.toString());
69 |             simpleQueue.add(result.toCharArray());
70 |             return true;
71 |         }
72 |     }
73 | 
74 |     /**
75 |      * Resets the upstream stream and discards any converted term that is still queued, so a
76 |      * reused filter instance never emits a leftover term from a previous, partially consumed
77 |      * stream.
78 |      *
79 |      * @throws IOException if resetting the upstream stream fails
80 |      */
81 |     @Override
82 |     public void reset() throws IOException {
83 |         super.reset();
84 |         simpleQueue.clear();
85 |     }
86 | }
87 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/eng2kor/JavacafeEng2KorConvertFilter3.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis.eng2kor;
 2 | 
 3 | import org.apache.lucene.analysis.TokenFilter;
 4 | import org.apache.lucene.analysis.TokenStream;
 5 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 6 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 7 | import org.elasticsearch.index.common.converter.EngToKorConverter;
 8 | 
 9 | import java.io.IOException;
10 | import java.util.LinkedList;
11 | import java.util.Queue;
12 | 
13 | /**
14 |  * English-to-Korean conversion filter that keeps each original token and then emits the
15 |  * {@code EngToKorConverter.convert} result for it as an extra token at the same position
16 |  * (position increment 0).
17 |  *
18 |  * @author hrkim
19 |  */
20 | public final class JavacafeEng2KorConvertFilter3 extends TokenFilter {
21 | 
22 |     private final EngToKorConverter converter;
23 |     private final CharTermAttribute termAtt;
24 | 
25 |     private final PositionIncrementAttribute positionIncrementAttribute;
26 | 
27 |     /** Converted terms waiting to be emitted after the token they were derived from. */
28 |     private final Queue<char[]> simpleQueue;
29 | 
30 |     /**
31 |      * Creates the filter.
32 |      *
33 |      * @param stream upstream token stream to read tokens from
34 |      */
35 |     public JavacafeEng2KorConvertFilter3(TokenStream stream) {
36 |         super(stream);
37 |         this.converter = new EngToKorConverter();
38 |         this.termAtt = addAttribute(CharTermAttribute.class);
39 | 
40 |         this.positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
41 | 
42 |         this.simpleQueue = new LinkedList<>();
43 |     }
44 | 
45 |     /**
46 |      * Emits a queued converted term if one is pending; otherwise advances the upstream
47 |      * stream, passes the original token through unchanged and queues its converted form.
48 |      *
49 |      * @return {@code true} if a token was produced, {@code false} once the input is exhausted
50 |      * @throws IOException if the upstream stream fails
51 |      */
52 |     @Override
53 |     public boolean incrementToken() throws IOException {
54 | 
55 |         if (!simpleQueue.isEmpty()) {
56 |             char[] buffer = simpleQueue.poll();
57 |             termAtt.setEmpty();
58 |             termAtt.copyBuffer(buffer, 0, buffer.length);
59 | 
60 |             // Stack the converted term on the same position as the original token.
61 |             positionIncrementAttribute.setPositionIncrement(0);
62 | 
63 |             return true;
64 |         }
65 | 
66 |         if (input.incrementToken()) {
67 |             String result = converter.convert(termAtt.toString());
68 |             simpleQueue.add(result.toCharArray());
69 | 
70 |             return true;
71 |         }
72 | 
73 |         return false;
74 |     }
75 | 
76 |     /**
77 |      * Resets the upstream stream and discards any converted term that is still queued, so a
78 |      * reused filter instance never emits a leftover term from a previous, partially consumed
79 |      * stream.
80 |      *
81 |      * @throws IOException if resetting the upstream stream fails
82 |      */
83 |     @Override
84 |     public void reset() throws IOException {
85 |         super.reset();
86 |         simpleQueue.clear();
87 |     }
88 | }
89 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/eng2kor/JavacafeEng2KorConvertFilterFactory.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis.eng2kor;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.elasticsearch.common.settings.Settings;
 5 | import org.elasticsearch.env.Environment;
 6 | import org.elasticsearch.index.IndexSettings;
 7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 8 | 
 9 | 
10 | /**
11 |  * Elasticsearch token filter factory that wraps a token stream in a
12 |  * {@link JavacafeEng2KorConvertFilter}.
13 |  */
14 | public class JavacafeEng2KorConvertFilterFactory extends AbstractTokenFilterFactory {
15 | 
16 |     /**
17 |      * Creates the factory.
18 |      *
19 |      * @param indexSettings settings of the index this filter is configured for
20 |      * @param env           node environment; not used by this factory
21 |      * @param name          name of the filter
22 |      * @param settings      settings of the filter
23 |      */
24 |     public JavacafeEng2KorConvertFilterFactory(IndexSettings indexSettings, Environment env , String name, Settings settings) {
25 |         super(indexSettings, name, settings);
26 |     }
27 | 
28 |     /**
29 |      * Wraps the given stream in a new {@link JavacafeEng2KorConvertFilter}.
30 |      *
31 |      * @param tokenStream upstream token stream
32 |      * @return a conversion filter reading from {@code tokenStream}
33 |      */
34 |     @Override
35 |     public TokenStream create(TokenStream tokenStream) {
36 |         final TokenStream convertFilter = new JavacafeEng2KorConvertFilter(tokenStream);
37 |         return convertFilter;
38 |     }
39 | }
40 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/jamo/JavacafeJamoTokenFilter.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis.jamo;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.lucene.analysis.TokenFilter;
 6 | import org.apache.lucene.analysis.TokenStream;
 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 8 | import org.elasticsearch.index.common.parser.KoreanJamoParser;
 9 | 
10 | /**
11 |  * 한글 자모 분석 필터
12 |  *
13 |  * @author hrkim
14 |  *
15 |  */
16 | public final class JavacafeJamoTokenFilter extends TokenFilter {
17 |     
18 |     private KoreanJamoParser parser;
19 |     private CharTermAttribute termAtt;
20 | 
21 |     
22 |     public JavacafeJamoTokenFilter(TokenStream stream) {
23 |         super(stream);
24 |         this.parser = new KoreanJamoParser();
25 |         this.termAtt = addAttribute(CharTermAttribute.class);
26 |     }
27 | 
28 |     
29 |     /**
30 |      * 한글 자모 Parser를 이용하여 토큰을 파싱하고 Term을 구한다. 
31 |      */
32 |     @Override
33 |     public boolean incrementToken() throws IOException {
34 |         
35 |         if (input.incrementToken()) {
36 |             CharSequence parserdData = parser.parse(termAtt.toString());
37 |             termAtt.setEmpty();
38 |             termAtt.append(parserdData);
39 |         
40 |             return true;
41 |         }
42 |         
43 |         return false;
44 |     }
45 |     
46 |     
47 | }
48 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/jamo/JavacafeJamoTokenFilterFactory.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis.jamo;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.elasticsearch.common.settings.Settings;
 5 | import org.elasticsearch.env.Environment;
 6 | import org.elasticsearch.index.IndexSettings;
 7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 8 | 
 9 | public class JavacafeJamoTokenFilterFactory extends AbstractTokenFilterFactory {
10 | 
11 |     
12 |     public JavacafeJamoTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
13 |         super(indexSettings, name, settings);
14 |     }
15 |     
16 | 
17 |     @Override
18 |     public TokenStream create(TokenStream stream) {
19 |         return new JavacafeJamoTokenFilter(stream);
20 |     }
21 | 
22 |     
23 |     
24 | }
25 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/kor2eng/JavacafeKor2EngConvertFilter.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis.kor2eng;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.lucene.analysis.TokenFilter;
 6 | import org.apache.lucene.analysis.TokenStream;
 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 8 | import org.elasticsearch.index.common.converter.KorToEngConverter;
 9 | 
10 | /**
11 |  * 한영 변환 필터
12 |  *
13 |  * @author hrkim
14 |  *
15 |  */
16 | public final class JavacafeKor2EngConvertFilter extends TokenFilter {
17 | 
18 |     private KorToEngConverter converter;
19 |     private CharTermAttribute termAtt;
20 | 
21 |     
22 |     public JavacafeKor2EngConvertFilter(TokenStream stream) {
23 |         super(stream);        
24 |         this.converter = new KorToEngConverter();
25 |         this.termAtt = addAttribute(CharTermAttribute.class);     
26 |     }
27 | 
28 |     
29 |     @Override
30 |     public boolean incrementToken() throws IOException {
31 |         
32 |         if (input.incrementToken()) {
33 |             CharSequence parserdData = converter.convert(termAtt.toString());
34 |             termAtt.setEmpty();
35 |             termAtt.append(parserdData);
36 |         
37 |             return true;
38 |         }
39 |         
40 |         return false;
41 |     }
42 |     
43 |     
44 | 
45 | }
46 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/kor2eng/JavacafeKor2EngConvertFilterFactory.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis.kor2eng;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.elasticsearch.common.settings.Settings;
 5 | import org.elasticsearch.env.Environment;
 6 | import org.elasticsearch.index.IndexSettings;
 7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 8 | 
 9 | 
10 | public class JavacafeKor2EngConvertFilterFactory extends AbstractTokenFilterFactory {
11 | 
12 |     
13 |     public JavacafeKor2EngConvertFilterFactory(IndexSettings indexSettings, Environment env , String name, Settings settings) {
14 |         super(indexSettings, name, settings);
15 |     }
16 |     
17 | 
18 |     @Override
19 |     public TokenStream create(TokenStream tokenStream) {
20 |         return new JavacafeKor2EngConvertFilter(tokenStream);
21 |     }
22 |     
23 |     
24 | }
25 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/spell/JavacafeSpellFilter.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis.spell;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.lucene.analysis.TokenFilter;
 6 | import org.apache.lucene.analysis.TokenStream;
 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 8 | import org.elasticsearch.index.common.parser.KoreanJamoParser;
 9 | 
10 | /**
11 |  * 스펠링 체크 필터
12 |  *
13 |  * @author hrkim
14 |  *
15 |  */
16 | public final class JavacafeSpellFilter extends TokenFilter {
17 |     
18 |     private KoreanJamoParser parser;
19 |     private CharTermAttribute termAtt;
20 | 
21 |     
22 |     public JavacafeSpellFilter(TokenStream stream) {
23 |         super(stream);
24 |         this.parser = new KoreanJamoParser();
25 |         this.termAtt = addAttribute(CharTermAttribute.class);
26 |     }
27 | 
28 |     
29 |     /**
30 |      * 한글 자모 Parser를 이용하여 토큰을 파싱하고 Term을 구한다. 
31 |      */
32 |     @Override
33 |     public boolean incrementToken() throws IOException {
34 |         
35 |         if (input.incrementToken()) {
36 |             CharSequence parserdData = parser.parse(termAtt.toString());
37 |             termAtt.setEmpty();
38 |             termAtt.append(parserdData);
39 |         
40 |             return true;
41 |         }
42 |         
43 |         return false;
44 |     }
45 |     
46 |     
47 | }
48 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/spell/JavacafeSpellFilterFactory.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.analysis.spell;
 2 | 
 3 | import org.apache.lucene.analysis.TokenStream;
 4 | import org.elasticsearch.common.settings.Settings;
 5 | import org.elasticsearch.env.Environment;
 6 | import org.elasticsearch.index.IndexSettings;
 7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 8 | 
 9 | public class JavacafeSpellFilterFactory extends AbstractTokenFilterFactory {
10 | 
11 |     
12 |     public JavacafeSpellFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
13 |         super(indexSettings, name, settings);
14 |     }
15 |     
16 | 
17 |     @Override
18 |     public TokenStream create(TokenStream stream) {
19 |         return new JavacafeSpellFilter(stream);
20 |     }
21 | 
22 |     
23 |     
24 | }
25 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/converter/EngToKorConverter.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.common.converter;
 2 | 
 3 | import java.util.Map;
 4 | 
 5 | import org.elasticsearch.index.common.util.JamoUtil;
 6 | import org.elasticsearch.index.common.util.KeyboardUtil;
 7 | 
 8 | /**
 9 |  * 영한 오타 변환기 (Eng -> Kor)
10 |  *
11 |  * @author hrkim
12 |  *
13 |  */
14 | public class EngToKorConverter {
15 |       
16 |     
17 |     /**
18 |      * 토큰을 영문 키보드 기준으로 변환한다.
19 |      * 
20 |      * @param token
21 |      * @return
22 |      */
23 |     public String convert(String token) {        
24 |         StringBuilder sb = new StringBuilder();
25 | 
26 |         // 문자열을 한글자씩 잘라서 처리한다.
27 |         String word = token.trim();        
28 |         for (int index = 0; index < word.length(); index++) {
29 |             
30 |             // 처리 불가능한 글자는 그냥 넘긴다.
31 |             if (KeyboardUtil.IGNORE_CHAR.indexOf(word.substring(index, index + 1)) > -1) {
32 |                 sb.append(word.substring(index, index + 1));
33 |                 index++;
34 |             }
35 |             if (index >= word.length()) {
36 |             	break;
37 |             }
38 |             
39 |             try {
40 |                 // 초성 정보를 구한다.
41 |                 Map<String, Integer> mChoSung = KeyboardUtil.getInfoForChoSung(index, word);
42 |                 int cho = mChoSung.get("code");
43 |                 index = mChoSung.get("idx");
44 | 
45 |                 // 중성 정보를 구한다.
46 |                 Map<String, Integer> mJungSung = KeyboardUtil.getInfoForJungSung(index, word);
47 |                 int jung = mJungSung.get("code");
48 |                 index = mJungSung.get("idx");
49 | 
50 |                 // 종성 정보를 구한다.
51 |                 Map<String, Integer> mJongSung = KeyboardUtil.getInfoForJongSung(index, word);
52 |                 int jong = mJongSung.get("code");
53 |                 index = mJongSung.get("idx");
54 |                             
55 |                 // 한글 유니코드를 생성한다.
56 |                 sb.append((char) (JamoUtil.START_KOREA_UNICODE + cho + jung + jong));
57 |                 
58 |             } catch(Exception e) {}
59 |         }
60 |         
61 |         return sb.toString();
62 |     }
63 |     
64 |     
65 |     
66 |     
67 | 
68 |     
69 |     
70 | }
71 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/converter/KorToEngConverter.java:
--------------------------------------------------------------------------------
  1 | package org.elasticsearch.index.common.converter;
  2 | 
  3 | import org.apache.commons.lang3.StringUtils;
  4 | import org.elasticsearch.index.common.type.CodeType;
  5 | import org.elasticsearch.index.common.util.JamoUtil;
  6 | import org.elasticsearch.index.common.util.KeyboardUtil;
  7 | 
  8 | /**
  9 |  * 한영 오타 변환기 (Kor -> Eng)
 10 |  *
 11 |  * @author hrkim
 12 |  *
 13 |  */
 14 | public class KorToEngConverter {
 15 | 
 16 |           
 17 |     /**
 18 |      * 토큰을 한글 키보드 기준으로 변환한다.
 19 |      * 
 20 |      * @param token
 21 |      * @return
 22 |      */
 23 |     public String convert(String token) {        
 24 |         StringBuilder sb = new StringBuilder();
 25 |         
 26 |         // 문자열을 한글자씩 잘라서 처리한다.
 27 |         String word = token.trim();        
 28 |         for (int index = 0; index < word.length(); index++) {
 29 | 
 30 |             // 처리 불가능한 글자는 그냥 넘긴다.
 31 |             if (KeyboardUtil.IGNORE_CHAR.indexOf(word.substring(index, index + 1)) > -1) {
 32 |                 sb.append(word.substring(index, index + 1));
 33 |                 index++;
 34 |             }
 35 |             if (index >= word.length()) {
 36 |             	break;
 37 |             }
 38 | 
 39 |             try {
 40 |                 int init = word.charAt(index);
 41 |                 int initUnicode = init - JamoUtil.START_KOREA_UNICODE;
 42 | 
 43 |                 if (initUnicode > 0) {
 44 |                     /**
 45 |                      * 1글자로 조합형 한글이 들어올 경우 처리
 46 |                      */              
 47 |                     int cho  = initUnicode / 21 / 28;   // 0 ~ 18
 48 |                     String strCho = getSameEngChar(CodeType.CHOSUNG, cho);
 49 |                     if (StringUtils.isNotEmpty(strCho)) {
 50 |                         sb.append(strCho);
 51 |                     }
 52 |                     
 53 | 
 54 |                     int jung = initUnicode / 28 % 21;   // 0 ~ 20
 55 |                     String strJung = getSameEngChar(CodeType.JUNGSUNG, jung);
 56 |                     if (StringUtils.isNotEmpty(strJung)) {
 57 |                         sb.append(strJung);
 58 |                     }
 59 |                     
 60 |                     int jong = initUnicode % 28;        // 0 ~ 27
 61 |                     String strJong = getSameEngChar(CodeType.JONGSUNG, jong);
 62 |                     if (StringUtils.isNotEmpty(strJong)) {
 63 |                         sb.append(strJong);
 64 |                     }
 65 | 
 66 |                 } else {
 67 |                     /**
 68 |                      * 1글자로 자모가 들어올 경우 처리
 69 |                      */
 70 |                     String subStr = String.valueOf((char) init);
 71 |                     sb.append(getSameEngCharForJamo(subStr, 0));
 72 |                 }
 73 |             } catch(Exception e) {}
 74 |         }
 75 | 
 76 |         return sb.toString();
 77 |     }
 78 |     
 79 |     
 80 |     
 81 | 
 82 |     private String getSameEngChar(CodeType type, int pos) {
 83 |         switch (type) {
 84 |             case CHOSUNG:
 85 |                 return KeyboardUtil.KEYBOARD_CHO_SUNG[pos];
 86 | 
 87 |             case JUNGSUNG:
 88 |                 return KeyboardUtil.KEYBOARD_JUNG_SUNG[pos];
 89 | 
 90 |             case JONGSUNG:
 91 |                 if ((pos - 1) > -1) {
 92 |                     return KeyboardUtil.KEYBOARD_JONG_SUNG[pos - 1];
 93 |                 }
 94 |                 return "";
 95 |         }
 96 | 
 97 |         return "";
 98 |     }
 99 |     
100 | 
101 |     private String getSameEngCharForJamo(String key, int pos) {
102 |         for (int i=0; i<KeyboardUtil.KEYBOARD_KEY_KOR.length; i++) {
103 |             if (KeyboardUtil.KEYBOARD_KEY_KOR[i].equals(key)) {
104 |                 return KeyboardUtil.KEYBOARD_KEY_ENG[i];
105 |             }
106 |         }
107 |         
108 |         return "";
109 |     }
110 |     
111 |     
112 |     
113 | }
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 
121 | 
122 | 
123 | 
124 | 
125 | 
126 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/merger/KoreanMerger.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.common.merger;
 2 | 
 3 | import java.util.List;
 4 | 
 5 | import org.elasticsearch.index.common.util.HangulUtil;
 6 | 
 7 | /**
 8 |  * 한글 Merger
 9 |  *
10 |  * @author hrkim
11 |  *
12 |  */
13 | public class KoreanMerger {
14 | 
15 |     /**
16 |      * 자모 리스트를 합쳐서 한글로 변환한다.
17 |      * @param jamoList
18 |      * @return
19 |      * @throws Exception
20 |      */
21 |     public String merge(List<String> jamoList) throws Exception {
22 |         String result = "";
23 | 
24 |         if (jamoList.size() == 0) {
25 |             return "";
26 |         }
27 | 
28 |         int jungSungSize = HangulUtil.JUNG_SUNG.length;
29 |         int jongSungSize = HangulUtil.JONG_SUNG.length;
30 | 
31 |         int startIdx = 0;
32 |         while (true) {
33 |             if (startIdx >= jamoList.size()) {
34 |                 break;
35 |             }
36 | 
37 |             // 자모 리스트에서 한글 한글자에 해당하는 사이즈를 구한다.
38 |             int oneHangulJamoSize = HangulUtil.getOneHangulJamoSize(startIdx, jamoList);
39 |             if (oneHangulJamoSize == -1) {
40 |                 throw new Exception("한글은 최소 2개 이상의 유니코드 조합으로 이루어져야 합니다."); 
41 |             }
42 | 
43 |             // 한글 유니코드가 시작되는 Decimal값을 구한다.
44 |             int decimalCode = HangulUtil.START_KOREA_UNICODE_DECIMAL;
45 | 
46 |             // 초성에 해당하는 Decimal값을 더한다.
47 |             int chosungIdx = HangulUtil.getChoSungIndex(startIdx, jamoList);
48 |             if (chosungIdx >= 0) {
49 |                 decimalCode = decimalCode + (jongSungSize * jungSungSize * chosungIdx);
50 |             }
51 | 
52 |             // 중성에 해당하는 Decimal값을 더한다.
53 |             int jungsungIdx = HangulUtil.getJungSungIndex(startIdx, jamoList);
54 |             if (jungsungIdx >= 0) {
55 |                 decimalCode = decimalCode + (jongSungSize * jungsungIdx);
56 |             }
57 | 
58 |             // 종성에 해당하는 Decimal값을 더한다.
59 |             if (oneHangulJamoSize > 2) {
60 |                 int jongsungIdx = HangulUtil.getJongSungIndex(startIdx, jamoList);
61 |                 if (jongsungIdx >= 0) {
62 |                     decimalCode = decimalCode + jongsungIdx;
63 |                 }
64 |             }
65 | 
66 |             // Decimal값을 String으로 변환한다.
67 |             String hangul = Character.toString((char)decimalCode);
68 |             result = result + hangul;
69 |             
70 |             startIdx = startIdx + oneHangulJamoSize;
71 |         }
72 | 
73 |         return result;
74 |     }
75 | 
76 |     
77 |     
78 | }
79 | 
80 | 
81 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/parser/AbstractKoreanParser.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.common.parser;
 2 | 
 3 | import org.apache.commons.lang3.StringUtils;
 4 | import org.elasticsearch.index.common.util.JamoUtil;
 5 | 
 6 | /**
 7 |  * 한글 기본 Parser
 8 |  *
 9 |  * @author hrkim
10 |  *
11 |  */
12 | public abstract class AbstractKoreanParser {
13 |     
14 |     
15 |     /**
16 |      * 토큰을 자음과 모음으로 파싱한다.
17 |      * 
18 |      * @param token
19 |      * @return
20 |      */
21 |     public String parse(String token) {
22 |         if (StringUtils.isBlank(token)) {
23 |             return "";
24 |         }
25 | 
26 |         StringBuilder result = new StringBuilder();
27 | 
28 |         // 토큰을 한글자씩 잘라서 처리한다.
29 |         char[] arrCh = token.toCharArray();        
30 |         for(char ch : arrCh) {
31 |             
32 |             // 처리 할 char의 유니코드 인덱스를 구한다.
33 |             char unicodeIndex = (char)(ch - JamoUtil.START_KOREA_UNICODE);
34 | 
35 |             // 한글 유니코드 범위 : 0xAC00 ~ 0xD7AF (11184개)
36 |             // 한글 유니코드인지 검사한다.            
37 |             if(unicodeIndex >= 0 && unicodeIndex <= 11184) {
38 |                 
39 |                 // 초성 유니코드
40 |                 int idxChoSung = unicodeIndex / (28 * 21);
41 |                 char chosung = JamoUtil.UNICODE_CHO_SUNG[idxChoSung];
42 |                                 
43 |                 // 중성 유니코드
44 |                 int idxJungSung = unicodeIndex % (28 * 21) / 28;
45 |                 char jungsung = JamoUtil.UNICODE_JUNG_SUNG[idxJungSung];
46 |                 
47 |                 // 종성 유니코드
48 |                 int idxJongSung = unicodeIndex % (28 * 21) % 28;
49 |                 char jongsung = JamoUtil.UNICODE_JONG_SUNG[idxJongSung];
50 | 
51 |                 // 한글 한글자를 처리한다.
52 |                 processForKoreanChar(result, chosung, jungsung, jongsung);
53 |             
54 |             } else {
55 |                 
56 |                 // 한글이 아닌 한글자를 처리한다.
57 |                 processForOther(result, ch);
58 |             }
59 |         }
60 | 
61 |         // 토큰을 분석한 최종 결과를 리턴한다.
62 |         return result.toString();
63 |     }
64 |     
65 | 
66 |     /**
67 |      * 한글 문자를 처리한다.
68 |      * 
69 |      * @param sb
70 |      * @param chosung
71 |      * @param jungsung
72 |      * @param jongsung
73 |      */
74 |     protected abstract void processForKoreanChar(StringBuilder sb, char chosung, char jungsung, char jongsung);
75 |     
76 |     
77 |     /**
78 |      * 한글 문자를 제외한 일반 문자를 처리한다.
79 |      * 
80 |      * @param sb
81 |      * @param eachToken
82 |      */
83 |     protected abstract void processForOther(StringBuilder sb, char eachToken);
84 | 
85 | 
86 |     
87 | }
88 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/parser/KoreanChosungParser.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.common.parser;
 2 | 
 3 | /**
 4 |  * 한글 초성 Parser
 5 |  *
 6 |  * @author hrkim
 7 |  *
 8 |  */
 9 | public class KoreanChosungParser extends AbstractKoreanParser {
10 | 
11 |     
12 |     @Override
13 |     protected void processForKoreanChar(StringBuilder sb, char chosung, char jungsung, char jongsung) {
14 |         sb.append(chosung);
15 |     }
16 | 
17 |     
18 |     
19 |     @Override
20 |     protected void processForOther(StringBuilder sb, char eachToken) {
21 |         sb.append(eachToken);
22 |     }
23 | 
24 |     
25 |     
26 | }
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/parser/KoreanJamoParser.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.common.parser;
 2 | 
 3 | import org.elasticsearch.index.common.util.JamoUtil;
 4 | 
 5 | /**
 6 |  * 한글 자모 Parser
 7 |  *
 8 |  * @author hrkim
 9 |  *
10 |  */
11 | public class KoreanJamoParser extends AbstractKoreanParser {
12 | 
13 |     
14 |     
15 |     
16 |     @Override
17 |     protected void processForKoreanChar(StringBuilder sb, char chosung, char jungsung, char jongsung) {
18 |         sb.append(chosung).append(jungsung);
19 | 
20 |         if(jongsung != JamoUtil.UNICODE_JONG_SUNG_EMPTY) {
21 |             sb.append(jongsung);
22 |         }
23 |     }
24 | 
25 |     
26 |     
27 |     @Override
28 |     protected void processForOther(StringBuilder sb, char eachToken) {
29 |         sb.append(eachToken);
30 |     }
31 | 
32 |     
33 |     
34 | }
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/type/CodeType.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.index.common.type;
 2 | 
 3 | /**
 4 |  * 한글 구성요소 기본 타입
 5 |  *
 6 |  * @author hrkim
 7 |  *
 8 |  */
 9 | public enum CodeType {
10 |         
11 |     /**
12 |      * 초성
13 |      */
14 |     CHOSUNG,
15 |     
16 |     /**
17 |      * 중성
18 |      */
19 |     JUNGSUNG,
20 |     
21 |     /**
22 |      * 종성
23 |      */
24 |     JONGSUNG
25 | 
26 | }
27 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/util/HangulUtil.java:
--------------------------------------------------------------------------------
  1 | package org.elasticsearch.index.common.util;
  2 | 
  3 | import java.util.List;
  4 | 
  5 | /**
  6 |  * 한글 유니코드 유틸리티
  7 |  * 
  8 |  * http://www.unicode.org/charts/PDF/UAC00.pdf
  9 |  *
 10 |  * @author hrkim
 11 |  *
 12 |  */
 13 | public class HangulUtil {
 14 | 
 15 | 
 16 |     /**
 17 |      * 초성 (19자)
 18 |      */
 19 |     public static final char[] CHO_SUNG = {
 20 |             'ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ', 'ㅅ', 
 21 |             'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'
 22 |     };
 23 | 
 24 | 
 25 |     /**
 26 |      * 중성 (21자)
 27 |      */
 28 |     public static final char[] JUNG_SUNG = {
 29 |             'ㅏ', 'ㅐ', 'ㅑ', 'ㅒ', 'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ', 'ㅗ', 'ㅘ', 
 30 |             'ㅙ', 'ㅚ', 'ㅛ', 'ㅜ', 'ㅝ', 'ㅞ', 'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ', 'ㅣ'
 31 |     };
 32 |   
 33 |     
 34 |     /**
 35 |      * 종성 (28자) - "빈값" 포함
 36 |      */
 37 |     public static final char[] JONG_SUNG = {
 38 |             ' ', 'ㄱ', 'ㄲ', 'ㄳ', 'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ', 'ㄹ', 'ㄺ', 
 39 |             'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ', 'ㅁ', 'ㅂ', 'ㅄ', 'ㅅ', 
 40 |             'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'
 41 |     };
 42 |     
 43 |     
 44 |     /**
 45 |      * 한글 유니코드의 시작값 (가)
 46 |      * 
 47 |      * 16진수 : 0xAC00
 48 |      * 10진수 : 44032
 49 |      * 
 50 |      */
 51 |     public static final int START_KOREA_UNICODE_DECIMAL = 44032;
 52 | 
 53 | 
 54 | 
 55 | 
 56 |     
 57 |     
 58 |     public static int getOneHangulJamoSize(int startIdx, List<String> jamoList) {
 59 |         int remainJamoSize = jamoList.size() - startIdx;
 60 |         
 61 |         if (remainJamoSize == 1) {
 62 |             return -1;
 63 |         }
 64 |         
 65 |         if (remainJamoSize == 2 || remainJamoSize == 3) {
 66 |             return remainJamoSize;
 67 |         }
 68 |         
 69 |         // 초성이나 종성은 겹치는 문자가 존재하기 때문에 
 70 |         // 다음 글자의 중성을 이용하여 한글자의 사이즈를 검사한다.
 71 |         String strJungSung = new String(JUNG_SUNG);
 72 |         String strChar = jamoList.get(startIdx + 3);
 73 |         
 74 |         if (strJungSung.contains(strChar)) {
 75 |             return 2;
 76 |         }
 77 | 
 78 |         return 3;
 79 |     }
 80 |     
 81 |     
 82 |     public static int getChoSungIndex(int startIdx, List<String> jamoList) {
 83 |         String strChoSung = new String(CHO_SUNG);
 84 |         String strChoSungChar = jamoList.get(startIdx);
 85 |         
 86 |         return strChoSung.indexOf(strChoSungChar);
 87 |     }
 88 |     
 89 |     
 90 |     public static int getJungSungIndex(int startIdx, List<String> jamoList) {
 91 |         String strJungSung = new String(HangulUtil.JUNG_SUNG);
 92 |         String strJungSungChar = jamoList.get(startIdx + 1);
 93 |         
 94 |         return strJungSung.indexOf(strJungSungChar);
 95 |     }
 96 |     
 97 |     
 98 |     public static int getJongSungIndex(int startIdx, List<String> jamoList) {
 99 |         String strJongSung = new String(HangulUtil.JONG_SUNG);
100 |         String strJongSungChar = jamoList.get(startIdx + 2);
101 |         
102 |         return strJongSung.indexOf(strJongSungChar);
103 |     }
104 |     
105 |     
106 |     
107 | }
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 
121 | 
122 | 
123 | 
124 | 
125 | 
126 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/util/JamoUtil.java:
--------------------------------------------------------------------------------
  1 | package org.elasticsearch.index.common.util;
  2 | 
  3 | /**
  4 |  * 한글 자모 유니코드 유틸리티
  5 |  * 
  6 |  * https://www.unicode.org/charts/PDF/U1100.pdf
  7 |  *
  8 |  * @author hrkim
  9 |  *
 10 |  */
 11 | public class JamoUtil {
 12 | 
 13 |     
 14 |     /**
 15 |      * 초성 (19자)<br>
 16 |      * <br>
 17 |      * 초성으로 올 수 있는 유니코드들<br>
 18 |      * 총 19자로 구성된다.<br>
 19 |      * <br>
 20 |      * ㄱ ㄲ ㄴ ㄷ ㄸ ㄹ ㅁ ㅂ ㅃ ㅅ <br>
 21 |      * ㅆ ㅇ ㅈ ㅉ ㅊ ㅋ ㅌ ㅍ ㅎ<br>
 22 |      * 
 23 |      */
 24 |     public static final char[] UNICODE_CHO_SUNG = {
 25 |             0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142, 0x3143, 0x3145,
 26 |             0x3146, 0x3147, 0x3148, 0x3149, 0x314A, 0x314B, 0x314C, 0x314D, 0x314E
 27 |     };
 28 | 
 29 |     
 30 |     /**
 31 |      * 중성 (21자)<br>
 32 |      * <br>
 33 |      * 중성으로 올 수 있는 유니코드들<br>
 34 |      * 총 21자로 구성된다.<br>
 35 |      * <br>
 36 |      * ㅏ ㅐ ㅑ ㅒ ㅓ ㅔ ㅕ ㅖ ㅗ ㅘ <br>
 37 |      * ㅙ ㅚ ㅛ ㅜ ㅝ ㅞ ㅟ ㅠ ㅡ ㅢ <br>
 38 |      * ㅣ<br>
 39 |      * 
 40 |      */
 41 |     public static final char[] UNICODE_JUNG_SUNG = {
 42 |             0x314F, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156, 0x3157, 0x3158,
 43 |             0x3159, 0x315A, 0x315B, 0x315C, 0x315D, 0x315E, 0x315F, 0x3160, 0x3161, 0x3162,
 44 |             0x3163
 45 |     };
 46 | 
 47 |     
 48 |     /**
 49 |      * 종성 (28자)<br>
 50 |      * <br>
 51 |      * 종성으로 올 수 있는 유니코드들<br>
 52 |      * 기본 27자와 "빈값"을 표현하는 1자를 합쳐서 총 28자로 구성된다.<br>
 53 |      * <br>
 54 |      *  빈값 ㄱ ㄲ ㄳ ㄴ ㄵ ㄶ ㄷ ㄹ ㄺ <br>
 55 |      *  ㄻ ㄼ ㄽ ㄾ ㄿ ㅀ ㅁ ㅂ ㅄ ㅅ <br>
 56 |      *  ㅆ ㅇ ㅈ ㅊ ㅋ ㅌ ㅍ ㅎ<br>
 57 |      *  
 58 |      */
 59 |     public static final char[] UNICODE_JONG_SUNG = {
 60 |             0x0000, 0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3139, 0x313A,
 61 |             0x313B, 0x313C, 0x313D, 0x313E, 0x313F, 0x3140, 0x3141, 0x3142, 0x3144, 0x3145,
 62 |             0x3146, 0x3147, 0x3148, 0x314A, 0x314B, 0x314C, 0x314D, 0x314E
 63 |     };
 64 | 
 65 | 
 66 |     
 67 |     /**
 68 |      * 한글 유니코드의 시작값 (가)<br>
 69 |      * <br>
 70 |      * 한글 유니코드는 0xAC00로 시작하여 0xD79F로 끝난다.<br>
 71 |      * 시작값과 끝값을 벗어난 유니코드는 한글이 아니다.<br>
 72 |      * <br>
 73 |      * 시작값 : 0xAC00 가<br>
 74 |      * 끝값   : 0xD79F 힟<br>
 75 |      */
 76 |     public static final char START_KOREA_UNICODE = 0xAC00;
 77 |     
 78 |     
 79 | 
 80 |     /**
 81 |      * 종성 빈값 유니코드
 82 |      */
 83 |     public static final char UNICODE_JONG_SUNG_EMPTY = 0x0000;
 84 |     
 85 | 
 86 |     
 87 |     
 88 | }
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/common/util/KeyboardUtil.java:
--------------------------------------------------------------------------------
  1 | package org.elasticsearch.index.common.util;
  2 | 
  3 | import java.util.HashMap;
  4 | import java.util.Map;
  5 | 
  6 | import org.elasticsearch.index.common.type.CodeType;
  7 | 
  8 | /**
  9 |  * 한글 키보드 유틸리티
 10 |  * 
 11 |  *
 12 |  * @author hrkim
 13 |  *
 14 |  */
 15 | public class KeyboardUtil {
 16 | 
 17 |     
 18 |     /**
 19 |      * Converter 진행시 무시되는 문자들
 20 |      */
 21 |     public static final String IGNORE_CHAR = "`1234567890-=[]\\;',./~!@#$%^&*()_+{}|:\"<>?\' \' ";
 22 | 
 23 |     
 24 |     
 25 |     /**
 26 |      * 초성 키에 해당하는 키보드상의 영문자 (19자)
 27 |      */
 28 |     public static final String[] KEYBOARD_CHO_SUNG = {
 29 |             "r", "R", "s", "e", "E", "f", "a", "q", "Q", "t", 
 30 |             "T", "d", "w", "W", "c", "z", "x", "v", "g"
 31 |     };
 32 |     
 33 |     /**
 34 |      * 중성 키에 해당하는 키보스상의 영문자 (21자)
 35 |      */
 36 |     public static final String[] KEYBOARD_JUNG_SUNG = {
 37 |             "k", "o", "i", "O", "j", "p", "u", "P", "h", "hk", 
 38 |             "ho", "hl", "y", "n", "nj", "np", "nl", "b", "m", "ml", "l"
 39 |     };
 40 |     
 41 |     /**
 42 |      * 종성 키에 해당하는 키보드상의 영문자 (27자) - "빈값" 제외
 43 |      */
 44 |     public static final String[] KEYBOARD_JONG_SUNG = {
 45 |             "r", "R", "rt", "s", "sw", "sg", "e", "f", "fr", "fa", 
 46 |             "fq", "ft", "fx", "fv", "fg", "a", "q", "qt", "t", "T", 
 47 |             "d", "w", "c", "z", "x", "v", "g"
 48 |     };
 49 |         
 50 | 
 51 | 
 52 |     /**
 53 |      * 키보드상에서 한영키에 의해서 오타 교정이 필요한 키배열 (영문키 33자)
 54 |      */
 55 |     public static final String[] KEYBOARD_KEY_ENG = {
 56 |             "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", 
 57 |             "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", 
 58 |             "u", "v", "w", "x", "y", "z", "Q", "W", "E", "R", 
 59 |             "T", "O", "P"
 60 |     };
 61 |     
 62 |     /**
 63 |      * 키보드상에서 한영키에 의해서 오타 교정이 필요한 키배열 (한글키 33자)
 64 |      */
 65 |     public static final String[] KEYBOARD_KEY_KOR  = {
 66 |             "ㅁ", "ㅠ", "ㅊ", "ㅇ", "ㄷ", "ㄹ", "ㅎ", "ㅗ", "ㅑ", "ㅓ", 
 67 |             "ㅏ", "ㅣ", "ㅡ", "ㅜ", "ㅐ", "ㅔ", "ㅂ", "ㄱ", "ㄴ", "ㅅ", 
 68 |             "ㅕ", "ㅍ", "ㅈ", "ㅌ", "ㅛ", "ㅋ", "ㅃ", "ㅉ", "ㄸ", "ㄲ", 
 69 |             "ㅆ", "ㅒ", "ㅖ"
 70 |     };
 71 |    
 72 |     
 73 |     
 74 |     
 75 | 
 76 |     /**
 77 |      * 초성 정보를 제공한다.
 78 |      * 
 79 |      * - 초성과 매칭된 코드 조회 
 80 |      * - 한 자로 이루어진 초성코드만 존재한다.
 81 |      * 
 82 |      * @param index
 83 |      * @param word
 84 |      * @return
 85 |      */
 86 |     public static Map<String, Integer> getInfoForChoSung(int index, String word) {  
 87 |         int code = KeyboardUtil.makeUnicodeIndex(CodeType.CHOSUNG, word.substring(index, index + 1));
 88 |         int idx = index + 1;
 89 |         
 90 |         Map<String, Integer> m = new HashMap<>();
 91 |         m.put("code", code);
 92 |         m.put("idx", idx);
 93 | 
 94 |         return m;
 95 |     }
 96 |     
 97 |     
 98 |     /**
 99 |      * 중성 정보를 제공한다.
100 |      * 
101 |      * - 중성과 매칭된 코드 조회 
102 |      * - 두 자로 이루어진 중성코드가 존재한다.
103 |      * 
104 |      * @param index
105 |      * @param word
106 |      * @return
107 |      */
108 |     public static Map<String, Integer> getInfoForJungSung(int index, String word) {
109 |         int code = KeyboardUtil.getDoubleMedial(index, word);
110 |         int idx = index + 2;
111 |         
112 |         if (-1 == code) {
113 |             code = KeyboardUtil.getSingleMedial(index, word);
114 |             idx = index + 1;                
115 |         }
116 |         
117 |         Map<String, Integer> m = new HashMap<>();
118 |         m.put("code", code);
119 |         m.put("idx", idx);
120 | 
121 |         return m;
122 |     }
123 | 
124 |     
125 |     /**
126 |      * 종성 정보를 제공한다.
127 |      * 
128 |      * - 종성과 매칭된 코드 조회 
129 |      * - 두 자로 이루어진 종성코드가 존재한다.
130 |      * 
131 |      * @param index
132 |      * @param word
133 |      * @return
134 |      */
135 |     public static Map<String, Integer> getInfoForJongSung(int index, String word) {
136 |         int code;
137 |         int idx = index;
138 |         
139 |         int temp = KeyboardUtil.getDoubleFinal(idx, word);
140 |         if (-1 == temp) {
141 |             temp = KeyboardUtil.getSingleMedial(idx + 1, word);
142 |             if (temp != -1) {
143 |                 code = 0;
144 |                 idx--;
145 |             } else {
146 |                 code = KeyboardUtil.getSingleFinal(idx, word);
147 |                 if (code == -1) {
148 |                     code = 0;
149 |                     idx--;
150 |                 }
151 |             }
152 |             
153 |         } else {
154 |             code = temp;
155 |             temp = KeyboardUtil.getSingleMedial(idx + 2, word);
156 |             if (temp != -1) {
157 |                 code = KeyboardUtil.getSingleFinal(idx, word);
158 |             } else {
159 |                 idx++;
160 |             }
161 |             
162 |         }
163 |         
164 |         Map<String, Integer> m = new HashMap<>();
165 |         m.put("code", code);
166 |         m.put("idx", idx);
167 | 
168 |         return m;
169 |     }
170 |     
171 |     
172 |    
173 |     
174 |     /**
175 |      * 1자로 구성된 중성 유니코드 Index를 리턴한다.
176 |      * 
177 |      * @param index
178 |      * @param word
179 |      * @return
180 |      */
181 |     private static int getSingleMedial(int index, String word) {
182 |         if ((index + 1) <= word.length()) {
183 |             return makeUnicodeIndex(CodeType.JUNGSUNG, word.substring(index, index+1));
184 |         } else {
185 |             return -1;
186 |         }
187 |     }
188 | 
189 |     /**
190 |      * 2자로 구성된 중성 유니코드 Index를 리턴한다.
191 |      * 
192 |      * @param index
193 |      * @param word
194 |      * @return
195 |      */
196 |     private static int getDoubleMedial(int index, String word) {
197 |         if ((index + 2) > word.length()) {
198 |             return -1;
199 |         } else {
200 |             return makeUnicodeIndex(CodeType.JUNGSUNG, word.substring(index, index+2));
201 |         }
202 |     }
203 | 
204 |     /**
205 |      * 1자로 구성된 종성 유니코드 Index를 리턴한다.
206 |      * 
207 |      * @param index
208 |      * @param word
209 |      * @return
210 |      */
211 |     private static int getSingleFinal(int index, String word) {
212 |         if ((index + 1) <= word.length())  {
213 |             return makeUnicodeIndex(CodeType.JONGSUNG, word.substring(index, index+1));
214 |         } else {
215 |             return -1;
216 |         }
217 |     }
218 | 
219 |     /**
220 |      * 2자로 구성된 종성 유니코드 Index를 리턴한다.
221 |      * 
222 |      * @param index
223 |      * @param word
224 |      * @return
225 |      */
226 |     private static int getDoubleFinal(int index, String word) {
227 |         if ((index + 2) > word.length()) {
228 |             return -1;
229 |         } else {
230 |             return makeUnicodeIndex(CodeType.JONGSUNG, word.substring(index, index+2));
231 |         }
232 |     }    
233 |     
234 |     
235 |     /**
236 |      * 키보드상에 매칭된 유니코드값 Index를 리턴한다.
237 |      * 
238 |      * @param type
239 |      * @param sub_str
240 |      * @return
241 |      */
242 |     private static int makeUnicodeIndex(CodeType type, String subStr) {
243 |         switch (type) {
244 |             case CHOSUNG:                
245 |                 for (int i=0; i<KEYBOARD_CHO_SUNG.length; i++) {
246 |                     if (KEYBOARD_CHO_SUNG[i].equals(subStr)) {
247 |                         return i * 28 * 21; 
248 |                     }
249 |                 }                
250 |                 break;
251 | 
252 |             case JUNGSUNG:
253 |                 for (int i=0; i<KEYBOARD_JUNG_SUNG.length; i++) {
254 |                     if (KEYBOARD_JUNG_SUNG[i].equals(subStr)) {
255 |                         return i * 28;
256 |                     }
257 |                 }
258 |                 break;
259 | 
260 |             case JONGSUNG:
261 |                 for (int i=0; i<KEYBOARD_JONG_SUNG.length; i++) {
262 |                     if (KEYBOARD_JONG_SUNG[i].equals(subStr)) {
263 |                         return i + 1;
264 |                     }
265 |                 }
266 |                 break;
267 |                 
268 |             default:
269 |                 break;
270 |         }
271 | 
272 |         return -1;
273 |     } 
274 |     
275 |   
276 |     
277 |     
278 | }
279 | 
280 | 
281 | 
282 | 
283 | 
284 | 
285 | 
286 | 
287 | 
288 | 
289 | 
290 | 
291 | 
292 | 
293 | 
294 | 
295 | 
296 | 
297 | 


--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/plugin/analysis/JavacafePlugin.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.analysis;
 2 | 
 3 | import java.util.HashMap;
 4 | import java.util.Map;
 5 | 
 6 | import org.elasticsearch.index.analysis.TokenFilterFactory;
 7 | import org.elasticsearch.index.analysis.chosung.JavacafeChosungTokenFilterFactory;
 8 | import org.elasticsearch.index.analysis.eng2kor.JavacafeEng2KorConvertFilterFactory;
 9 | import org.elasticsearch.index.analysis.jamo.JavacafeJamoTokenFilterFactory;
10 | import org.elasticsearch.index.analysis.kor2eng.JavacafeKor2EngConvertFilterFactory;
11 | import org.elasticsearch.index.analysis.spell.JavacafeSpellFilterFactory;
12 | import org.elasticsearch.indices.analysis.AnalysisModule;
13 | import org.elasticsearch.plugins.AnalysisPlugin;
14 | import org.elasticsearch.plugins.Plugin;
15 | 
16 | /**
17 |  * Javacafe에서 개발한 필터 리스트
18 |  *
19 |  * @author hrkim
20 |  *
21 |  */
22 | public class JavacafePlugin extends Plugin implements AnalysisPlugin {
23 | 
24 |     @Override
25 |     public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() {        
26 |         Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> extra = new HashMap<>();
27 |         
28 |         // (1) 한글 자모 분석 필터
29 |         extra.put("javacafe_jamo", JavacafeJamoTokenFilterFactory::new);
30 |         
31 |         // (2) 한글 초성 분석 필터
32 |         extra.put("javacafe_chosung", JavacafeChosungTokenFilterFactory::new);
33 |         
34 |         // (3) 영한 오타 변환 필터
35 |         extra.put("javacafe_eng2kor", JavacafeEng2KorConvertFilterFactory::new);
36 |         
37 |         // (4) 한영 오타 변환 필터
38 |         extra.put("javacafe_kor2eng", JavacafeKor2EngConvertFilterFactory::new);
39 | 
40 |         // (5) 한글 스펠링 체크 필터
41 |         extra.put("javacafe_spell", JavacafeSpellFilterFactory::new);
42 |                         
43 |         return extra;
44 |     }
45 | 
46 | }
47 | 
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/src/main/resources/plugin-descriptor.properties:
--------------------------------------------------------------------------------
1 | name=${project.artifactId}
2 | description=Elasticsearch Javacafe Plugin. 
3 | version=${project.version}
4 | classname=org.elasticsearch.plugin.analysis.JavacafePlugin
5 | elasticsearch.version=${elasticsearch.version}
6 | java.version=1.8
7 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/esTest/AbstractPluginTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.esTest;
 2 | 
 3 | import java.io.IOException;
 4 | import java.io.StringReader;
 5 | 
 6 | import org.apache.lucene.analysis.TokenStream;
 7 | import org.apache.lucene.analysis.Tokenizer;
 8 | import org.apache.lucene.analysis.standard.StandardTokenizer;
 9 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
10 | import org.elasticsearch.index.analysis.TokenFilterFactory;
11 | import org.elasticsearch.test.ESTestCase;
12 | 
13 | 
14 | public class AbstractPluginTest extends ESTestCase {
15 | 
16 |     
17 |     public void runFilter(TokenFilterFactory myFilter, String source, String[] result) throws IOException {
18 |         init();
19 |         
20 |         // StandardTokenizer 생성
21 |         Tokenizer tokenizer = new StandardTokenizer();
22 |         tokenizer.setReader(new StringReader(source));
23 |         
24 |   
25 |         // 필터를 이용하여 tokenStream 생성
26 |         TokenStream tokenStream = myFilter.create(tokenizer);       
27 |         tokenStream.reset();
28 |         
29 |         CharTermAttribute termAttr = tokenStream.getAttribute(CharTermAttribute.class);
30 |         
31 |         
32 |         // 테스트 시작
33 |         System.out.println("[소스] : " + source);
34 |         
35 |         int i = 0;
36 |         while (tokenStream.incrementToken()) {
37 |             String t = termAttr.toString();
38 |             
39 |             System.out.println("Token[" + i + "] => [예상결과] : " + result[i] + " , [실제결과] : " + t);
40 |             //assertThat("Token 생성이 잘못되었습니다.", t, equalTo(result[i]));
41 |             
42 |             i++;
43 |         }
44 |         
45 |         System.out.println("[결과] 생성된 Token 수 : " + i);
46 |         //assertThat("Token 수가 일치하지 않습니다.", i, equalTo(result.length));
47 |         
48 |         destroy();
49 |     }
50 |     
51 |     
52 |     public void init() {
53 |         System.out.println("-------------------------------");
54 |         System.out.println("테스트를 시작합니다.");
55 |         System.out.println("-------------------------------");
56 |     }
57 |     
58 |     
59 |     public void destroy() {
60 |         System.out.println("-------------------------------");
61 |         System.out.println("테스트를 종료합니다.");
62 |         System.out.println("-------------------------------");
63 |     }
64 |     
65 | }
66 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/esTest/JavacafeChosungTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.esTest;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.elasticsearch.common.settings.Settings;
 6 | import org.elasticsearch.index.Index;
 7 | import org.elasticsearch.index.analysis.TokenFilterFactory;
 8 | import org.elasticsearch.plugin.analysis.JavacafePlugin;
 9 | 
10 | 
11 | public class JavacafeChosungTest extends AbstractPluginTest {
12 | 
13 |     
14 |     /**
15 |      * 초성 필터를 테스트한다.
16 |      * 
17 |      * @throws IOException
18 |      */
19 |     public void testChosungFilter() throws Exception {
20 |                 
21 |         String source = "자바카페 한글";
22 |         
23 |         String[] result = new String[]{
24 |                 "ㅈㅂㅋㅍ", 
25 |                 "ㅎㄱ"
26 |         };
27 |         
28 |         String filterName = "javacafe_chosung";
29 |         
30 | 
31 |         // 실행
32 |         TestAnalysis analysis = createTestAnalysis(
33 |                 new Index("test", ""), Settings.builder().build(), new JavacafePlugin()
34 |         );
35 |         
36 |         TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName);
37 |         runFilter(myFilter, source, result);
38 |     }
39 |     
40 | 
41 | 
42 |     
43 | }
44 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/esTest/JavacafeEng2KorTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.esTest;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.elasticsearch.common.settings.Settings;
 6 | import org.elasticsearch.index.Index;
 7 | import org.elasticsearch.index.analysis.TokenFilterFactory;
 8 | import org.elasticsearch.plugin.analysis.JavacafePlugin;
 9 | 
10 | 
11 | public class JavacafeEng2KorTest extends AbstractPluginTest {
12 | 
13 |     
14 |     /**
15 |      * 영한 오타 변환기를 테스트한다.
16 |      * 
17 |      * @throws IOException
18 |      */
19 |     public void test() throws Exception {
20 |                 
21 |         String source = "wkqkzkvp gksrmf";
22 |         
23 |         String[] result = new String[]{
24 |                 "자바카페", 
25 |                 "한글"
26 |         };
27 |         
28 |         String filterName = "javacafe_eng2kor";
29 |         
30 | 
31 |         // 실행
32 |         TestAnalysis analysis = createTestAnalysis(
33 |                 new Index("test", ""), Settings.builder().build(), new JavacafePlugin()
34 |         );
35 |         
36 |         TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName);
37 |         runFilter(myFilter, source, result);
38 |     }
39 |     
40 |     
41 |     
42 | }
43 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/esTest/JavacafeJamoTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.esTest;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.elasticsearch.common.settings.Settings;
 6 | import org.elasticsearch.index.Index;
 7 | import org.elasticsearch.index.analysis.TokenFilterFactory;
 8 | import org.elasticsearch.plugin.analysis.JavacafePlugin;
 9 | 
10 | 
11 | public class JavacafeJamoTest extends AbstractPluginTest {
12 | 
13 |     
14 |     /**
15 |      * 자모 필터를 테스트한다.
16 |      * 
17 |      * @throws IOException
18 |      */
19 |     public void testJamoFilter() throws Exception {
20 |                 
21 |         String source = "자바카페 한글";
22 |         
23 |         String[] result = new String[]{
24 |                 "ㅈㅏㅂㅏㅋㅏㅍㅔ", 
25 |                 "ㅎㅏㄴㄱㅡㄹ"
26 |         };
27 |         
28 |         String filterName = "javacafe_jamo";
29 |         
30 | 
31 |         // 실행
32 |         TestAnalysis analysis = createTestAnalysis(
33 |                 new Index("test", ""), Settings.builder().build(), new JavacafePlugin()
34 |         );
35 |         
36 |         TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName);
37 |         runFilter(myFilter, source, result);
38 |     }
39 |     
40 | 
41 |     
42 |     
43 |     
44 | }
45 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/esTest/JavacafeKor2EngTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.esTest;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.elasticsearch.common.settings.Settings;
 6 | import org.elasticsearch.index.Index;
 7 | import org.elasticsearch.index.analysis.TokenFilterFactory;
 8 | import org.elasticsearch.plugin.analysis.JavacafePlugin;
 9 | 
10 | 
11 | public class JavacafeKor2EngTest extends AbstractPluginTest {
12 | 
13 |     
14 |     /**
15 |      * 한영 오타 변환기를 테스트한다.
16 |      * 
17 |      * @throws IOException
18 |      */
19 |     public void test() throws Exception {
20 |                 
21 |         String source = "ㅓㅁㅍㅁㅊㅁㄹㄷ ㅑㅔㅗㅐㅜㄷ";
22 |         
23 |         String[] result = new String[]{
24 |                 "javacafe", 
25 |                 "iphone"
26 |         };
27 |         
28 |         String filterName = "javacafe_kor2eng";
29 |         
30 | 
31 |         // 실행
32 |         TestAnalysis analysis = createTestAnalysis(
33 |                 new Index("test", ""), Settings.builder().build(), new JavacafePlugin()
34 |         );
35 |         
36 |         TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName);
37 |         runFilter(myFilter, source, result);
38 |     }
39 |     
40 |     
41 |     
42 | }
43 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/esTest/JavacafeSpellTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.esTest;
 2 | 
 3 | import org.elasticsearch.common.settings.Settings;
 4 | import org.elasticsearch.index.Index;
 5 | import org.elasticsearch.index.analysis.TokenFilterFactory;
 6 | import org.elasticsearch.plugin.analysis.JavacafePlugin;
 7 | 
 8 | 
 9 | public class JavacafeSpellTest extends AbstractPluginTest {
10 | 
11 |     
12 |     public void test1() throws Exception {
13 |         
14 |         String source = "자바카페 한글";
15 |         
16 |         String[] result = new String[]{
17 |                 "ㅈㅏㅂㅏㅋㅏㅍㅔ", 
18 |                 "ㅎㅏㄴㄱㅡㄹ"
19 |         };
20 |         
21 |         String filterName = "javacafe_spell";
22 |         
23 | 
24 |         // 실행
25 |         TestAnalysis analysis = createTestAnalysis(
26 |                 new Index("test", ""), Settings.builder().build(), new JavacafePlugin()
27 |         );
28 |         
29 |         TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName);
30 |         runFilter(myFilter, source, result);
31 |     }
32 |     
33 | 
34 |     public void test2() throws Exception {
35 |                 
36 |         String source = "삼성전자";
37 |         
38 |         String[] result = new String[] {
39 |                 "",
40 |                 "ㅅㅏㅁㅅㅓㅇㅈㅓㄴㅈㅏ"
41 |         };
42 |         
43 |         String filterName = "javacafe_spell";
44 |         
45 | 
46 |         // 실행
47 |         TestAnalysis analysis = createTestAnalysis(
48 |                 new Index("test", ""), Settings.builder().build(), new JavacafePlugin()
49 |         );
50 |         
51 |         TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName);
52 |         runFilter(myFilter, source, result);
53 |     }
54 |     
55 | 
56 |     
57 |     
58 |     
59 | }
60 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/luceneTest/TokenTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.luceneTest;
 2 | 
 3 | import static org.junit.Assert.assertTrue;
 4 | 
 5 | import java.io.IOException;
 6 | import java.io.Reader;
 7 | import java.io.StringReader;
 8 | 
 9 | import org.apache.lucene.analysis.TokenStream;
10 | import org.apache.lucene.analysis.standard.StandardAnalyzer;
11 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
12 | import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
13 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
14 | import org.junit.Test;
15 | 
16 | public class TokenTest {
17 | 
18 | 
19 |     String text = 
20 |             "Every mammal on this planet instinctively develops a natural " + 
21 |             "equilibrium with the surrounding environment; " + 
22 |             "but you humans do not. Instead you multiply, " + 
23 |             "and multiply, until every resource is consumed." +
24 |             
25 |             "The only way for you to survive is to spread to another area. " + 
26 |             
27 |             "There is another organism on this planet that follows the same pattern... a virus.";
28 |     
29 |     String fieldName = "content";
30 |     
31 |     
32 |     @Test
33 |     public void test() throws IOException {
34 | 
35 |         Reader textReader = new StringReader(text);
36 |         
37 |         // 필드명과 텍스트 값을 위한 TokenStream 생성
38 |         StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
39 |         TokenStream tokenStream = standardAnalyzer.tokenStream(fieldName, textReader);
40 | 
41 |         CharTermAttribute terms = tokenStream.addAttribute(CharTermAttribute.class);
42 |         OffsetAttribute offsets = tokenStream.addAttribute(OffsetAttribute.class);
43 |         PositionIncrementAttribute positions = tokenStream.addAttribute(PositionIncrementAttribute.class);
44 |         
45 |         System.out.println("INCR\t(START,\tEND)\tTERM");        
46 |         System.out.println();
47 |         
48 |         tokenStream.reset();
49 |         while (tokenStream.incrementToken()) {
50 |             
51 |             // 다음 토큰을 읽을때마다 attribute 값이 새롭게 세팅되어 제공된다.
52 |             String term = terms.toString();
53 | 
54 |             int increment = positions.getPositionIncrement();
55 |             
56 |             int start = offsets.startOffset();
57 |             int end = offsets.endOffset();
58 |             
59 |             System.out.print(increment + "\t" + "(" + start + ",\t" + end + ")\t" + term);
60 |             System.out.println();
61 |         }
62 |         
63 |         standardAnalyzer.close();
64 |         
65 |         
66 |         assertTrue(true);
67 |     }
68 |     
69 |     
70 | 
71 |     
72 | }
73 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/utilTest/ConverterE2KTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.utilTest;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | 
 5 | import org.elasticsearch.index.common.converter.EngToKorConverter;
 6 | import org.junit.Test;
 7 | 
 8 | public class ConverterE2KTest {
 9 |     
10 |     
11 |     @Test
12 |     public void test1() {
13 |         String token = "wkqkzkvp";
14 |         
15 |         EngToKorConverter convert = new EngToKorConverter();
16 |         String result = convert.convert(token);
17 |         
18 |         System.out.println(result);
19 |         assertEquals("자바카페", result);
20 |     }
21 |     
22 | 
23 |     @Test
24 |     public void test2() {
25 |         String token = "tkatjdwjswk";
26 |         
27 |         EngToKorConverter convert = new EngToKorConverter();
28 |         String result = convert.convert(token);
29 |         
30 |         System.out.println(result);
31 |         assertEquals("삼성전자", result);
32 |     }
33 | 
34 |    
35 |     @Test
36 |     public void test3() {
37 |         String token = "gksrmf";
38 |         
39 |         EngToKorConverter convert = new EngToKorConverter();
40 |         String result = convert.convert(token);
41 |         
42 |         System.out.println(result);
43 |         assertEquals("한글", result);
44 |     }
45 |     
46 |     
47 |     @Test
48 |     public void test4() {
49 |         String token = "gksrmf1";
50 |         
51 |         EngToKorConverter convert = new EngToKorConverter();
52 |         String result = convert.convert(token);
53 |         
54 |         System.out.println(result);
55 |         assertEquals("한글1", result);
56 |     }
57 | 
58 |     
59 |     
60 | }
61 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/utilTest/ConverterK2ETest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.utilTest;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | 
 5 | import org.elasticsearch.index.common.converter.KorToEngConverter;
 6 | import org.junit.Test;
 7 | 
 8 | public class ConverterK2ETest {
 9 |     
10 |     
11 | 
12 |     @Test
13 |     public void test1() {
14 |         String token = "ㅓㅁㅍㅁㅊㅁㄹㄷ";
15 |         
16 |         KorToEngConverter convert = new KorToEngConverter();
17 |         String result = convert.convert(token);
18 |         
19 |         System.out.println(result);
20 |         assertEquals("javacafe", result);
21 |     }
22 |     
23 |     
24 |     @Test
25 |     public void test2() {
26 |         String token = "ㅑㅔㅗㅐㅜㄷ";
27 |         
28 |         KorToEngConverter convert = new KorToEngConverter();
29 |         String result = convert.convert(token);
30 |         
31 |         System.out.println(result);
32 |         assertEquals("iphone", result);
33 |     }
34 |     
35 |     
36 |     @Test
37 |     public void test3() {
38 |         String token = "재ㅡ무";
39 |         
40 |         KorToEngConverter convert = new KorToEngConverter();
41 |         String result = convert.convert(token);
42 |         
43 |         System.out.println(result);
44 |         assertEquals("woman", result);
45 |     }
46 |     
47 |     
48 |     @Test
49 |     public void test4() {
50 |         String token = "ㄴ므녀ㅜㅎ";
51 |         
52 |         KorToEngConverter convert = new KorToEngConverter();
53 |         String result = convert.convert(token);
54 |         
55 |         System.out.println(result);
56 |         assertEquals("samsung", result);
57 |     }
58 |     
59 | 
60 |     @Test
61 |     public void test5() {
62 |         String token = "ㄴ므녀ㅜㅎ1";
63 |         
64 |         KorToEngConverter convert = new KorToEngConverter();
65 |         String result = convert.convert(token);
66 |         
67 |         System.out.println(result);
68 |         assertEquals("samsung1", result);
69 |     }
70 |     
71 |     
72 |     @Test
73 |     public void test6() {
74 |         String token = "신혼여행(身魂旅行)";
75 |         
76 |         KorToEngConverter convert = new KorToEngConverter();
77 |         String result = convert.convert(token);
78 |         
79 |         System.out.println(result);
80 |         assertEquals("tlsghsdugod()", result);
81 |     }
82 | 
83 | }
84 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/utilTest/MergerTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.utilTest;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | 
 5 | import java.util.ArrayList;
 6 | import java.util.List;
 7 | 
 8 | import org.elasticsearch.index.common.merger.KoreanMerger;
 9 | import org.junit.Test;
10 | 
11 | public class MergerTest {
12 | 
13 |     
14 |     @Test
15 |     public void mergerTest() throws Exception {
16 |         List<String> jasoList = new ArrayList<>();
17 |         jasoList.add("ㅎ");
18 |         jasoList.add("ㅏ");
19 |         jasoList.add("ㄴ");
20 |         jasoList.add("ㄱ");
21 |         jasoList.add("ㅡ");
22 |         jasoList.add("ㄹ");
23 |         
24 |         KoreanMerger merger = new KoreanMerger();
25 |         String word = merger.merge(jasoList);
26 |         
27 |         System.out.println("결과 : " + word);
28 |         assertEquals("한글", word);
29 |     }
30 |     
31 |     
32 |     @Test
33 |     public void mergerTest2() throws Exception {
34 |         List<String> jasoList = new ArrayList<>();
35 |         jasoList.add("ㅈ");
36 |         jasoList.add("ㅏ");
37 |         jasoList.add("ㅂ");
38 |         jasoList.add("ㅏ");
39 |         jasoList.add("ㅋ");
40 |         jasoList.add("ㅏ");
41 |         jasoList.add("ㅍ");
42 |         jasoList.add("ㅔ");
43 |         
44 |         KoreanMerger merger = new KoreanMerger();
45 |         String word = merger.merge(jasoList);
46 |         
47 |         System.out.println("결과 : " + word);
48 |         assertEquals("자바카페", word);
49 |     }
50 | 
51 |     
52 |     @Test
53 |     public void mergerTest3() throws Exception {
54 |         List<String> jasoList = new ArrayList<>();
55 |         jasoList.add("ㅅ");
56 |         jasoList.add("ㅏ");
57 |         jasoList.add("ㅁ");
58 |         jasoList.add("ㅅ");
59 |         jasoList.add("ㅓ");
60 |         jasoList.add("ㅇ");
61 |         jasoList.add("ㅈ");
62 |         jasoList.add("ㅓ");
63 |         jasoList.add("ㄴ");
64 |         jasoList.add("ㅈ");
65 |         jasoList.add("ㅏ");
66 |         
67 |         KoreanMerger merger = new KoreanMerger();
68 |         String word = merger.merge(jasoList);
69 |         
70 |         System.out.println("결과 : " + word);
71 |         assertEquals("삼성전자", word);
72 |     }
73 |     
74 | }
75 | 
76 | 
77 | 
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/utilTest/ParserChosungTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.utilTest;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | 
 5 | import org.elasticsearch.index.common.parser.KoreanChosungParser;
 6 | import org.junit.Test;
 7 | 
 8 | public class ParserChosungTest {
 9 |     
10 |     
11 |     @Test
12 |     public void chosungTest() {
13 |         String token = "자바카페";
14 |         KoreanChosungParser parser = new KoreanChosungParser();
15 |         String result = parser.parse(token);
16 | 
17 |         System.out.println(result);
18 |         assertEquals("ㅈㅂㅋㅍ", result);
19 |     }
20 |     
21 | 
22 |     @Test
23 |     public void chosungTest2() {
24 |         String token = "삼성전자";
25 |         KoreanChosungParser parser = new KoreanChosungParser();
26 |         String result = parser.parse(token);
27 | 
28 |         System.out.println(result);
29 |         assertEquals("ㅅㅅㅈㅈ", result);
30 |     }
31 |     
32 | 
33 |     
34 |     
35 |     
36 | }
37 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/utilTest/ParserJamoTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.utilTest;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | 
 5 | import org.elasticsearch.index.common.parser.KoreanJamoParser;
 6 | import org.junit.Test;
 7 | 
 8 | public class ParserJamoTest {
 9 | 
10 | 
11 |     @Test
12 |     public void jamoTest() {
13 |         String token = "자바카페";
14 |         KoreanJamoParser parser = new KoreanJamoParser();
15 |         String result = parser.parse(token);
16 | 
17 |         System.out.println(result);
18 |         assertEquals("ㅈㅏㅂㅏㅋㅏㅍㅔ", result);
19 |     }
20 |     
21 |     
22 |     @Test
23 |     public void jamoTest2() {
24 |         String token = "삼성전자";
25 |         KoreanJamoParser parser = new KoreanJamoParser();
26 |         String result = parser.parse(token);
27 | 
28 |         System.out.println(result);
29 |         assertEquals("ㅅㅏㅁㅅㅓㅇㅈㅓㄴㅈㅏ", result);
30 |     }
31 |     
32 | }
33 | 


--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/plugin/utilTest/SpellCheckTest.java:
--------------------------------------------------------------------------------
 1 | package org.elasticsearch.plugin.utilTest;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | 
 5 | import org.elasticsearch.index.common.parser.KoreanJamoParser;
 6 | import org.junit.Test;
 7 | 
 8 | public class SpellCheckTest {
 9 | 
10 | 
11 |     @Test
12 |     public void spellTest() {
13 |         String token = "자바카페";
14 |         KoreanJamoParser parser = new KoreanJamoParser();
15 |         String result = parser.parse(token);
16 | 
17 |         System.out.println(result);
18 |         assertEquals("ㅈㅏㅂㅏㅋㅏㅍㅔ", result);
19 |     }
20 |     
21 | 
22 |     
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------