├── .gitignore ├── README.md ├── pom.xml ├── release.xml └── src ├── main ├── java │ └── org │ │ ├── apache │ │ └── solr │ │ │ └── index │ │ │ └── analysis │ │ │ ├── chosung │ │ │ └── JavacafeChosungTokenFilterFactory.java │ │ │ ├── eng2kor │ │ │ └── JavacafeEng2KorConvertFilterFactory.java │ │ │ ├── jamo │ │ │ └── JavacafeJamoTokenFilterFactory.java │ │ │ └── kor2eng │ │ │ └── JavacafeKor2EngConvertFilterFactory.java │ │ └── elasticsearch │ │ ├── index │ │ ├── analysis │ │ │ ├── chosung │ │ │ │ ├── JavacafeChosungTokenFilter.java │ │ │ │ └── JavacafeChosungTokenFilterFactory.java │ │ │ ├── eng2kor │ │ │ │ ├── JavacafeEng2KorConvertFilter.java │ │ │ │ ├── JavacafeEng2KorConvertFilter2.java │ │ │ │ ├── JavacafeEng2KorConvertFilter3.java │ │ │ │ └── JavacafeEng2KorConvertFilterFactory.java │ │ │ ├── jamo │ │ │ │ ├── JavacafeJamoTokenFilter.java │ │ │ │ └── JavacafeJamoTokenFilterFactory.java │ │ │ ├── kor2eng │ │ │ │ ├── JavacafeKor2EngConvertFilter.java │ │ │ │ └── JavacafeKor2EngConvertFilterFactory.java │ │ │ └── spell │ │ │ │ ├── JavacafeSpellFilter.java │ │ │ │ └── JavacafeSpellFilterFactory.java │ │ └── common │ │ │ ├── converter │ │ │ ├── EngToKorConverter.java │ │ │ └── KorToEngConverter.java │ │ │ ├── merger │ │ │ └── KoreanMerger.java │ │ │ ├── parser │ │ │ ├── AbstractKoreanParser.java │ │ │ ├── KoreanChosungParser.java │ │ │ └── KoreanJamoParser.java │ │ │ ├── type │ │ │ └── CodeType.java │ │ │ └── util │ │ │ ├── HangulUtil.java │ │ │ ├── JamoUtil.java │ │ │ └── KeyboardUtil.java │ │ └── plugin │ │ └── analysis │ │ └── JavacafePlugin.java └── resources │ └── plugin-descriptor.properties └── test └── java └── org └── elasticsearch └── plugin ├── esTest ├── AbstractPluginTest.java ├── JavacafeChosungTest.java ├── JavacafeEng2KorTest.java ├── JavacafeJamoTest.java ├── JavacafeKor2EngTest.java └── JavacafeSpellTest.java ├── luceneTest └── TokenTest.java └── utilTest ├── ConverterE2KTest.java ├── ConverterK2ETest.java ├── MergerTest.java ├── 
ParserChosungTest.java ├── ParserJamoTest.java └── SpellCheckTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | ### Eclipse ### 2 | *.pydevproject 3 | .metadata 4 | .gradle 5 | bin/ 6 | tmp/ 7 | target/ 8 | *.tmp 9 | *.bak 10 | *.swp 11 | *~.nib 12 | local.properties 13 | .settings/ 14 | .loadpath 15 | .factorypath 16 | .classpath 17 | .project 18 | logs/ 19 | .idea 20 | work/Tomcat/ 21 | 22 | # Spring 23 | .springBeans 24 | 25 | # External tool builders 26 | .externalToolBuilders/ 27 | 28 | # Locally stored "Eclipse launch configurations" 29 | *.launch 30 | 31 | # CDT-specific 32 | .cproject 33 | 34 | # PDT-specific 35 | .buildpath 36 | 37 | # sbteclipse plugin 38 | .target 39 | 40 | # TeXlipse plugin 41 | .texlipse 42 | 43 | ### Maven ### 44 | pom.xml.tag 45 | pom.xml.releaseBackup 46 | pom.xml.versionsBackup 47 | pom.xml.next 48 | release.properties 49 | 50 | ### Java ### 51 | *.class 52 | 53 | # Mobile Tools for Java (J2ME) 54 | .mtj.tmp/ 55 | 56 | # Package Files # 57 | *.jar 58 | *.war 59 | *.ear 60 | 61 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 62 | hs_err_pid* 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # elasticsearch-plugin 2 | 자바카페 Elasticsearch 플러그인 3 | 4 | [elasticsearch-plugin](https://github.com/javacafe-project/elasticsearch-plugin)은 사용자가 한글을 검색하기 쉽게 만들어진 플러그인입니다. 
5 | 6 | > 링크 다운로드 7 | > 8 | >[7.0.0](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v7.0.0) 9 | > 10 | >[6.7.0](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.7.0) 11 | > 12 | >[6.6.2](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.6.2) 13 | > 14 | >[6.6.1](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.6.1) 15 | > 16 | >[6.6.0](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.6.0) 17 | > 18 | >[6.5.4](https://github.com/javacafe-project/elasticsearch-plugin/releases/tag/v6.5.4) 19 | 20 | # 설치방법 21 | >~~~~ 22 | >elasticsearch-plugin install https://github.com/javacafe-project/elastic-book-etc/raw/master/plugin/javacafe-analyzer-6.4.3.zip 23 | >~~~~ 24 | 25 | # 제공기능 26 | 27 | 엘라스틱서치 혹은 솔라의 최신버전에서 사용 가능한 한글기반의 자동완성/검색결과를 더욱 효율적으로 사용하기 위해 개발된 플러그인이며 아래와 같은 기능을 제공합니다. 28 | 29 | ## 초성추출 30 | 검색어로 들어오는 단어가 초성인 경우 검색 결과 혹은 자동완성의 결과를 초성으로 매칭하여 검색되게 하는 플러그인입니다. 31 | 32 | ### 사용방법 33 | 34 | 35 | ## 자소분해 36 | 자동완성에서 한글을 검색 가능한 형태로 변형하는 플러그인입니다. 예를 들어 삼성전자의 경우 삼ㅅ만 검색하여도 삼성전자가 검색될 수 있도록 한글의 자소를 분해하여 검색할 수 있도록 합니다. 37 | 38 | ### 사용방법 39 | 40 | 41 | ## 한영/영한 오타교정 42 | 한글을 영문으로, 영문을 한글로 검색한 결과를 보정해주는 플러그인입니다. 예를 들어 삼성전자를 tkatjdwjswk라고 검색하거나 iphone을 ㅑㅔㅙㅜㄷ와 같이 잘못 검색한 경우 검색 결과를 도출할 수 있도록 도와줍니다. 
43 | 44 | ### 사용방법 45 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 4.0.0 6 | 7 | org.elasticsearch.plugin 8 | javacafe-analyzer 9 | 1.0 10 | 11 | jar 12 | 13 | 14 | 15 | 6.4.3 16 | 17 | 7.2.1 18 | 19 | 20 | 21 | 22 | 23 | 24 | org.elasticsearch 25 | elasticsearch 26 | ${elasticsearch.version} 27 | provided 28 | 29 | 30 | org.apache.solr 31 | solr-core 32 | ${lucene.version} 33 | jar 34 | provided 35 | 36 | 37 | org.apache.lucene 38 | lucene-core 39 | ${lucene.version} 40 | provided 41 | 42 | 43 | 44 | org.apache.commons 45 | commons-lang3 46 | 3.5 47 | 48 | 49 | 50 | org.apache.logging.log4j 51 | log4j-core 52 | 2.16.0 53 | provided 54 | 55 | 56 | 57 | 58 | 59 | org.elasticsearch.test 60 | framework 61 | ${elasticsearch.version} 62 | provided 63 | 64 | 65 | junit 66 | junit 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | junit 75 | junit 76 | 4.11 77 | provided 78 | 79 | 80 | org.hamcrest 81 | hamcrest-core 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | javacafe-analyzer-${elasticsearch.version} 92 | 93 | 94 | src/main/resources 95 | false 96 | 97 | *.properties 98 | 99 | 100 | 101 | 102 | 103 | org.apache.maven.plugins 104 | maven-compiler-plugin 105 | 106 | 1.8 107 | 1.8 108 | UTF-8 109 | 110 | 111 | 112 | org.apache.maven.plugins 113 | maven-dependency-plugin 114 | 3.0.0 115 | 116 | 117 | copy-dependencies 118 | package 119 | 120 | copy-dependencies 121 | 122 | 123 | ${project.build.directory}/lib 124 | 125 | 126 | 127 | 128 | 129 | org.apache.maven.plugins 130 | maven-surefire-plugin 131 | 2.12.1 132 | 133 | -Dtests.security.manager=false 134 | true 135 | 136 | 137 | 138 | org.apache.maven.plugins 139 | maven-assembly-plugin 140 | 3.0.0 141 | 142 | false 143 | ${project.build.directory}/releases 144 | 145 | release.xml 146 | 147 | 148 | 149 | 150 | zip-with-dependencies 151 | package 152 | 153 | single 154 | 155 | 156 | 157 
| 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /release.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | zip-with-dependencies 6 | 7 | zip 8 | 9 | false 10 | 11 | 12 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 13 | 14 | true 15 | 16 | 17 | 18 | 19 | 20 | true 21 | true 22 | 23 | 24 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/solr/index/analysis/chosung/JavacafeChosungTokenFilterFactory.java:
--------------------------------------------------------------------------------
package org.apache.solr.index.analysis.chosung;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.chosung.JavacafeChosungTokenFilter;

import java.util.Map;

/**
 * Lucene {@link TokenFilterFactory} (Solr side of the plugin) that wraps a
 * token stream in a {@link JavacafeChosungTokenFilter}.
 */
public class JavacafeChosungTokenFilterFactory extends TokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param args factory arguments, handed unchanged to the
     *             {@link TokenFilterFactory} constructor
     */
    public JavacafeChosungTokenFilterFactory(Map args) {
        super(args);
    }

    /**
     * Wraps the given stream in a new chosung filter.
     *
     * @param stream upstream token stream
     * @return a new {@link JavacafeChosungTokenFilter} reading from {@code stream}
     */
    @Override
    public TokenStream create(TokenStream stream) {
        final TokenStream chosungFiltered = new JavacafeChosungTokenFilter(stream);
        return chosungFiltered;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/apache/solr/index/analysis/eng2kor/JavacafeEng2KorConvertFilterFactory.java:
--------------------------------------------------------------------------------
package org.apache.solr.index.analysis.eng2kor;

import
org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.elasticsearch.index.analysis.eng2kor.JavacafeEng2KorConvertFilter;

import java.util.Map;

/**
 * Lucene {@link TokenFilterFactory} (Solr side of the plugin) that wraps a
 * token stream in a {@link JavacafeEng2KorConvertFilter}.
 */
public class JavacafeEng2KorConvertFilterFactory extends TokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param args factory arguments, handed unchanged to the
     *             {@link TokenFilterFactory} constructor
     */
    public JavacafeEng2KorConvertFilterFactory(Map args) {
        super(args);
    }

    /**
     * Wraps the given stream in a new English-to-Korean conversion filter.
     *
     * @param tokenStream upstream token stream
     * @return a new {@link JavacafeEng2KorConvertFilter} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        final TokenStream converted = new JavacafeEng2KorConvertFilter(tokenStream);
        return converted;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/apache/solr/index/analysis/jamo/JavacafeJamoTokenFilterFactory.java:
--------------------------------------------------------------------------------
package org.apache.solr.index.analysis.jamo;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.elasticsearch.index.analysis.jamo.JavacafeJamoTokenFilter;

import java.util.Map;

/**
 * Lucene {@link TokenFilterFactory} (Solr side of the plugin) that wraps a
 * token stream in a {@link JavacafeJamoTokenFilter}.
 */
public class JavacafeJamoTokenFilterFactory extends TokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param args factory arguments, handed unchanged to the
     *             {@link TokenFilterFactory} constructor
     */
    public JavacafeJamoTokenFilterFactory(Map args) {
        super(args);
    }

    /**
     * Wraps the given stream in a new jamo filter.
     *
     * @param stream upstream token stream
     * @return a new {@link JavacafeJamoTokenFilter} reading from {@code stream}
     */
    @Override
    public TokenStream create(TokenStream stream) {
        final TokenStream jamoFiltered = new JavacafeJamoTokenFilter(stream);
        return jamoFiltered;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/apache/solr/index/analysis/kor2eng/JavacafeKor2EngConvertFilterFactory.java:
--------------------------------------------------------------------------------
package org.apache.solr.index.analysis.kor2eng;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import
org.elasticsearch.index.analysis.kor2eng.JavacafeKor2EngConvertFilter; 6 | 7 | import java.util.Map; 8 | 9 | /** 10 | * 11 | * 12 | * 13 | * 14 | * 15 | * 16 | * 17 | * 18 | * 19 | * 20 | * 21 | * 22 | * 23 | * 24 | * 25 | * 26 | * 27 | * */ 28 | public class JavacafeKor2EngConvertFilterFactory extends TokenFilterFactory { 29 | 30 | 31 | public JavacafeKor2EngConvertFilterFactory(Map args) { 32 | super(args); 33 | } 34 | 35 | 36 | @Override 37 | public TokenStream create(TokenStream tokenStream) { 38 | return new JavacafeKor2EngConvertFilter(tokenStream); 39 | } 40 | 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/analysis/chosung/JavacafeChosungTokenFilter.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.analysis.chosung; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.lucene.analysis.TokenFilter; 6 | import org.apache.lucene.analysis.TokenStream; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.elasticsearch.index.common.parser.KoreanChosungParser; 9 | 10 | /** 11 | * 한글 초성 분석 필터 12 | * 13 | * @author hrkim 14 | * 15 | */ 16 | public final class JavacafeChosungTokenFilter extends TokenFilter { 17 | 18 | private KoreanChosungParser parser; 19 | private CharTermAttribute termAtt; 20 | 21 | 22 | public JavacafeChosungTokenFilter(TokenStream stream) { 23 | super(stream); 24 | this.parser = new KoreanChosungParser(); 25 | this.termAtt = addAttribute(CharTermAttribute.class); 26 | } 27 | 28 | 29 | /** 30 | * 한글 초성 Parser를 이용하여 토큰을 파싱하고 Term을 구한다. 
31 | */ 32 | @Override 33 | public boolean incrementToken() throws IOException { 34 | 35 | if (input.incrementToken()) { 36 | CharSequence parserdData = parser.parse(termAtt.toString()); 37 | termAtt.setEmpty(); 38 | termAtt.append(parserdData); 39 | 40 | return true; 41 | } 42 | 43 | return false; 44 | } 45 | 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/analysis/chosung/JavacafeChosungTokenFilterFactory.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.analysis.chosung; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | import org.elasticsearch.common.settings.Settings; 5 | import org.elasticsearch.env.Environment; 6 | import org.elasticsearch.index.IndexSettings; 7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; 8 | 9 | public class JavacafeChosungTokenFilterFactory extends AbstractTokenFilterFactory { 10 | 11 | 12 | public JavacafeChosungTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { 13 | super(indexSettings, name, settings); 14 | } 15 | 16 | 17 | @Override 18 | public TokenStream create(TokenStream stream) { 19 | return new JavacafeChosungTokenFilter(stream); 20 | } 21 | 22 | 23 | 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/analysis/eng2kor/JavacafeEng2KorConvertFilter.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.analysis.eng2kor; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.lucene.analysis.TokenFilter; 6 | import org.apache.lucene.analysis.TokenStream; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.elasticsearch.index.common.converter.EngToKorConverter; 9 | 10 | /** 11 | * 영한 변환 필터 12 | * 13 | * 
@author hrkim
 *
 */
public final class JavacafeEng2KorConvertFilter extends TokenFilter {

    /** Converter applied to each incoming term. */
    private final EngToKorConverter converter;

    /** Term attribute shared with the wrapped stream; rewritten in place. */
    private final CharTermAttribute termAtt;

    /**
     * @param stream upstream token stream to read tokens from
     */
    public JavacafeEng2KorConvertFilter(TokenStream stream) {
        super(stream);
        this.converter = new EngToKorConverter();
        this.termAtt = addAttribute(CharTermAttribute.class);
    }

    /**
     * Advances the wrapped stream and overwrites the term with the text
     * returned by {@link EngToKorConverter#convert}.
     *
     * @return {@code true} if a token was produced, {@code false} once the
     *         wrapped stream is exhausted
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
            return false;
        }

        // Read the term before clearing it: the attribute is rewritten in place.
        CharSequence converted = converter.convert(termAtt.toString());
        termAtt.setEmpty();
        termAtt.append(converted);
        return true;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/eng2kor/JavacafeEng2KorConvertFilter2.java:
--------------------------------------------------------------------------------
package org.elasticsearch.index.analysis.eng2kor;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.elasticsearch.index.common.converter.EngToKorConverter;

import java.io.IOException;
import java.util.LinkedList;
import java.util.Queue;

/**
 * English-to-Korean conversion filter that keeps the original token.
 *
 * <p>Each input token is emitted unchanged; the following call emits a second
 * token whose term is the {@link EngToKorConverter#convert} result, with a
 * position increment of 0 so that it occupies the same position.
 *
 * @author hrkim
 */
public final class JavacafeEng2KorConvertFilter2 extends TokenFilter {

    private final EngToKorConverter converter;
    private final CharTermAttribute termAtt;
    private final PositionIncrementAttribute positionIncrementAttribute;

    /** Converted terms waiting to be emitted after their original token. */
    private final Queue<char[]> simpleQueue;

    /**
     * @param stream upstream token stream to read tokens from
     */
    public JavacafeEng2KorConvertFilter2(TokenStream stream) {
        super(stream);

        this.converter = new EngToKorConverter();
        this.termAtt = addAttribute(CharTermAttribute.class);
        this.positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);

        this.simpleQueue = new LinkedList<>();
    }

    /**
     * Emits a pending converted term if one is queued; otherwise advances the
     * wrapped stream, passes the token through unchanged and queues its
     * converted form for the next call.
     *
     * @return {@code true} if a token was produced, {@code false} once the
     *         wrapped stream is exhausted and nothing is queued
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public boolean incrementToken() throws IOException {
        char[] pending = simpleQueue.poll();
        if (pending != null) {
            // The wrapped stream has not advanced, so all other attributes
            // still describe the original token; only term and position change.
            termAtt.setEmpty();
            termAtt.copyBuffer(pending, 0, pending.length);
            positionIncrementAttribute.setPositionIncrement(0);
            return true;
        }

        if (!input.incrementToken()) {
            return false;
        }

        String result = converter.convert(termAtt.toString());
        simpleQueue.add(result.toCharArray());
        return true;
    }

    /**
     * Resets the wrapped stream and drops any converted term still queued, so
     * that a reused filter never emits a term left over from a previous stream.
     *
     * @throws IOException if the wrapped stream fails to reset
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        simpleQueue.clear();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/eng2kor/JavacafeEng2KorConvertFilter3.java:
--------------------------------------------------------------------------------
package org.elasticsearch.index.analysis.eng2kor;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.elasticsearch.index.common.converter.EngToKorConverter;

import java.io.IOException;
import java.util.LinkedList;
import java.util.Queue;

/**
 * English-to-Korean conversion filter that keeps the original token.
 *
 * <p>Each input token is emitted unchanged; the following call emits a second
 * token whose term is the {@link EngToKorConverter#convert} result, with a
 * position increment of 0 so that it occupies the same position.
 *
 * @author hrkim
 */
public final class JavacafeEng2KorConvertFilter3 extends TokenFilter {

    private final EngToKorConverter converter;
    private final CharTermAttribute termAtt;
    private final PositionIncrementAttribute positionIncrementAttribute;

    /** Converted terms waiting to be emitted after their original token. */
    private final Queue<char[]> simpleQueue;

    /**
     * @param stream upstream token stream to read tokens from
     */
    public JavacafeEng2KorConvertFilter3(TokenStream stream) {
        super(stream);
        this.converter = new EngToKorConverter();
        this.termAtt = addAttribute(CharTermAttribute.class);

        this.positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);

        this.simpleQueue = new LinkedList<>();
    }

    /**
     * Emits a pending converted term if one is queued; otherwise advances the
     * wrapped stream, passes the token through unchanged and queues its
     * converted form for the next call.
     *
     * @return {@code true} if a token was produced, {@code false} once the
     *         wrapped stream is exhausted and nothing is queued
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public boolean incrementToken() throws IOException {
        char[] pending = simpleQueue.poll();
        if (pending != null) {
            // The wrapped stream has not advanced, so all other attributes
            // still describe the original token; only term and position change.
            termAtt.setEmpty();
            termAtt.copyBuffer(pending, 0, pending.length);
            positionIncrementAttribute.setPositionIncrement(0);
            return true;
        }

        if (!input.incrementToken()) {
            return false;
        }

        String result = converter.convert(termAtt.toString());
        simpleQueue.add(result.toCharArray());
        return true;
    }

    /**
     * Resets the wrapped stream and drops any converted term still queued, so
     * that a reused filter never emits a term left over from a previous stream.
     *
     * @throws IOException if the wrapped stream fails to reset
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        simpleQueue.clear();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/eng2kor/JavacafeEng2KorConvertFilterFactory.java:
--------------------------------------------------------------------------------
package org.elasticsearch.index.analysis.eng2kor;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Elasticsearch token filter factory producing
 * {@link JavacafeEng2KorConvertFilter} instances.
 */
public class JavacafeEng2KorConvertFilterFactory extends AbstractTokenFilterFactory {

    /**
     * @param indexSettings index settings, forwarded to the superclass
     * @param env           node environment; accepted but not used by this factory
     * @param name          filter name, forwarded to the superclass
     * @param settings      filter settings, forwarded to the superclass
     */
    public JavacafeEng2KorConvertFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * @param tokenStream upstream token stream
     * @return a new {@link JavacafeEng2KorConvertFilter} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new JavacafeEng2KorConvertFilter(tokenStream);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/jamo/JavacafeJamoTokenFilter.java:
--------------------------------------------------------------------------------
package org.elasticsearch.index.analysis.jamo;

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.index.common.parser.KoreanJamoParser;

/**
 * Korean jamo analysis filter: every token's term text is replaced by the
 * output of {@link KoreanJamoParser#parse}.
 *
 * @author hrkim
 */
public final class JavacafeJamoTokenFilter extends TokenFilter {

    /** Parser applied to each incoming term. */
    private final KoreanJamoParser parser;

    /** Term attribute shared with the wrapped stream; rewritten in place. */
    private final CharTermAttribute termAtt;

    /**
     * @param stream upstream token stream to read tokens from
     */
    public JavacafeJamoTokenFilter(TokenStream stream) {
        super(stream);
        this.parser = new KoreanJamoParser();
        this.termAtt = addAttribute(CharTermAttribute.class);
    }

    /**
     * Advances the wrapped stream and overwrites the term with the jamo
     * parser's result.
     *
     * @return {@code true} if a token was produced, {@code false} once the
     *         wrapped stream is exhausted
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
            return false;
        }

        // Read the term before clearing it: the attribute is rewritten in place.
        CharSequence jamo = parser.parse(termAtt.toString());
        termAtt.setEmpty();
        termAtt.append(jamo);
        return true;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/jamo/JavacafeJamoTokenFilterFactory.java:
--------------------------------------------------------------------------------
package org.elasticsearch.index.analysis.jamo;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Elasticsearch token filter factory producing {@link JavacafeJamoTokenFilter}
 * instances.
 */
public class JavacafeJamoTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * @param indexSettings index settings, forwarded to the superclass
     * @param env           node environment; accepted but not used by this factory
     * @param name          filter name, forwarded to the superclass
     * @param settings      filter settings, forwarded to the superclass
     */
    public JavacafeJamoTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * @param stream upstream token stream
     * @return a new {@link JavacafeJamoTokenFilter} reading from {@code stream}
     */
    @Override
    public TokenStream create(TokenStream stream) {
        final TokenStream jamoFiltered = new JavacafeJamoTokenFilter(stream);
        return jamoFiltered;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/kor2eng/JavacafeKor2EngConvertFilter.java:
--------------------------------------------------------------------------------
package org.elasticsearch.index.analysis.kor2eng;

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.index.common.converter.KorToEngConverter;

/**
 * Korean-to-English conversion filter: every token's term text is replaced by
 * the output of {@link KorToEngConverter#convert}.
 *
 * @author hrkim
 */
public final class JavacafeKor2EngConvertFilter extends TokenFilter {

    /** Converter applied to each incoming term. */
    private final KorToEngConverter converter;

    /** Term attribute shared with the wrapped stream; rewritten in place. */
    private final CharTermAttribute termAtt;

    /**
     * @param stream upstream token stream to read tokens from
     */
    public JavacafeKor2EngConvertFilter(TokenStream stream) {
        super(stream);
        this.converter = new KorToEngConverter();
        this.termAtt = addAttribute(CharTermAttribute.class);
    }

    /**
     * Advances the wrapped stream and overwrites the term with the converter's
     * result.
     *
     * @return {@code true} if a token was produced, {@code false} once the
     *         wrapped stream is exhausted
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
            return false;
        }

        // Read the term before clearing it: the attribute is rewritten in place.
        CharSequence converted = converter.convert(termAtt.toString());
        termAtt.setEmpty();
        termAtt.append(converted);
        return true;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/kor2eng/JavacafeKor2EngConvertFilterFactory.java:
--------------------------------------------------------------------------------
package org.elasticsearch.index.analysis.kor2eng;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.settings.Settings; 5 | import org.elasticsearch.env.Environment; 6 | import org.elasticsearch.index.IndexSettings; 7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; 8 | 9 | 10 | public class JavacafeKor2EngConvertFilterFactory extends AbstractTokenFilterFactory { 11 | 12 | 13 | public JavacafeKor2EngConvertFilterFactory(IndexSettings indexSettings, Environment env , String name, Settings settings) { 14 | super(indexSettings, name, settings); 15 | } 16 | 17 | 18 | @Override 19 | public TokenStream create(TokenStream tokenStream) { 20 | return new JavacafeKor2EngConvertFilter(tokenStream); 21 | } 22 | 23 | 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/analysis/spell/JavacafeSpellFilter.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.analysis.spell; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.lucene.analysis.TokenFilter; 6 | import org.apache.lucene.analysis.TokenStream; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.elasticsearch.index.common.parser.KoreanJamoParser; 9 | 10 | /** 11 | * 스펠링 체크 필터 12 | * 13 | * @author hrkim 14 | * 15 | */ 16 | public final class JavacafeSpellFilter extends TokenFilter { 17 | 18 | private KoreanJamoParser parser; 19 | private CharTermAttribute termAtt; 20 | 21 | 22 | public JavacafeSpellFilter(TokenStream stream) { 23 | super(stream); 24 | this.parser = new KoreanJamoParser(); 25 | this.termAtt = addAttribute(CharTermAttribute.class); 26 | } 27 | 28 | 29 | /** 30 | * 한글 자모 Parser를 이용하여 토큰을 파싱하고 Term을 구한다. 
31 | */ 32 | @Override 33 | public boolean incrementToken() throws IOException { 34 | 35 | if (input.incrementToken()) { 36 | CharSequence parserdData = parser.parse(termAtt.toString()); 37 | termAtt.setEmpty(); 38 | termAtt.append(parserdData); 39 | 40 | return true; 41 | } 42 | 43 | return false; 44 | } 45 | 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/analysis/spell/JavacafeSpellFilterFactory.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.analysis.spell; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | import org.elasticsearch.common.settings.Settings; 5 | import org.elasticsearch.env.Environment; 6 | import org.elasticsearch.index.IndexSettings; 7 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; 8 | 9 | public class JavacafeSpellFilterFactory extends AbstractTokenFilterFactory { 10 | 11 | 12 | public JavacafeSpellFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { 13 | super(indexSettings, name, settings); 14 | } 15 | 16 | 17 | @Override 18 | public TokenStream create(TokenStream stream) { 19 | return new JavacafeSpellFilter(stream); 20 | } 21 | 22 | 23 | 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/common/converter/EngToKorConverter.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.common.converter; 2 | 3 | import java.util.Map; 4 | 5 | import org.elasticsearch.index.common.util.JamoUtil; 6 | import org.elasticsearch.index.common.util.KeyboardUtil; 7 | 8 | /** 9 | * 영한 오타 변환기 (Eng -> Kor) 10 | * 11 | * @author hrkim 12 | * 13 | */ 14 | public class EngToKorConverter { 15 | 16 | 17 | /** 18 | * 토큰을 영문 키보드 기준으로 변환한다. 
19 | * 20 | * @param token 21 | * @return 22 | */ 23 | public String convert(String token) { 24 | StringBuilder sb = new StringBuilder(); 25 | 26 | // 문자열을 한글자씩 잘라서 처리한다. 27 | String word = token.trim(); 28 | for (int index = 0; index < word.length(); index++) { 29 | 30 | // 처리 불가능한 글자는 그냥 넘긴다. 31 | if (KeyboardUtil.IGNORE_CHAR.indexOf(word.substring(index, index + 1)) > -1) { 32 | sb.append(word.substring(index, index + 1)); 33 | index++; 34 | } 35 | if (index >= word.length()) { 36 | break; 37 | } 38 | 39 | try { 40 | // 초성 정보를 구한다. 41 | Map mChoSung = KeyboardUtil.getInfoForChoSung(index, word); 42 | int cho = mChoSung.get("code"); 43 | index = mChoSung.get("idx"); 44 | 45 | // 중성 정보를 구한다. 46 | Map mJungSung = KeyboardUtil.getInfoForJungSung(index, word); 47 | int jung = mJungSung.get("code"); 48 | index = mJungSung.get("idx"); 49 | 50 | // 종성 정보를 구한다. 51 | Map mJongSung = KeyboardUtil.getInfoForJongSung(index, word); 52 | int jong = mJongSung.get("code"); 53 | index = mJongSung.get("idx"); 54 | 55 | // 한글 유니코드를 생성한다. 56 | sb.append((char) (JamoUtil.START_KOREA_UNICODE + cho + jung + jong)); 57 | 58 | } catch(Exception e) {} 59 | } 60 | 61 | return sb.toString(); 62 | } 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/common/converter/KorToEngConverter.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.common.converter; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | import org.elasticsearch.index.common.type.CodeType; 5 | import org.elasticsearch.index.common.util.JamoUtil; 6 | import org.elasticsearch.index.common.util.KeyboardUtil; 7 | 8 | /** 9 | * 한영 오타 변환기 (Kor -> Eng) 10 | * 11 | * @author hrkim 12 | * 13 | */ 14 | public class KorToEngConverter { 15 | 16 | 17 | /** 18 | * 토큰을 한글 키보드 기준으로 변환한다. 
19 | * 20 | * @param token 21 | * @return 22 | */ 23 | public String convert(String token) { 24 | StringBuilder sb = new StringBuilder(); 25 | 26 | // 문자열을 한글자씩 잘라서 처리한다. 27 | String word = token.trim(); 28 | for (int index = 0; index < word.length(); index++) { 29 | 30 | // 처리 불가능한 글자는 그냥 넘긴다. 31 | if (KeyboardUtil.IGNORE_CHAR.indexOf(word.substring(index, index + 1)) > -1) { 32 | sb.append(word.substring(index, index + 1)); 33 | index++; 34 | } 35 | if (index >= word.length()) { 36 | break; 37 | } 38 | 39 | try { 40 | int init = word.charAt(index); 41 | int initUnicode = init - JamoUtil.START_KOREA_UNICODE; 42 | 43 | if (initUnicode > 0) { 44 | /** 45 | * 1글자로 조합형 한글이 들어올 경우 처리 46 | */ 47 | int cho = initUnicode / 21 / 28; // 0 ~ 18 48 | String strCho = getSameEngChar(CodeType.CHOSUNG, cho); 49 | if (StringUtils.isNotEmpty(strCho)) { 50 | sb.append(strCho); 51 | } 52 | 53 | 54 | int jung = initUnicode / 28 % 21; // 0 ~ 20 55 | String strJung = getSameEngChar(CodeType.JUNGSUNG, jung); 56 | if (StringUtils.isNotEmpty(strJung)) { 57 | sb.append(strJung); 58 | } 59 | 60 | int jong = initUnicode % 28; // 0 ~ 27 61 | String strJong = getSameEngChar(CodeType.JONGSUNG, jong); 62 | if (StringUtils.isNotEmpty(strJong)) { 63 | sb.append(strJong); 64 | } 65 | 66 | } else { 67 | /** 68 | * 1글자로 자모가 들어올 경우 처리 69 | */ 70 | String subStr = String.valueOf((char) init); 71 | sb.append(getSameEngCharForJamo(subStr, 0)); 72 | } 73 | } catch(Exception e) {} 74 | } 75 | 76 | return sb.toString(); 77 | } 78 | 79 | 80 | 81 | 82 | private String getSameEngChar(CodeType type, int pos) { 83 | switch (type) { 84 | case CHOSUNG: 85 | return KeyboardUtil.KEYBOARD_CHO_SUNG[pos]; 86 | 87 | case JUNGSUNG: 88 | return KeyboardUtil.KEYBOARD_JUNG_SUNG[pos]; 89 | 90 | case JONGSUNG: 91 | if ((pos - 1) > -1) { 92 | return KeyboardUtil.KEYBOARD_JONG_SUNG[pos - 1]; 93 | } 94 | return ""; 95 | } 96 | 97 | return ""; 98 | } 99 | 100 | 101 | private String getSameEngCharForJamo(String key, int pos) { 102 
| for (int i=0; i jamoList) throws Exception { 22 | String result = ""; 23 | 24 | if (jamoList.size() == 0) { 25 | return ""; 26 | } 27 | 28 | int jungSungSize = HangulUtil.JUNG_SUNG.length; 29 | int jongSungSize = HangulUtil.JONG_SUNG.length; 30 | 31 | int startIdx = 0; 32 | while (true) { 33 | if (startIdx >= jamoList.size()) { 34 | break; 35 | } 36 | 37 | // 자모 리스트에서 한글 한글자에 해당하는 사이즈를 구한다. 38 | int oneHangulJamoSize = HangulUtil.getOneHangulJamoSize(startIdx, jamoList); 39 | if (oneHangulJamoSize == -1) { 40 | throw new Exception("한글은 최소 2개 이상의 유니코드 조합으로 이루어져야 합니다."); 41 | } 42 | 43 | // 한글 유니코드가 시작되는 Decimal값을 구한다. 44 | int decimalCode = HangulUtil.START_KOREA_UNICODE_DECIMAL; 45 | 46 | // 초성에 해당하는 Decimal값을 더한다. 47 | int chosungIdx = HangulUtil.getChoSungIndex(startIdx, jamoList); 48 | if (chosungIdx >= 0) { 49 | decimalCode = decimalCode + (jongSungSize * jungSungSize * chosungIdx); 50 | } 51 | 52 | // 중성에 해당하는 Decimal값을 더한다. 53 | int jungsungIdx = HangulUtil.getJungSungIndex(startIdx, jamoList); 54 | if (jungsungIdx >= 0) { 55 | decimalCode = decimalCode + (jongSungSize * jungsungIdx); 56 | } 57 | 58 | // 종성에 해당하는 Decimal값을 더한다. 59 | if (oneHangulJamoSize > 2) { 60 | int jongsungIdx = HangulUtil.getJongSungIndex(startIdx, jamoList); 61 | if (jongsungIdx >= 0) { 62 | decimalCode = decimalCode + jongsungIdx; 63 | } 64 | } 65 | 66 | // Decimal값을 String으로 변환한다. 
67 | String hangul = Character.toString((char)decimalCode); 68 | result = result + hangul; 69 | 70 | startIdx = startIdx + oneHangulJamoSize; 71 | } 72 | 73 | return result; 74 | } 75 | 76 | 77 | 78 | } 79 | 80 | 81 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/common/parser/AbstractKoreanParser.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.common.parser; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | import org.elasticsearch.index.common.util.JamoUtil; 5 | 6 | /** 7 | * 한글 기본 Parser 8 | * 9 | * @author hrkim 10 | * 11 | */ 12 | public abstract class AbstractKoreanParser { 13 | 14 | 15 | /** 16 | * 토큰을 자음과 모음으로 파싱한다. 17 | * 18 | * @param token 19 | * @return 20 | */ 21 | public String parse(String token) { 22 | if (StringUtils.isBlank(token)) { 23 | return ""; 24 | } 25 | 26 | StringBuilder result = new StringBuilder(); 27 | 28 | // 토큰을 한글자씩 잘라서 처리한다. 29 | char[] arrCh = token.toCharArray(); 30 | for(char ch : arrCh) { 31 | 32 | // 처리 할 char의 유니코드 인덱스를 구한다. 33 | char unicodeIndex = (char)(ch - JamoUtil.START_KOREA_UNICODE); 34 | 35 | // 한글 유니코드 범위 : 0xAC00 ~ 0xD7AF (11184개) 36 | // 한글 유니코드인지 검사한다. 37 | if(unicodeIndex >= 0 && unicodeIndex <= 11184) { 38 | 39 | // 초성 유니코드 40 | int idxChoSung = unicodeIndex / (28 * 21); 41 | char chosung = JamoUtil.UNICODE_CHO_SUNG[idxChoSung]; 42 | 43 | // 중성 유니코드 44 | int idxJungSung = unicodeIndex % (28 * 21) / 28; 45 | char jungsung = JamoUtil.UNICODE_JUNG_SUNG[idxJungSung]; 46 | 47 | // 종성 유니코드 48 | int idxJongSung = unicodeIndex % (28 * 21) % 28; 49 | char jongsung = JamoUtil.UNICODE_JONG_SUNG[idxJongSung]; 50 | 51 | // 한글 한글자를 처리한다. 52 | processForKoreanChar(result, chosung, jungsung, jongsung); 53 | 54 | } else { 55 | 56 | // 한글이 아닌 한글자를 처리한다. 57 | processForOther(result, ch); 58 | } 59 | } 60 | 61 | // 토큰을 분석한 최종 결과를 리턴한다. 
62 | return result.toString(); 63 | } 64 | 65 | 66 | /** 67 | * 한글 문자를 처리한다. 68 | * 69 | * @param sb 70 | * @param chosung 71 | * @param jungsung 72 | * @param jongsung 73 | */ 74 | protected abstract void processForKoreanChar(StringBuilder sb, char chosung, char jungsung, char jongsung); 75 | 76 | 77 | /** 78 | * 한글 문자를 제외한 일반 문자를 처리한다. 79 | * 80 | * @param sb 81 | * @param eachToken 82 | */ 83 | protected abstract void processForOther(StringBuilder sb, char eachToken); 84 | 85 | 86 | 87 | } 88 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/common/parser/KoreanChosungParser.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.common.parser; 2 | 3 | /** 4 | * 한글 초성 Parser 5 | * 6 | * @author hrkim 7 | * 8 | */ 9 | public class KoreanChosungParser extends AbstractKoreanParser { 10 | 11 | 12 | @Override 13 | protected void processForKoreanChar(StringBuilder sb, char chosung, char jungsung, char jongsung) { 14 | sb.append(chosung); 15 | } 16 | 17 | 18 | 19 | @Override 20 | protected void processForOther(StringBuilder sb, char eachToken) { 21 | sb.append(eachToken); 22 | } 23 | 24 | 25 | 26 | } 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/common/parser/KoreanJamoParser.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.common.parser; 2 | 3 | import org.elasticsearch.index.common.util.JamoUtil; 4 | 5 | /** 6 | * 한글 자모 Parser 7 | * 8 | * @author hrkim 9 | * 10 | */ 11 | public class KoreanJamoParser extends AbstractKoreanParser { 12 | 13 | 14 | 15 | 16 | @Override 17 | protected void processForKoreanChar(StringBuilder sb, char chosung, char jungsung, char jongsung) { 18 | sb.append(chosung).append(jungsung); 19 | 20 | if(jongsung != JamoUtil.UNICODE_JONG_SUNG_EMPTY) { 
/**
 * Hangul composition tables and lookups over a list of single-jamo strings.
 *
 * <p>The tables hold Hangul Compatibility Jamo. An element's array index is the
 * initial / medial / final index that the merger multiplies into the syllable code point.
 * Code points are written as Unicode escapes so the compiled values do not depend on the
 * source-file encoding passed to the compiler.
 *
 * <p>See http://www.unicode.org/charts/PDF/UAC00.pdf
 *
 * @author hrkim
 */
public class HangulUtil {

    /**
     * Initial consonants (19): g, gg, n, d, dd, r, m, b, bb, s, ss, ng, j, jj, ch, k, t, p, h.
     */
    public static final char[] CHO_SUNG = {
        '\u3131', '\u3132', '\u3134', '\u3137', '\u3138', '\u3139', '\u3141', '\u3142', '\u3143', '\u3145',
        '\u3146', '\u3147', '\u3148', '\u3149', '\u314A', '\u314B', '\u314C', '\u314D', '\u314E'
    };

    /**
     * Medial vowels (21): the contiguous range U+314F (a) through U+3163 (i).
     */
    public static final char[] JUNG_SUNG = {
        '\u314F', '\u3150', '\u3151', '\u3152', '\u3153', '\u3154', '\u3155', '\u3156', '\u3157', '\u3158',
        '\u3159', '\u315A', '\u315B', '\u315C', '\u315D', '\u315E', '\u315F', '\u3160', '\u3161', '\u3162', '\u3163'
    };

    /**
     * Final consonants (28), where slot 0 is a space standing for "no final consonant".
     */
    public static final char[] JONG_SUNG = {
        ' ',      '\u3131', '\u3132', '\u3133', '\u3134', '\u3135', '\u3136', '\u3137', '\u3139', '\u313A',
        '\u313B', '\u313C', '\u313D', '\u313E', '\u313F', '\u3140', '\u3141', '\u3142', '\u3144', '\u3145',
        '\u3146', '\u3147', '\u3148', '\u314A', '\u314B', '\u314C', '\u314D', '\u314E'
    };

    /**
     * Code point of the first Hangul syllable (U+AC00, decimal 44032).
     */
    public static final int START_KOREA_UNICODE_DECIMAL = 44032;


    /**
     * Determines how many jamo, starting at {@code startIdx}, form the next syllable.
     *
     * <p>Initial and final consonants share code points, so when more than three jamo
     * remain the size is decided by looking ahead: if the fourth jamo is a vowel, the third
     * one must be the next syllable's initial consonant and this syllable has two jamo.
     *
     * @param startIdx index of the syllable's first jamo
     * @param jamoList jamo, one per element
     * @return 2 or 3, or -1 when fewer than two jamo remain
     */
    public static int getOneHangulJamoSize(int startIdx, List<String> jamoList) {
        int remainJamoSize = jamoList.size() - startIdx;

        // A syllable needs at least an initial consonant and a vowel. Treating "nothing
        // left" like "one left" avoids the out-of-range list access of the old code.
        if (remainJamoSize < 2) {
            return -1;
        }

        if (remainJamoSize <= 3) {
            return remainJamoSize;
        }

        return indexOfJamo(JUNG_SUNG, jamoList.get(startIdx + 3)) >= 0 ? 2 : 3;
    }


    /**
     * @param startIdx index of the syllable's first jamo
     * @param jamoList jamo, one per element
     * @return index of the initial consonant in {@link #CHO_SUNG}, or -1 if it is not one
     */
    public static int getChoSungIndex(int startIdx, List<String> jamoList) {
        return indexOfJamo(CHO_SUNG, jamoList.get(startIdx));
    }


    /**
     * @param startIdx index of the syllable's first jamo
     * @param jamoList jamo, one per element
     * @return index of the medial vowel in {@link #JUNG_SUNG}, or -1 if it is not one
     */
    public static int getJungSungIndex(int startIdx, List<String> jamoList) {
        return indexOfJamo(JUNG_SUNG, jamoList.get(startIdx + 1));
    }


    /**
     * @param startIdx index of the syllable's first jamo
     * @param jamoList jamo, one per element
     * @return index of the final consonant in {@link #JONG_SUNG}, or -1 if it is not one
     */
    public static int getJongSungIndex(int startIdx, List<String> jamoList) {
        return indexOfJamo(JONG_SUNG, jamoList.get(startIdx + 2));
    }


    /**
     * Finds a one-character jamo string in a table.
     *
     * <p>Only exact single-character matches count. The previous substring search reported
     * index 0 for an empty string and matched multi-character runs of adjacent table entries.
     *
     * @param table table to search
     * @param jamo  candidate jamo
     * @return index in {@code table}, or -1 for {@code null}, non-single-character or unknown input
     */
    private static int indexOfJamo(char[] table, String jamo) {
        if (jamo == null || jamo.length() != 1) {
            return -1;
        }
        char target = jamo.charAt(0);
        for (int i = 0; i < table.length; i++) {
            if (table[i] == target) {
                return i;
            }
        }
        return -1;
    }

}
| 14 | /** 15 | * 초성 (19자)
16 | *
17 | * 초성으로 올 수 있는 유니코드들
18 | * 총 19자로 구성된다.
19 | *
20 | * ㄱ ㄲ ㄴ ㄷ ㄸ ㄹ ㅁ ㅂ ㅃ ㅅ
21 | * ㅆ ㅇ ㅈ ㅉ ㅊ ㅋ ㅌ ㅍ ㅎ
22 | * 23 | */ 24 | public static final char[] UNICODE_CHO_SUNG = { 25 | 0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142, 0x3143, 0x3145, 26 | 0x3146, 0x3147, 0x3148, 0x3149, 0x314A, 0x314B, 0x314C, 0x314D, 0x314E 27 | }; 28 | 29 | 30 | /** 31 | * 중성 (21자)
32 | *
33 | * 중성으로 올 수 있는 유니코드들
34 | * 총 21자로 구성된다.
35 | *
36 | * ㅏ ㅐ ㅑ ㅒ ㅓ ㅔ ㅕ ㅖ ㅗ ㅘ
37 | * ㅙ ㅚ ㅛ ㅜ ㅝ ㅞ ㅟ ㅠ ㅡ ㅢ
38 | * ㅣ
39 | * 40 | */ 41 | public static final char[] UNICODE_JUNG_SUNG = { 42 | 0x314F, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156, 0x3157, 0x3158, 43 | 0x3159, 0x315A, 0x315B, 0x315C, 0x315D, 0x315E, 0x315F, 0x3160, 0x3161, 0x3162, 44 | 0x3163 45 | }; 46 | 47 | 48 | /** 49 | * 종성 (28자)
50 | *
51 | * 종성으로 올 수 있는 유니코드들
52 | * 기본 27자와 "빈값"을 표현하는 1자를 합쳐서 총 28자로 구성된다.
53 | *
54 | * 빈값 ㄱ ㄲ ㄳ ㄴ ㄵ ㄶ ㄷ ㄹ ㄺ
55 | * ㄻ ㄼ ㄽ ㄾ ㄿ ㅀ ㅁ ㅂ ㅄ ㅅ
56 | * ㅆ ㅇ ㅈ ㅊ ㅋ ㅌ ㅍ ㅎ
57 | * 58 | */ 59 | public static final char[] UNICODE_JONG_SUNG = { 60 | 0x0000, 0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3139, 0x313A, 61 | 0x313B, 0x313C, 0x313D, 0x313E, 0x313F, 0x3140, 0x3141, 0x3142, 0x3144, 0x3145, 62 | 0x3146, 0x3147, 0x3148, 0x314A, 0x314B, 0x314C, 0x314D, 0x314E 63 | }; 64 | 65 | 66 | 67 | /** 68 | * 한글 유니코드의 시작값 (가)
69 | *
70 | * 한글 유니코드는 0xAC00로 시작하여 0xD7A3로 끝난다.
71 | * 시작값과 끝값을 벗어난 유니코드는 한글이 아니다.
72 | *
73 | * 시작값 : 0xAC00 가
74 | * 끝값 : 0xD7A3 힣
75 | */ 76 | public static final char START_KOREA_UNICODE = 0xAC00; 77 | 78 | 79 | 80 | /** 81 | * 종성 빈값 유니코드 82 | */ 83 | public static final char UNICODE_JONG_SUNG_EMPTY = 0x0000; 84 | 85 | 86 | 87 | 88 | } 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/common/util/KeyboardUtil.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.index.common.util; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | import org.elasticsearch.index.common.type.CodeType; 7 | 8 | /** 9 | * 한글 키보드 유틸리티 10 | * 11 | * 12 | * @author hrkim 13 | * 14 | */ 15 | public class KeyboardUtil { 16 | 17 | 18 | /** 19 | * Converter 진행시 무시되는 문자들 20 | */ 21 | public static final String IGNORE_CHAR = "`1234567890-=[]\\;',./~!@#$%^&*()_+{}|:\"<>?\' \' "; 22 | 23 | 24 | 25 | /** 26 | * 초성 키에 해당하는 키보드상의 영문자 (19자) 27 | */ 28 | public static final String[] KEYBOARD_CHO_SUNG = { 29 | "r", "R", "s", "e", "E", "f", "a", "q", "Q", "t", 30 | "T", "d", "w", "W", "c", "z", "x", "v", "g" 31 | }; 32 | 33 | /** 34 | * 중성 키에 해당하는 키보스상의 영문자 (21자) 35 | */ 36 | public static final String[] KEYBOARD_JUNG_SUNG = { 37 | "k", "o", "i", "O", "j", "p", "u", "P", "h", "hk", 38 | "ho", "hl", "y", "n", "nj", "np", "nl", "b", "m", "ml", "l" 39 | }; 40 | 41 | /** 42 | * 종성 키에 해당하는 키보드상의 영문자 (27자) - "빈값" 제외 43 | */ 44 | public static final String[] KEYBOARD_JONG_SUNG = { 45 | "r", "R", "rt", "s", "sw", "sg", "e", "f", "fr", "fa", 46 | "fq", "ft", "fx", "fv", "fg", "a", "q", "qt", "t", "T", 47 | "d", "w", "c", "z", "x", "v", "g" 48 | }; 49 | 50 | 51 | 52 | /** 53 | * 키보드상에서 한영키에 의해서 오타 교정이 필요한 키배열 (영문키 33자) 54 | */ 55 | public static final String[] KEYBOARD_KEY_ENG = { 56 | "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", 57 | "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", 58 
| "u", "v", "w", "x", "y", "z", "Q", "W", "E", "R", 59 | "T", "O", "P" 60 | }; 61 | 62 | /** 63 | * 키보드상에서 한영키에 의해서 오타 교정이 필요한 키배열 (한글키 33자) 64 | */ 65 | public static final String[] KEYBOARD_KEY_KOR = { 66 | "ㅁ", "ㅠ", "ㅊ", "ㅇ", "ㄷ", "ㄹ", "ㅎ", "ㅗ", "ㅑ", "ㅓ", 67 | "ㅏ", "ㅣ", "ㅡ", "ㅜ", "ㅐ", "ㅔ", "ㅂ", "ㄱ", "ㄴ", "ㅅ", 68 | "ㅕ", "ㅍ", "ㅈ", "ㅌ", "ㅛ", "ㅋ", "ㅃ", "ㅉ", "ㄸ", "ㄲ", 69 | "ㅆ", "ㅒ", "ㅖ" 70 | }; 71 | 72 | 73 | 74 | 75 | 76 | /** 77 | * 초성 정보를 제공한다. 78 | * 79 | * - 초성과 매칭된 코드 조회 80 | * - 한 자로 이루어진 초성코드만 존재한다. 81 | * 82 | * @param index 83 | * @param word 84 | * @return 85 | */ 86 | public static Map getInfoForChoSung(int index, String word) { 87 | int code = KeyboardUtil.makeUnicodeIndex(CodeType.CHOSUNG, word.substring(index, index + 1)); 88 | int idx = index + 1; 89 | 90 | Map m = new HashMap<>(); 91 | m.put("code", code); 92 | m.put("idx", idx); 93 | 94 | return m; 95 | } 96 | 97 | 98 | /** 99 | * 중성 정보를 제공한다. 100 | * 101 | * - 중성과 매칭된 코드 조회 102 | * - 두 자로 이루어진 중성코드가 존재한다. 103 | * 104 | * @param index 105 | * @param word 106 | * @return 107 | */ 108 | public static Map getInfoForJungSung(int index, String word) { 109 | int code = KeyboardUtil.getDoubleMedial(index, word); 110 | int idx = index + 2; 111 | 112 | if (-1 == code) { 113 | code = KeyboardUtil.getSingleMedial(index, word); 114 | idx = index + 1; 115 | } 116 | 117 | Map m = new HashMap<>(); 118 | m.put("code", code); 119 | m.put("idx", idx); 120 | 121 | return m; 122 | } 123 | 124 | 125 | /** 126 | * 종성 정보를 제공한다. 127 | * 128 | * - 종성과 매칭된 코드 조회 129 | * - 두 자로 이루어진 종성코드가 존재한다. 
130 | * 131 | * @param index 132 | * @param word 133 | * @return 134 | */ 135 | public static Map getInfoForJongSung(int index, String word) { 136 | int code; 137 | int idx = index; 138 | 139 | int temp = KeyboardUtil.getDoubleFinal(idx, word); 140 | if (-1 == temp) { 141 | temp = KeyboardUtil.getSingleMedial(idx + 1, word); 142 | if (temp != -1) { 143 | code = 0; 144 | idx--; 145 | } else { 146 | code = KeyboardUtil.getSingleFinal(idx, word); 147 | if (code == -1) { 148 | code = 0; 149 | idx--; 150 | } 151 | } 152 | 153 | } else { 154 | code = temp; 155 | temp = KeyboardUtil.getSingleMedial(idx + 2, word); 156 | if (temp != -1) { 157 | code = KeyboardUtil.getSingleFinal(idx, word); 158 | } else { 159 | idx++; 160 | } 161 | 162 | } 163 | 164 | Map m = new HashMap<>(); 165 | m.put("code", code); 166 | m.put("idx", idx); 167 | 168 | return m; 169 | } 170 | 171 | 172 | 173 | 174 | /** 175 | * 1자로 구성된 중성 유니코드 Index를 리턴한다. 176 | * 177 | * @param index 178 | * @param word 179 | * @return 180 | */ 181 | private static int getSingleMedial(int index, String word) { 182 | if ((index + 1) <= word.length()) { 183 | return makeUnicodeIndex(CodeType.JUNGSUNG, word.substring(index, index+1)); 184 | } else { 185 | return -1; 186 | } 187 | } 188 | 189 | /** 190 | * 2자로 구성된 중성 유니코드 Index를 리턴한다. 191 | * 192 | * @param index 193 | * @param word 194 | * @return 195 | */ 196 | private static int getDoubleMedial(int index, String word) { 197 | if ((index + 2) > word.length()) { 198 | return -1; 199 | } else { 200 | return makeUnicodeIndex(CodeType.JUNGSUNG, word.substring(index, index+2)); 201 | } 202 | } 203 | 204 | /** 205 | * 1자로 구성된 종성 유니코드 Index를 리턴한다. 
206 | * 207 | * @param index 208 | * @param word 209 | * @return 210 | */ 211 | private static int getSingleFinal(int index, String word) { 212 | if ((index + 1) <= word.length()) { 213 | return makeUnicodeIndex(CodeType.JONGSUNG, word.substring(index, index+1)); 214 | } else { 215 | return -1; 216 | } 217 | } 218 | 219 | /** 220 | * 2자로 구성된 종성 유니코드 Index를 리턴한다. 221 | * 222 | * @param index 223 | * @param word 224 | * @return 225 | */ 226 | private static int getDoubleFinal(int index, String word) { 227 | if ((index + 2) > word.length()) { 228 | return -1; 229 | } else { 230 | return makeUnicodeIndex(CodeType.JONGSUNG, word.substring(index, index+2)); 231 | } 232 | } 233 | 234 | 235 | /** 236 | * 키보드상에 매칭된 유니코드값 Index를 리턴한다. 237 | * 238 | * @param type 239 | * @param sub_str 240 | * @return 241 | */ 242 | private static int makeUnicodeIndex(CodeType type, String subStr) { 243 | switch (type) { 244 | case CHOSUNG: 245 | for (int i=0; i> getTokenFilters() { 26 | Map> extra = new HashMap<>(); 27 | 28 | // (1) 한글 자모 분석 필터 29 | extra.put("javacafe_jamo", JavacafeJamoTokenFilterFactory::new); 30 | 31 | // (2) 한글 초성 분석 필터 32 | extra.put("javacafe_chosung", JavacafeChosungTokenFilterFactory::new); 33 | 34 | // (3) 영한 오타 변환 필터 35 | extra.put("javacafe_eng2kor", JavacafeEng2KorConvertFilterFactory::new); 36 | 37 | // (4) 한영 오타 변환 필터 38 | extra.put("javacafe_kor2eng", JavacafeKor2EngConvertFilterFactory::new); 39 | 40 | // (5) 한글 스펠링 체크 필터 41 | extra.put("javacafe_spell", JavacafeSpellFilterFactory::new); 42 | 43 | return extra; 44 | } 45 | 46 | } 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | name=${project.artifactId} 2 | description=Elasticsearch Javacafe Plugin. 
3 | version=${project.version} 4 | classname=org.elasticsearch.plugin.analysis.JavacafePlugin 5 | elasticsearch.version=${elasticsearch.version} 6 | java.version=1.8 7 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/esTest/AbstractPluginTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.esTest; 2 | 3 | import java.io.IOException; 4 | import java.io.StringReader; 5 | 6 | import org.apache.lucene.analysis.TokenStream; 7 | import org.apache.lucene.analysis.Tokenizer; 8 | import org.apache.lucene.analysis.standard.StandardTokenizer; 9 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 10 | import org.elasticsearch.index.analysis.TokenFilterFactory; 11 | import org.elasticsearch.test.ESTestCase; 12 | 13 | 14 | public class AbstractPluginTest extends ESTestCase { 15 | 16 | 17 | public void runFilter(TokenFilterFactory myFilter, String source, String[] result) throws IOException { 18 | init(); 19 | 20 | // StandardTokenizer 생성 21 | Tokenizer tokenizer = new StandardTokenizer(); 22 | tokenizer.setReader(new StringReader(source)); 23 | 24 | 25 | // 필터를 이용하여 tokenStream 생성 26 | TokenStream tokenStream = myFilter.create(tokenizer); 27 | tokenStream.reset(); 28 | 29 | CharTermAttribute termAttr = tokenStream.getAttribute(CharTermAttribute.class); 30 | 31 | 32 | // 테스트 시작 33 | System.out.println("[소스] : " + source); 34 | 35 | int i = 0; 36 | while (tokenStream.incrementToken()) { 37 | String t = termAttr.toString(); 38 | 39 | System.out.println("Token[" + i + "] => [예상결과] : " + result[i] + " , [실제결과] : " + t); 40 | //assertThat("Token 생성이 잘못되었습니다.", t, equalTo(result[i])); 41 | 42 | i++; 43 | } 44 | 45 | System.out.println("[결과] 생성된 Token 수 : " + i); 46 | //assertThat("Token 수가 일치하지 않습니다.", i, equalTo(result.length)); 47 | 48 | destroy(); 49 | } 50 | 51 | 52 | public void init() { 53 | 
System.out.println("-------------------------------"); 54 | System.out.println("테스트를 시작합니다."); 55 | System.out.println("-------------------------------"); 56 | } 57 | 58 | 59 | public void destroy() { 60 | System.out.println("-------------------------------"); 61 | System.out.println("테스트를 종료합니다."); 62 | System.out.println("-------------------------------"); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/esTest/JavacafeChosungTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.esTest; 2 | 3 | import java.io.IOException; 4 | 5 | import org.elasticsearch.common.settings.Settings; 6 | import org.elasticsearch.index.Index; 7 | import org.elasticsearch.index.analysis.TokenFilterFactory; 8 | import org.elasticsearch.plugin.analysis.JavacafePlugin; 9 | 10 | 11 | public class JavacafeChosungTest extends AbstractPluginTest { 12 | 13 | 14 | /** 15 | * 초성 필터를 테스트한다. 
16 | * 17 | * @throws IOException 18 | */ 19 | public void testChosungFilter() throws Exception { 20 | 21 | String source = "자바카페 한글"; 22 | 23 | String[] result = new String[]{ 24 | "ㅈㅂㅋㅍ", 25 | "ㅎㄱ" 26 | }; 27 | 28 | String filterName = "javacafe_chosung"; 29 | 30 | 31 | // 실행 32 | TestAnalysis analysis = createTestAnalysis( 33 | new Index("test", ""), Settings.builder().build(), new JavacafePlugin() 34 | ); 35 | 36 | TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName); 37 | runFilter(myFilter, source, result); 38 | } 39 | 40 | 41 | 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/esTest/JavacafeEng2KorTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.esTest; 2 | 3 | import java.io.IOException; 4 | 5 | import org.elasticsearch.common.settings.Settings; 6 | import org.elasticsearch.index.Index; 7 | import org.elasticsearch.index.analysis.TokenFilterFactory; 8 | import org.elasticsearch.plugin.analysis.JavacafePlugin; 9 | 10 | 11 | public class JavacafeEng2KorTest extends AbstractPluginTest { 12 | 13 | 14 | /** 15 | * 영한 오타 변환기를 테스트한다. 
16 | * 17 | * @throws IOException 18 | */ 19 | public void test() throws Exception { 20 | 21 | String source = "wkqkzkvp gksrmf"; 22 | 23 | String[] result = new String[]{ 24 | "자바카페", 25 | "한글" 26 | }; 27 | 28 | String filterName = "javacafe_eng2kor"; 29 | 30 | 31 | // 실행 32 | TestAnalysis analysis = createTestAnalysis( 33 | new Index("test", ""), Settings.builder().build(), new JavacafePlugin() 34 | ); 35 | 36 | TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName); 37 | runFilter(myFilter, source, result); 38 | } 39 | 40 | 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/esTest/JavacafeJamoTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.esTest; 2 | 3 | import java.io.IOException; 4 | 5 | import org.elasticsearch.common.settings.Settings; 6 | import org.elasticsearch.index.Index; 7 | import org.elasticsearch.index.analysis.TokenFilterFactory; 8 | import org.elasticsearch.plugin.analysis.JavacafePlugin; 9 | 10 | 11 | public class JavacafeJamoTest extends AbstractPluginTest { 12 | 13 | 14 | /** 15 | * 자모 필터를 테스트한다. 
16 | * 17 | * @throws IOException 18 | */ 19 | public void testJamoFilter() throws Exception { 20 | 21 | String source = "자바카페 한글"; 22 | 23 | String[] result = new String[]{ 24 | "ㅈㅏㅂㅏㅋㅏㅍㅔ", 25 | "ㅎㅏㄴㄱㅡㄹ" 26 | }; 27 | 28 | String filterName = "javacafe_jamo"; 29 | 30 | 31 | // 실행 32 | TestAnalysis analysis = createTestAnalysis( 33 | new Index("test", ""), Settings.builder().build(), new JavacafePlugin() 34 | ); 35 | 36 | TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName); 37 | runFilter(myFilter, source, result); 38 | } 39 | 40 | 41 | 42 | 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/esTest/JavacafeKor2EngTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.esTest; 2 | 3 | import java.io.IOException; 4 | 5 | import org.elasticsearch.common.settings.Settings; 6 | import org.elasticsearch.index.Index; 7 | import org.elasticsearch.index.analysis.TokenFilterFactory; 8 | import org.elasticsearch.plugin.analysis.JavacafePlugin; 9 | 10 | 11 | public class JavacafeKor2EngTest extends AbstractPluginTest { 12 | 13 | 14 | /** 15 | * 한영 오타 변환기를 테스트한다. 
16 | * 17 | * @throws IOException 18 | */ 19 | public void test() throws Exception { 20 | 21 | String source = "ㅓㅁㅍㅁㅊㅁㄹㄷ ㅑㅔㅗㅐㅜㄷ"; 22 | 23 | String[] result = new String[]{ 24 | "javacafe", 25 | "iphone" 26 | }; 27 | 28 | String filterName = "javacafe_kor2eng"; 29 | 30 | 31 | // Run: build a test analysis with JavacafePlugin and fetch the filter by name 32 | TestAnalysis analysis = createTestAnalysis( 33 | new Index("test", ""), Settings.builder().build(), new JavacafePlugin() 34 | ); 35 | 36 | TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName); 37 | runFilter(myFilter, source, result); 38 | } 39 | 40 | 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/esTest/JavacafeSpellTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.esTest; 2 | 3 | import org.elasticsearch.common.settings.Settings; 4 | import org.elasticsearch.index.Index; 5 | import org.elasticsearch.index.analysis.TokenFilterFactory; 6 | import org.elasticsearch.plugin.analysis.JavacafePlugin; 7 | 8 | 9 | /** Looks up the {@code javacafe_spell} token filter from a test analysis built with JavacafePlugin and passes it to runFilter together with a sample input and the expected tokens. */ public class JavacafeSpellTest extends AbstractPluginTest { 10 | 11 | 12 | /** Two space-separated Hangul words in, two jamo strings expected. */ public void test1() throws Exception { 13 | 14 | String source = "자바카페 한글"; 15 | 16 | String[] result = new String[]{ 17 | "ㅈㅏㅂㅏㅋㅏㅍㅔ", 18 | "ㅎㅏㄴㄱㅡㄹ" 19 | }; 20 | 21 | String filterName = "javacafe_spell"; 22 | 23 | 24 | // Run: build a test analysis with JavacafePlugin and fetch the filter by name 25 | TestAnalysis analysis = createTestAnalysis( 26 | new Index("test", ""), Settings.builder().build(), new JavacafePlugin() 27 | ); 28 | 29 | TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName); 30 | runFilter(myFilter, source, result); 31 | } 32 | 33 | 34 | /** Single Hangul word in. NOTE(review): the expected array starts with an empty string before the jamo string - confirm that this is the intended filter output. */ public void test2() throws Exception { 35 | 36 | String source = "삼성전자"; 37 | 38 | String[] result = new String[] { 39 | "", 40 | "ㅅㅏㅁㅅㅓㅇㅈㅓㄴㅈㅏ" 41 | }; 42 | 43 | String filterName = "javacafe_spell"; 44 | 45 | 46 | // Run: build a test analysis with JavacafePlugin and fetch the filter by name 47 | TestAnalysis analysis = createTestAnalysis( 48 | new Index("test", ""), Settings.builder().build(), new
JavacafePlugin() 49 | ); 50 | 51 | TokenFilterFactory myFilter = analysis.tokenFilter.get(filterName); 52 | runFilter(myFilter, source, result); 53 | } 54 | 55 | 56 | 57 | 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/luceneTest/TokenTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.luceneTest; 2 | 3 | import static org.junit.Assert.assertTrue; 4 | 5 | import java.io.IOException; 6 | import java.io.Reader; 7 | import java.io.StringReader; 8 | 9 | import org.apache.lucene.analysis.TokenStream; 10 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 11 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 12 | import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; 13 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 14 | import org.junit.Test; 15 | 16 | /** Prints the position increment, start/end offsets and term of every token that StandardAnalyzer produces for a fixed English text. NOTE(review): the only assertion is assertTrue(true), so nothing is actually verified; also the TokenStream is never end()ed or closed here, only the analyzer is closed - consider try-with-resources. */ public class TokenTest { 17 | 18 | 19 | String text = 20 | "Every mammal on this planet instinctively develops a natural " + 21 | "equilibrium with the surrounding environment; " + 22 | "but you humans do not. Instead you multiply, " + 23 | "and multiply, until every resource is consumed." + 24 | 25 | "The only way for you to survive is to spread to another area. " + 26 | 27 | "There is another organism on this planet that follows the same pattern...
a virus."; 28 | 29 | String fieldName = "content"; 30 | 31 | 32 | @Test 33 | public void test() throws IOException { 34 | 35 | Reader textReader = new StringReader(text); 36 | 37 | // Create a TokenStream for the field name and the text value 38 | StandardAnalyzer standardAnalyzer = new StandardAnalyzer(); 39 | TokenStream tokenStream = standardAnalyzer.tokenStream(fieldName, textReader); 40 | 41 | CharTermAttribute terms = tokenStream.addAttribute(CharTermAttribute.class); 42 | OffsetAttribute offsets = tokenStream.addAttribute(OffsetAttribute.class); 43 | PositionIncrementAttribute positions = tokenStream.addAttribute(PositionIncrementAttribute.class); 44 | 45 | System.out.println("INCR\t(START,\tEND)\tTERM"); 46 | System.out.println(); 47 | 48 | tokenStream.reset(); 49 | while (tokenStream.incrementToken()) { 50 | 51 | // Each time the next token is read, the attribute values are set anew and made available. 52 | String term = terms.toString(); 53 | 54 | int increment = positions.getPositionIncrement(); 55 | 56 | int start = offsets.startOffset(); 57 | int end = offsets.endOffset(); 58 | 59 | System.out.print(increment + "\t" + "(" + start + ",\t" + end + ")\t" + term); 60 | System.out.println(); 61 | } 62 | 63 | standardAnalyzer.close(); 64 | 65 | 66 | assertTrue(true); 67 | } 68 | 69 | 70 | 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/utilTest/ConverterE2KTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.utilTest; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import org.elasticsearch.index.common.converter.EngToKorConverter; 6 | import org.junit.Test; 7 | 8 | /** Unit tests for {@code EngToKorConverter.convert}: each case converts one Latin-letter token and compares the result with the expected Hangul string. */ public class ConverterE2KTest { 9 | 10 | 11 | @Test 12 | public void test1() { 13 | String token = "wkqkzkvp"; 14 | 15 | EngToKorConverter convert = new EngToKorConverter(); 16 | String result = convert.convert(token); 17 | 18 | System.out.println(result); 19 | assertEquals("자바카페", result); 20 |
} 21 | 22 | 23 | /** Converts {@code tkatjdwjswk} and compares the result with the expected Hangul word. */ @Test 24 | public void test2() { 25 | String token = "tkatjdwjswk"; 26 | 27 | EngToKorConverter convert = new EngToKorConverter(); 28 | String result = convert.convert(token); 29 | 30 | System.out.println(result); 31 | assertEquals("삼성전자", result); 32 | } 33 | 34 | 35 | /** Converts {@code gksrmf} and compares the result with the expected Hangul word. */ @Test 36 | public void test3() { 37 | String token = "gksrmf"; 38 | 39 | EngToKorConverter convert = new EngToKorConverter(); 40 | String result = convert.convert(token); 41 | 42 | System.out.println(result); 43 | assertEquals("한글", result); 44 | } 45 | 46 | 47 | /** Same token as test3 with a trailing digit; the expected value keeps the digit at the end. */ @Test 48 | public void test4() { 49 | String token = "gksrmf1"; 50 | 51 | EngToKorConverter convert = new EngToKorConverter(); 52 | String result = convert.convert(token); 53 | 54 | System.out.println(result); 55 | assertEquals("한글1", result); 56 | } 57 | 58 | 59 | 60 | } 61 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/utilTest/ConverterK2ETest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.utilTest; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import org.elasticsearch.index.common.converter.KorToEngConverter; 6 | import org.junit.Test; 7 | 8 | /** Unit tests for {@code KorToEngConverter.convert}: each case converts one token written in Korean characters and compares the result with the expected Latin-letter string. */ public class ConverterK2ETest { 9 | 10 | 11 | 12 | /** Token made of standalone jamo; expects {@code javacafe}. */ @Test 13 | public void test1() { 14 | String token = "ㅓㅁㅍㅁㅊㅁㄹㄷ"; 15 | 16 | KorToEngConverter convert = new KorToEngConverter(); 17 | String result = convert.convert(token); 18 | 19 | System.out.println(result); 20 | assertEquals("javacafe", result); 21 | } 22 | 23 | 24 | /** Token made of standalone jamo; expects {@code iphone}. */ @Test 25 | public void test2() { 26 | String token = "ㅑㅔㅗㅐㅜㄷ"; 27 | 28 | KorToEngConverter convert = new KorToEngConverter(); 29 | String result = convert.convert(token); 30 | 31 | System.out.println(result); 32 | assertEquals("iphone", result); 33 | } 34 | 35 | 36 | /** Token mixing composed syllables with a standalone jamo; expects {@code woman}. */ @Test 37 | public void test3() { 38 | String token = "재ㅡ무"; 39 | 40 | KorToEngConverter convert = new KorToEngConverter(); 41 | String
result = convert.convert(token); 42 | 43 | System.out.println(result); 44 | assertEquals("woman", result); 45 | } 46 | 47 | 48 | /** Token mixing standalone jamo with composed syllables; expects {@code samsung}. */ @Test 49 | public void test4() { 50 | String token = "ㄴ므녀ㅜㅎ"; 51 | 52 | KorToEngConverter convert = new KorToEngConverter(); 53 | String result = convert.convert(token); 54 | 55 | System.out.println(result); 56 | assertEquals("samsung", result); 57 | } 58 | 59 | 60 | /** Same token as test4 with a trailing digit; the expected value keeps the digit at the end. */ @Test 61 | public void test5() { 62 | String token = "ㄴ므녀ㅜㅎ1"; 63 | 64 | KorToEngConverter convert = new KorToEngConverter(); 65 | String result = convert.convert(token); 66 | 67 | System.out.println(result); 68 | assertEquals("samsung1", result); 69 | } 70 | 71 | 72 | /** Input is a Hangul word followed by Hanja in parentheses; the expected string ends with an empty pair of parentheses, i.e. nothing is expected between them. */ @Test 73 | public void test6() { 74 | String token = "신혼여행(身魂旅行)"; 75 | 76 | KorToEngConverter convert = new KorToEngConverter(); 77 | String result = convert.convert(token); 78 | 79 | System.out.println(result); 80 | assertEquals("tlsghsdugod()", result); 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/utilTest/MergerTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.utilTest; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.elasticsearch.index.common.merger.KoreanMerger; 9 | import org.junit.Test; 10 | 11 | /** Unit tests for {@code KoreanMerger.merge}: each case adds single-jamo strings to a list, merges the list and compares the result with the expected Hangul word. NOTE(review): the lists are declared as {@code List} without a type argument - confirm whether the generic parameter was lost in this listing. */ public class MergerTest { 12 | 13 | 14 | /** Six jamo in, one two-syllable word expected. */ @Test 15 | public void mergerTest() throws Exception { 16 | List jasoList = new ArrayList<>(); 17 | jasoList.add("ㅎ"); 18 | jasoList.add("ㅏ"); 19 | jasoList.add("ㄴ"); 20 | jasoList.add("ㄱ"); 21 | jasoList.add("ㅡ"); 22 | jasoList.add("ㄹ"); 23 | 24 | KoreanMerger merger = new KoreanMerger(); 25 | String word = merger.merge(jasoList); 26 | 27 | System.out.println("결과 : " + word); 28 | assertEquals("한글", word); 29 | } 30 | 31 | 32 | /** Eight jamo in, one four-syllable word expected. */ @Test 33 | public void mergerTest2() throws Exception { 34 | List jasoList = new
ArrayList<>(); 35 | jasoList.add("ㅈ"); 36 | jasoList.add("ㅏ"); 37 | jasoList.add("ㅂ"); 38 | jasoList.add("ㅏ"); 39 | jasoList.add("ㅋ"); 40 | jasoList.add("ㅏ"); 41 | jasoList.add("ㅍ"); 42 | jasoList.add("ㅔ"); 43 | 44 | KoreanMerger merger = new KoreanMerger(); 45 | String word = merger.merge(jasoList); 46 | 47 | System.out.println("결과 : " + word); 48 | assertEquals("자바카페", word); 49 | } 50 | 51 | 52 | /** Eleven jamo in, one four-syllable word expected; three of the syllables here end in a consonant. */ @Test 53 | public void mergerTest3() throws Exception { 54 | List jasoList = new ArrayList<>(); 55 | jasoList.add("ㅅ"); 56 | jasoList.add("ㅏ"); 57 | jasoList.add("ㅁ"); 58 | jasoList.add("ㅅ"); 59 | jasoList.add("ㅓ"); 60 | jasoList.add("ㅇ"); 61 | jasoList.add("ㅈ"); 62 | jasoList.add("ㅓ"); 63 | jasoList.add("ㄴ"); 64 | jasoList.add("ㅈ"); 65 | jasoList.add("ㅏ"); 66 | 67 | KoreanMerger merger = new KoreanMerger(); 68 | String word = merger.merge(jasoList); 69 | 70 | System.out.println("결과 : " + word); 71 | assertEquals("삼성전자", word); 72 | } 73 | 74 | } 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/utilTest/ParserChosungTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.utilTest; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import org.elasticsearch.index.common.parser.KoreanChosungParser; 6 | import org.junit.Test; 7 | 8 | /** Unit tests for {@code KoreanChosungParser.parse}: for each four-syllable input the expected string holds four jamo, one per syllable (the initial consonant, judging by the class name - confirm against the parser). */ public class ParserChosungTest { 9 | 10 | 11 | /** Parses a four-syllable word and compares the result with the expected four-jamo string. */ @Test 12 | public void chosungTest() { 13 | String token = "자바카페"; 14 | KoreanChosungParser parser = new KoreanChosungParser(); 15 | String result = parser.parse(token); 16 | 17 | System.out.println(result); 18 | assertEquals("ㅈㅂㅋㅍ", result); 19 | } 20 | 21 | 22 | /** Same check with a word whose syllables mostly end in a consonant; the expected string still holds one jamo per syllable. */ @Test 23 | public void chosungTest2() { 24 | String token = "삼성전자"; 25 | KoreanChosungParser parser = new KoreanChosungParser(); 26 | String result = parser.parse(token); 27 | 28 | System.out.println(result); 29 | assertEquals("ㅅㅅㅈㅈ", result); 30 | } 31 |
32 | 33 | 34 | 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/utilTest/ParserJamoTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.utilTest; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import org.elasticsearch.index.common.parser.KoreanJamoParser; 6 | import org.junit.Test; 7 | 8 | /** Unit tests for {@code KoreanJamoParser.parse}: each case parses one Hangul word and compares the result with the expected string of individual jamo. */ public class ParserJamoTest { 9 | 10 | 11 | /** Word whose syllables are all consonant plus vowel; eight jamo expected. */ @Test 12 | public void jamoTest() { 13 | String token = "자바카페"; 14 | KoreanJamoParser parser = new KoreanJamoParser(); 15 | String result = parser.parse(token); 16 | 17 | System.out.println(result); 18 | assertEquals("ㅈㅏㅂㅏㅋㅏㅍㅔ", result); 19 | } 20 | 21 | 22 | /** Word containing syllables that end in a consonant; eleven jamo expected. */ @Test 23 | public void jamoTest2() { 24 | String token = "삼성전자"; 25 | KoreanJamoParser parser = new KoreanJamoParser(); 26 | String result = parser.parse(token); 27 | 28 | System.out.println(result); 29 | assertEquals("ㅅㅏㅁㅅㅓㅇㅈㅓㄴㅈㅏ", result); 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/utilTest/SpellCheckTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.plugin.utilTest; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import org.elasticsearch.index.common.parser.KoreanJamoParser; 6 | import org.junit.Test; 7 | 8 | /** NOTE(review): despite its name, this class only exercises {@code KoreanJamoParser.parse}, with the same input and expected value as {@code ParserJamoTest.jamoTest}; no spell-check class is referenced here. */ public class SpellCheckTest { 9 | 10 | 11 | /** Parses one Hangul word with KoreanJamoParser and compares the result with the expected jamo string. */ @Test 12 | public void spellTest() { 13 | String token = "자바카페"; 14 | KoreanJamoParser parser = new KoreanJamoParser(); 15 | String result = parser.parse(token); 16 | 17 | System.out.println(result); 18 | assertEquals("ㅈㅏㅂㅏㅋㅏㅍㅔ", result); 19 | } 20 | 21 | 22 | 23 | } 24 | 25 | --------------------------------------------------------------------------------