├── settings.gradle ├── images ├── hanhinsam_zip.png └── hanhinsam_install.png ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── zip ├── elasticsearch-8.1.2 │ └── hanhinsam-0.1.zip ├── elasticsearch-8.2.3 │ └── hanhinsam-0.1.zip └── elasticsearch-8.3.3 │ └── hanhinsam-0.1.zip ├── .gitignore ├── src ├── main │ └── java │ │ └── com │ │ └── yaincoding │ │ └── hanhinsam │ │ ├── filters │ │ ├── chosung │ │ │ ├── ChosungFilterFactory.java │ │ │ └── ChosungFilter.java │ │ ├── engtohan │ │ │ ├── EngToHanFilterFactory.java │ │ │ └── EngToHanFilter.java │ │ ├── hantoeng │ │ │ ├── HanToEngFilterFactory.java │ │ │ └── HanToEngFilter.java │ │ └── jamo │ │ │ ├── JamoDecomposeFilterFactory.java │ │ │ └── JamoDecomposeFilter.java │ │ ├── plugin │ │ └── HanHinSamPlugin.java │ │ └── hangul_util │ │ ├── HanEngUtil.java │ │ └── JamoUtil.java └── test │ └── java │ └── com │ └── yaincoding │ └── hanhinsam │ └── filters │ ├── hantoeng │ └── HanToEngFilterTest.java │ ├── engtohan │ └── EngToHanFilterTest.java │ ├── chosung │ └── ChosungFilterTest.java │ └── jamo │ └── JamoDecomposeFilterTest.java ├── gradlew.bat ├── gradlew └── Readme.md /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'hanhinsam' 2 | -------------------------------------------------------------------------------- /images/hanhinsam_zip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yainage90/hanhinsam/HEAD/images/hanhinsam_zip.png -------------------------------------------------------------------------------- /images/hanhinsam_install.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yainage90/hanhinsam/HEAD/images/hanhinsam_install.png -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yainage90/hanhinsam/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /zip/elasticsearch-8.1.2/hanhinsam-0.1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yainage90/hanhinsam/HEAD/zip/elasticsearch-8.1.2/hanhinsam-0.1.zip -------------------------------------------------------------------------------- /zip/elasticsearch-8.2.3/hanhinsam-0.1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yainage90/hanhinsam/HEAD/zip/elasticsearch-8.2.3/hanhinsam-0.1.zip -------------------------------------------------------------------------------- /zip/elasticsearch-8.3.3/hanhinsam-0.1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yainage90/hanhinsam/HEAD/zip/elasticsearch-8.3.3/hanhinsam-0.1.zip -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | HELP.md 2 | .gradle 3 | build/ 4 | !gradle/wrapper/gradle-wrapper.jar 5 | !**/src/main/**/build/ 6 | !**/src/test/**/build/ 7 | 8 | ### STS ### 9 | .apt_generated 10 | .classpath 11 | .factorypath 12 | .project 13 | .settings 14 | .springBeans 15 | .sts4-cache 16 | bin/ 17 | 
!**/src/main/**/bin/ 18 | !**/src/test/**/bin/ 19 | 20 | ### IntelliJ IDEA ### 21 | .idea 22 | *.iws 23 | *.iml 24 | *.ipr 25 | out/ 26 | !**/src/main/**/out/ 27 | !**/src/test/**/out/ 28 | 29 | ### NetBeans ### 30 | /nbproject/private/ 31 | /nbbuild/ 32 | /dist/ 33 | /nbdist/ 34 | /.nb-gradle/ 35 | 36 | ### VS Code ### 37 | .vscode/ 38 |
--------------------------------------------------------------------------------
/src/main/java/com/yaincoding/hanhinsam/filters/chosung/ChosungFilterFactory.java:
--------------------------------------------------------------------------------
package com.yaincoding.hanhinsam.filters.chosung;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps an upstream stream in a {@link ChosungFilter}.
 *
 * <p>{@code HanHinSamPlugin} registers this factory under the name {@code hanhinsam_chosung}.
 */
public class ChosungFilterFactory extends AbstractTokenFilterFactory {

  /**
   * Creates the factory.
   *
   * @param indexSettings settings of the index the filter belongs to; forwarded to the superclass
   * @param env node environment; accepted but not used by this factory
   * @param name name of the filter instance; forwarded to the superclass
   * @param settings filter settings; forwarded to the superclass (no option is read here)
   */
  public ChosungFilterFactory(
      IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
  }

  /**
   * Wraps the given stream in a new {@link ChosungFilter}.
   *
   * @param tokenStream upstream token stream
   * @return a new filter reading from {@code tokenStream}
   */
  @Override
  public TokenStream create(TokenStream tokenStream) {
    return new ChosungFilter(tokenStream);
  }
}
--------------------------------------------------------------------------------
/src/main/java/com/yaincoding/hanhinsam/filters/engtohan/EngToHanFilterFactory.java:
--------------------------------------------------------------------------------
package com.yaincoding.hanhinsam.filters.engtohan;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps an upstream stream in an {@link EngToHanFilter}.
 *
 * <p>{@code HanHinSamPlugin} registers this factory under the name {@code hanhinsam_engtohan}.
 */
public class EngToHanFilterFactory extends AbstractTokenFilterFactory {

  /**
   * Creates the factory.
   *
   * @param indexSettings settings of the index the filter belongs to; forwarded to the superclass
   * @param env node environment; accepted but not used by this factory
   * @param name name of the filter instance; forwarded to the superclass
   * @param settings filter settings; forwarded to the superclass (no option is read here)
   */
  public EngToHanFilterFactory(
      IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
  }

  /**
   * Wraps the given stream in a new {@link EngToHanFilter}.
   *
   * @param tokenStream upstream token stream
   * @return a new filter reading from {@code tokenStream}
   */
  @Override
  public TokenStream create(TokenStream tokenStream) {
    return new EngToHanFilter(tokenStream);
  }
}
--------------------------------------------------------------------------------
/src/main/java/com/yaincoding/hanhinsam/filters/hantoeng/HanToEngFilterFactory.java:
--------------------------------------------------------------------------------
package com.yaincoding.hanhinsam.filters.hantoeng;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps an upstream stream in a {@link HanToEngFilter}.
 *
 * <p>{@code HanHinSamPlugin} registers this factory under the name {@code hanhinsam_hantoeng}.
 */
public class HanToEngFilterFactory extends AbstractTokenFilterFactory {

  /**
   * Creates the factory.
   *
   * @param indexSettings settings of the index the filter belongs to; forwarded to the superclass
   * @param env node environment; accepted but not used by this factory
   * @param name name of the filter instance; forwarded to the superclass
   * @param settings filter settings; forwarded to the superclass (no option is read here)
   */
  // NOTE(review): only two of the four factories carry @Inject, and the plugin registers all of
  // them through constructor references, so the annotation looks unnecessary - confirm before
  // removing it.
  @Inject
  public HanToEngFilterFactory(
      IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
  }

  /**
   * Wraps the given stream in a new {@link HanToEngFilter}.
   *
   * @param tokenStream upstream token stream
   * @return a new filter reading from {@code tokenStream}
   */
  @Override
  public TokenStream create(TokenStream tokenStream) {
    return new HanToEngFilter(tokenStream);
  }
}
--------------------------------------------------------------------------------
/src/main/java/com/yaincoding/hanhinsam/filters/jamo/JamoDecomposeFilterFactory.java:
--------------------------------------------------------------------------------
package com.yaincoding.hanhinsam.filters.jamo;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps an upstream stream in a {@link JamoDecomposeFilter}.
 *
 * <p>{@code HanHinSamPlugin} registers this factory under the name {@code hanhinsam_jamo}.
 */
public class JamoDecomposeFilterFactory extends AbstractTokenFilterFactory {

  /**
   * Creates the factory.
   *
   * @param indexSettings settings of the index the filter belongs to; forwarded to the superclass
   * @param env node environment; accepted but not used by this factory
   * @param name name of the filter instance; forwarded to the superclass
   * @param settings filter settings; forwarded to the superclass (no option is read here)
   */
  // NOTE(review): only two of the four factories carry @Inject, and the plugin registers all of
  // them through constructor references, so the annotation looks unnecessary - confirm before
  // removing it.
  @Inject
  public JamoDecomposeFilterFactory(
      IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
  }

  /**
   * Wraps the given stream in a new {@link JamoDecomposeFilter}.
   *
   * @param tokenStream upstream token stream
   * @return a new filter reading from {@code tokenStream}
   */
  @Override
  public TokenStream create(TokenStream tokenStream) {
    return new JamoDecomposeFilter(tokenStream);
  }
}
--------------------------------------------------------------------------------
/src/main/java/com/yaincoding/hanhinsam/filters/chosung/ChosungFilter.java:
--------------------------------------------------------------------------------
package com.yaincoding.hanhinsam.filters.chosung;

import java.io.IOException;
import com.yaincoding.hanhinsam.hangul_util.JamoUtil;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * Token filter that replaces each term with the result of {@link JamoUtil#chosung(String)}.
 *
 * <p>Per the tests in this repository, {@code "엘라스틱 서치"} becomes {@code "ㅇㄹㅅㅌ ㅅㅊ"}, and
 * non-Hangul characters such as {@code "search"} pass through unchanged.
 */
public class ChosungFilter extends TokenFilter {

  private final CharTermAttribute charAttr;
  private final JamoUtil jamoUtil;

  /**
   * Creates a filter reading from the given stream.
   *
   * @param input upstream token stream
   */
  public ChosungFilter(TokenStream input) {
    super(input);
    jamoUtil = new JamoUtil();
    charAttr = addAttribute(CharTermAttribute.class);
  }

  /**
   * Advances the upstream and rewrites the current term in place.
   *
   * @return {@code false} when the upstream is exhausted, {@code true} otherwise
   * @throws IOException if the upstream stream fails
   */
  @Override
  public final boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }

    // Copy the term out before clearing the attribute, since the buffer is reused.
    String term = charAttr.toString();
    charAttr.setEmpty().append(jamoUtil.chosung(term));
    return true;
  }
}
-------------------------------------------------------------------------------- /src/main/java/com/yaincoding/hanhinsam/filters/jamo/JamoDecomposeFilter.java: -------------------------------------------------------------------------------- 1 | package com.yaincoding.hanhinsam.filters.jamo; 2 | 3 | import java.io.IOException; 4 | import com.yaincoding.hanhinsam.hangul_util.JamoUtil; 5 | import 
org.apache.lucene.analysis.TokenFilter; 6 | import org.apache.lucene.analysis.TokenStream; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 |
/**
 * Token filter that replaces each term with {@code JamoUtil.decompose(term, true)}.
 *
 * <p>Per the tests in this repository, {@code "값지다"} becomes {@code "ㄱㅏㅂㅅㅈㅣㄷㅏ"}, and
 * non-Hangul characters pass through unchanged. The meaning of the boolean flag is defined by
 * {@code JamoUtil}, which is not part of this file.
 */
public class JamoDecomposeFilter extends TokenFilter {

  private final CharTermAttribute charAttr;
  private final JamoUtil jamoUtil;

  /**
   * Creates a filter reading from the given stream.
   *
   * @param input upstream token stream
   */
  public JamoDecomposeFilter(TokenStream input) {
    super(input);
    jamoUtil = new JamoUtil();
    charAttr = addAttribute(CharTermAttribute.class);
  }

  /**
   * Advances the upstream and rewrites the current term in place.
   *
   * @return {@code false} when the upstream is exhausted, {@code true} otherwise
   * @throws IOException if the upstream stream fails
   */
  @Override
  public final boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }

    // Copy the term out before clearing the attribute, since the buffer is reused.
    String term = charAttr.toString();
    charAttr.setEmpty().append(jamoUtil.decompose(term, true));
    return true;
  }
}
--------------------------------------------------------------------------------
/src/main/java/com/yaincoding/hanhinsam/filters/engtohan/EngToHanFilter.java:
--------------------------------------------------------------------------------
package com.yaincoding.hanhinsam.filters.engtohan;

import java.io.IOException;
import com.yaincoding.hanhinsam.hangul_util.HanEngUtil;
import com.yaincoding.hanhinsam.hangul_util.JamoUtil;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * Token filter that rewrites Latin letters as the Hangul typed with the same keys.
 *
 * <p>Per the tests in this repository, {@code "dpffktmxlr tjcl"} becomes {@code "엘라스틱 서치"};
 * existing Hangul and punctuation pass through unchanged.
 */
public class EngToHanFilter extends TokenFilter {

  private final CharTermAttribute charAttr;
  private final JamoUtil jamoUtil;
  private final HanEngUtil hanEngUtil;

  /**
   * Creates a filter reading from the given stream.
   *
   * @param input upstream token stream
   */
  public EngToHanFilter(TokenStream input) {
    super(input);
    jamoUtil = new JamoUtil();
    hanEngUtil = new HanEngUtil();
    charAttr = addAttribute(CharTermAttribute.class);
  }

  /**
   * Advances the upstream and rewrites the current term in place.
   *
   * @return {@code false} when the upstream is exhausted, {@code true} otherwise
   * @throws IOException if the upstream stream fails
   */
  @Override
  public final boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }

    String typed = charAttr.toString();
    String converted = hanEngUtil.transformEnglishToHangul(typed);
    // NOTE(review): transformEnglishToHangul already returns jamoUtil.compose(...), so this second
    // compose looks redundant - confirm compose is idempotent before dropping it.
    String engToHan = jamoUtil.compose(converted);
    charAttr.setEmpty().append(engToHan);
    return true;
  }
}
--------------------------------------------------------------------------------
/src/main/java/com/yaincoding/hanhinsam/filters/hantoeng/HanToEngFilter.java:
--------------------------------------------------------------------------------
package com.yaincoding.hanhinsam.filters.hantoeng;

import java.io.IOException;
import com.yaincoding.hanhinsam.hangul_util.HanEngUtil;
import com.yaincoding.hanhinsam.hangul_util.JamoUtil;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * Token filter that rewrites Hangul as the Latin letters typed with the same keys.
 *
 * <p>Per the tests in this repository, {@code "해ㅐ힏.채ㅡ"} becomes {@code "google.com"};
 * punctuation passes through unchanged.
 */
public class HanToEngFilter extends TokenFilter {

  private final CharTermAttribute charAttr;
  private final JamoUtil jamoUtil;
  private final HanEngUtil hanEngUtil;

  /**
   * Creates a filter reading from the given stream.
   *
   * @param input upstream token stream
   */
  public HanToEngFilter(TokenStream input) {
    super(input);
    jamoUtil = new JamoUtil();
    hanEngUtil = new HanEngUtil();
    charAttr = addAttribute(CharTermAttribute.class);
  }

  /**
   * Advances the upstream and rewrites the current term in place.
   *
   * @return {@code false} when the upstream is exhausted, {@code true} otherwise
   * @throws IOException if the upstream stream fails
   */
  @Override
  public final boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }

    String term = charAttr.toString();
    // NOTE(review): transformHangulToEnglish decomposes its argument again with the same flag, so
    // this decompose looks redundant - confirm decompose is idempotent before dropping it.
    String jamo = jamoUtil.decompose(term, true);
    String hanToEng = hanEngUtil.transformHangulToEnglish(jamo);
    charAttr.setEmpty().append(hanToEng);
    return true;
  }
}
-------------------------------------------------------------------------------- /src/main/java/com/yaincoding/hanhinsam/plugin/HanHinSamPlugin.java: -------------------------------------------------------------------------------- 1 | package com.yaincoding.hanhinsam.plugin; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import com.yaincoding.hanhinsam.filters.chosung.ChosungFilterFactory; 6 | import 
com.yaincoding.hanhinsam.filters.engtohan.EngToHanFilterFactory; 7 | import com.yaincoding.hanhinsam.filters.hantoeng.HanToEngFilterFactory; 8 | import com.yaincoding.hanhinsam.filters.jamo.JamoDecomposeFilterFactory; 9 | import org.elasticsearch.index.analysis.TokenFilterFactory; 10 | import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; 11 | import org.elasticsearch.plugins.AnalysisPlugin; 12 | import org.elasticsearch.plugins.Plugin; 13 |
/**
 * Elasticsearch analysis plugin that registers the four hanhinsam token filters.
 *
 * <p>Registered names: {@code hanhinsam_chosung}, {@code hanhinsam_jamo},
 * {@code hanhinsam_engtohan} and {@code hanhinsam_hantoeng}.
 */
public class HanHinSamPlugin extends Plugin implements AnalysisPlugin {

  /**
   * Returns the token filter providers contributed by this plugin.
   *
   * @return a new mutable map from filter name to the constructor reference of its factory
   */
  @Override
  public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
    // The generic arguments were missing from this declaration, which does not compile;
    // they are restored from the imported AnalysisProvider and TokenFilterFactory types.
    Map<String, AnalysisProvider<TokenFilterFactory>> extra = new HashMap<>();
    extra.put("hanhinsam_chosung", ChosungFilterFactory::new);
    extra.put("hanhinsam_jamo", JamoDecomposeFilterFactory::new);
    extra.put("hanhinsam_engtohan", EngToHanFilterFactory::new);
    extra.put("hanhinsam_hantoeng", HanToEngFilterFactory::new);

    return extra;
  }
}
--------------------------------------------------------------------------------
/src/test/java/com/yaincoding/hanhinsam/filters/hantoeng/HanToEngFilterTest.java:
--------------------------------------------------------------------------------
package com.yaincoding.hanhinsam.filters.hantoeng;

import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/** Tests {@link HanToEngFilter} behind a {@link KeywordTokenizer}. */
public class HanToEngFilterTest {

  private Analyzer analyzer;

  /**
   * Runs {@code text} through the analyzer and joins the emitted terms with single spaces.
   *
   * @param text input to analyze
   * @return the emitted terms joined by {@code " "}
   * @throws IOException if the token stream fails
   */
  private String getHangulToEnglish(String text) throws IOException {
    List<String> tokenStrs = new ArrayList<>();

    // try-with-resources closes the stream even if consumption throws; end() is called after the
    // last token, as the TokenStream consumer workflow requires.
    try (TokenStream stream = analyzer.tokenStream("field", text)) {
      CharTermAttribute charAttr = stream.addAttribute(CharTermAttribute.class);

      stream.reset();
      while (stream.incrementToken()) {
        tokenStrs.add(charAttr.toString());
      }
      stream.end();
    }

    String result = String.join(" ", tokenStrs);
    System.out.println(result);

    return result;
  }

  /** Builds a fresh analyzer (keyword tokenizer followed by the filter under test). */
  @BeforeEach
  public void setup() {
    analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        TokenStream tokenFilter = new HanToEngFilter(tokenizer);
        return new TokenStreamComponents(tokenizer, tokenFilter);
      }
    };
  }

  @Test
  void testOnlyHangul() throws IOException {
    assertEquals("elasticsearch", getHangulToEnglish("딤ㄴ샻ㄴㄷㅁㄱ초"));
  }

  @Test
  void testContainsEnglish() throws IOException {
    assertEquals("google.com", getHangulToEnglish("해ㅐ힏.채ㅡ"));
  }

  @Test
  void testContainsSpecialCharacters() throws IOException {
    assertEquals("elasticsearch!@#$%^&&**((", getHangulToEnglish("딤ㄴ샻ㄴㄷㅁㄱ초!@#$%^&&**(("));
  }

  @Test
  void testContainsStacking() throws IOException {
    assertEquals("sword", getHangulToEnglish("ㄴ잭ㅇ"));
  }
}
-------------------------------------------------------------------------------- /src/test/java/com/yaincoding/hanhinsam/filters/engtohan/EngToHanFilterTest.java: -------------------------------------------------------------------------------- 1 | package com.yaincoding.hanhinsam.filters.engtohan; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import java.io.IOException; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | import org.apache.lucene.analysis.Analyzer; 8 | import org.apache.lucene.analysis.TokenStream; 9 | import org.apache.lucene.analysis.Tokenizer; 10 | 
import org.apache.lucene.analysis.core.KeywordTokenizer; 11 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 12 | import org.junit.jupiter.api.BeforeEach; 13 | import org.junit.jupiter.api.Test; 14 |
/** Tests {@link EngToHanFilter} behind a {@link KeywordTokenizer}. */
public class EngToHanFilterTest {

  private Analyzer analyzer;

  /**
   * Runs {@code text} through the analyzer and joins the emitted terms with single spaces.
   *
   * @param text input to analyze
   * @return the emitted terms joined by {@code " "}
   * @throws IOException if the token stream fails
   */
  private String getEnglishToHangul(String text) throws IOException {
    List<String> tokenStrs = new ArrayList<>();

    // try-with-resources closes the stream even if consumption throws; end() is called after the
    // last token, as the TokenStream consumer workflow requires.
    try (TokenStream stream = analyzer.tokenStream("field", text)) {
      CharTermAttribute charAttr = stream.addAttribute(CharTermAttribute.class);

      stream.reset();
      while (stream.incrementToken()) {
        tokenStrs.add(charAttr.toString());
      }
      stream.end();
    }

    String result = String.join(" ", tokenStrs);
    System.out.println(result);

    return result;
  }

  /** Builds a fresh analyzer (keyword tokenizer followed by the filter under test). */
  @BeforeEach
  public void setup() {
    analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        TokenStream tokenFilter = new EngToHanFilter(tokenizer);
        return new TokenStreamComponents(tokenizer, tokenFilter);
      }
    };
  }

  @Test
  void testOnlyEnglish() throws IOException {
    assertEquals("엘라스틱 서치", getEnglishToHangul("dpffktmxlr tjcl"));
  }

  @Test
  void testContainsHangul() throws IOException {
    assertEquals("엘라스틱 서치", getEnglishToHangul("dpffktmxlr 서치"));
  }

  @Test
  void testContainsSpecialCharacters() throws IOException {
    assertEquals("엘라스틱!@# 서치(*&^$%", getEnglishToHangul("dpffktmxlr!@# tjcl(*&^$%"));
  }

  @Test
  void testContainsStacking() throws IOException {
    assertEquals("값지다", getEnglishToHangul("rkqtwlek"));
    assertEquals("앉다", getEnglishToHangul("dkswek"));
  }
}
-------------------------------------------------------------------------------- 
/src/test/java/com/yaincoding/hanhinsam/filters/chosung/ChosungFilterTest.java:
--------------------------------------------------------------------------------
package com.yaincoding.hanhinsam.filters.chosung;

import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/** Tests {@link ChosungFilter} behind a {@link KeywordTokenizer}. */
public class ChosungFilterTest {

  private Analyzer analyzer;

  /**
   * Runs {@code text} through the analyzer and joins the emitted terms with single spaces.
   *
   * @param text input to analyze
   * @return the emitted terms joined by {@code " "}
   * @throws IOException if the token stream fails
   */
  private String getChosungString(String text) throws IOException {
    List<String> tokenStrs = new ArrayList<>();

    // try-with-resources closes the stream even if consumption throws; end() is called after the
    // last token, as the TokenStream consumer workflow requires.
    try (TokenStream stream = analyzer.tokenStream("field", text)) {
      CharTermAttribute charAttr = stream.addAttribute(CharTermAttribute.class);

      stream.reset();
      while (stream.incrementToken()) {
        tokenStrs.add(charAttr.toString());
      }
      stream.end();
    }

    String result = String.join(" ", tokenStrs);
    System.out.println(result);

    return result;
  }

  /** Builds a fresh analyzer (keyword tokenizer followed by the filter under test). */
  @BeforeEach
  public void setup() {
    analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        TokenStream tokenFilter = new ChosungFilter(tokenizer);
        return new TokenStreamComponents(tokenizer, tokenFilter);
      }
    };
  }

  @Test
  void testOnlyHangul() throws IOException {
    assertEquals("ㅇㄹㅅㅌ ㅅㅊ", getChosungString("엘라스틱 서치"));
  }

  @Test
  void testContainsEnglish() throws IOException {
    assertEquals("ㅇㄹㅅㅌ search", getChosungString("엘라스틱 search"));
  }

  @Test
  void testContainsSpecialCharacters() throws IOException {
    assertEquals("([]ㅇㄹㅅㅌ!@#ㅅㅊ", getChosungString("([]엘라스틱!@#서치"));
  }

  @Test
  void testReturnOriginalJamoIfContainsJamo() throws IOException {
    assertEquals("ㅇㄹㅅㅌㅣㄱ ㅅㅓㅊ", getChosungString("엘라스ㅌㅣㄱ ㅅㅓ치"));
  }

  @Test
  void testContainsStacking() throws IOException {
    assertEquals("ㄱㅈㄷ", getChosungString("값지다"));
    assertEquals("ㅇㄷ", getChosungString("앉다"));
  }
}
--------------------------------------------------------------------------------
/src/test/java/com/yaincoding/hanhinsam/filters/jamo/JamoDecomposeFilterTest.java:
--------------------------------------------------------------------------------
package com.yaincoding.hanhinsam.filters.jamo;

import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/** Tests {@link JamoDecomposeFilter} behind a {@link KeywordTokenizer}. */
public class JamoDecomposeFilterTest {

  private Analyzer analyzer;

  /**
   * Runs {@code text} through the analyzer and joins the emitted terms with single spaces.
   *
   * <p>Renamed from {@code getChosungString}, a leftover from the chosung test this class was
   * copied from.
   *
   * @param text input to analyze
   * @return the emitted terms joined by {@code " "}
   * @throws IOException if the token stream fails
   */
  private String getJamoString(String text) throws IOException {
    List<String> tokenStrs = new ArrayList<>();

    // try-with-resources closes the stream even if consumption throws; end() is called after the
    // last token, as the TokenStream consumer workflow requires.
    try (TokenStream stream = analyzer.tokenStream("field", text)) {
      CharTermAttribute charAttr = stream.addAttribute(CharTermAttribute.class);

      stream.reset();
      while (stream.incrementToken()) {
        tokenStrs.add(charAttr.toString());
      }
      stream.end();
    }

    String result = String.join(" ", tokenStrs);
    System.out.println(result);

    return result;
  }

  /** Builds a fresh analyzer (keyword tokenizer followed by the filter under test). */
  @BeforeEach
  public void setup() {
    analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        TokenStream tokenFilter = new JamoDecomposeFilter(tokenizer);
        return new TokenStreamComponents(tokenizer, tokenFilter);
      }
    };
  }

  @Test
  void testOnlyHangul() throws IOException {
    assertEquals("ㅇㅔㄹㄹㅏㅅㅡㅌㅣㄱ ㅅㅓㅊㅣ", getJamoString("엘라스틱 서치"));
  }

  @Test
  void testContainsEnglish() throws IOException {
    assertEquals("ㅇㅔㄹㄹㅏㅅㅡㅌㅣㄱ search", getJamoString("엘라스틱 search"));
  }

  @Test
  void testContainsSpecialCharacters() throws IOException {
    assertEquals("ㅇㅔㄹㄹㅏㅅㅡㅌㅣㄱ!@# ㅅㅓㅊㅣ(*&^$%", getJamoString("엘라스틱!@# 서치(*&^$%"));
  }

  @Test
  void testContainsJamo() throws IOException {
    assertEquals("ㅇㅔㄹㄹㅏㅅㅡㅌㅣㄱ ㅅㅓㅊㅣ", getJamoString("엘라스ㅌㅣㄱ ㅅㅓ치"));
  }

  @Test
  void testContainsStacking() throws IOException {
    assertEquals("ㄱㅏㅂㅅㅈㅣㄷㅏ", getJamoString("값지다"));
    assertEquals("ㅇㅏㄴㅈㄷㅏ", getJamoString("앉다"));
  }
}
-------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto execute 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto execute 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 
64 | 65 | goto fail 66 | 67 | :execute 68 | @rem Setup the command line 69 | 70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 71 | 72 | 73 | @rem Execute Gradle 74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 75 | 76 | :end 77 | @rem End local scope for the variables with windows NT shell 78 | if "%ERRORLEVEL%"=="0" goto mainEnd 79 | 80 | :fail 81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 82 | rem the _cmd.exe /c_ return code! 83 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 84 | exit /b 1 85 | 86 | :mainEnd 87 | if "%OS%"=="Windows_NT" endlocal 88 | 89 | :omega 90 | -------------------------------------------------------------------------------- /src/main/java/com/yaincoding/hanhinsam/hangul_util/HanEngUtil.java: -------------------------------------------------------------------------------- 1 | package com.yaincoding.hanhinsam.hangul_util; 2 | 3 | public class HanEngUtil { 4 | 5 | private char[] engHanKeyMap; 6 | private String[] hanEngKeyMap; 7 | 8 | private final JamoUtil jamoUtil; 9 | 10 | public HanEngUtil() { 11 | jamoUtil = new JamoUtil(); 12 | initialize(); 13 | } 14 | 15 | private void initialize() { 16 | initializeEngKeyToHanKeyMap(); 17 | initializeHanKeyToEngKeyMap(); 18 | } 19 | 20 | private void initializeEngKeyToHanKeyMap() { 21 | engHanKeyMap = new char['z' + 1]; 22 | engHanKeyMap['a'] = 'ㅁ'; 23 | engHanKeyMap['A'] = 'ㅁ'; 24 | engHanKeyMap['b'] = 'ㅠ'; 25 | engHanKeyMap['B'] = 'ㅠ'; 26 | engHanKeyMap['c'] = 'ㅊ'; 27 | engHanKeyMap['C'] = 'ㅊ'; 28 | engHanKeyMap['d'] = 'ㅇ'; 29 | engHanKeyMap['D'] = 'ㅇ'; 30 | engHanKeyMap['e'] = 'ㄷ'; 31 | engHanKeyMap['E'] = 'ㄸ'; 32 | engHanKeyMap['f'] = 'ㄹ'; 33 | engHanKeyMap['F'] = 'ㄹ'; 34 | engHanKeyMap['g'] = 'ㅎ'; 35 | engHanKeyMap['G'] = 'ㅎ'; 36 | engHanKeyMap['h'] = 'ㅗ'; 37 | engHanKeyMap['H'] = 'ㅗ'; 38 | engHanKeyMap['i'] = 'ㅑ'; 39 | 
engHanKeyMap['I'] = 'ㅑ'; 40 | engHanKeyMap['j'] = 'ㅓ'; 41 | engHanKeyMap['J'] = 'ㅓ'; 42 | engHanKeyMap['k'] = 'ㅏ'; 43 | engHanKeyMap['K'] = 'ㅏ'; 44 | engHanKeyMap['l'] = 'ㅣ'; 45 | engHanKeyMap['L'] = 'ㅣ'; 46 | engHanKeyMap['m'] = 'ㅡ'; 47 | engHanKeyMap['M'] = 'ㅡ'; 48 | engHanKeyMap['n'] = 'ㅜ'; 49 | engHanKeyMap['N'] = 'ㅜ'; 50 | engHanKeyMap['o'] = 'ㅐ'; 51 | engHanKeyMap['O'] = 'ㅒ'; 52 | engHanKeyMap['p'] = 'ㅔ'; 53 | engHanKeyMap['P'] = 'ㅖ'; 54 | engHanKeyMap['q'] = 'ㅂ'; 55 | engHanKeyMap['Q'] = 'ㅃ'; 56 | engHanKeyMap['r'] = 'ㄱ'; 57 | engHanKeyMap['R'] = 'ㄲ'; 58 | engHanKeyMap['s'] = 'ㄴ'; 59 | engHanKeyMap['S'] = 'ㄴ'; 60 | engHanKeyMap['t'] = 'ㅅ'; 61 | engHanKeyMap['T'] = 'ㅆ'; 62 | engHanKeyMap['u'] = 'ㅕ'; 63 | engHanKeyMap['U'] = 'ㅕ'; 64 | engHanKeyMap['v'] = 'ㅍ'; 65 | engHanKeyMap['V'] = 'ㅍ'; 66 | engHanKeyMap['w'] = 'ㅈ'; 67 | engHanKeyMap['W'] = 'ㅉ'; 68 | engHanKeyMap['x'] = 'ㅌ'; 69 | engHanKeyMap['X'] = 'ㅌ'; 70 | engHanKeyMap['y'] = 'ㅛ'; 71 | engHanKeyMap['Y'] = 'ㅛ'; 72 | engHanKeyMap['z'] = 'ㅋ'; 73 | engHanKeyMap['Z'] = 'ㅋ'; 74 | } 75 | 76 | private void initializeHanKeyToEngKeyMap() { 77 | hanEngKeyMap = new String['ㅣ' + 1]; 78 | 79 | for (int i = 0; i < hanEngKeyMap.length; i++) { 80 | hanEngKeyMap[i] = String.valueOf((char) i); 81 | } 82 | 83 | hanEngKeyMap['ㄱ'] = "r"; 84 | hanEngKeyMap['ㄲ'] = "R"; 85 | hanEngKeyMap['ㄴ'] = "s"; 86 | hanEngKeyMap['ㄷ'] = "e"; 87 | hanEngKeyMap['ㄸ'] = "E"; 88 | hanEngKeyMap['ㄹ'] = "f"; 89 | hanEngKeyMap['ㅁ'] = "a"; 90 | hanEngKeyMap['ㅂ'] = "q"; 91 | hanEngKeyMap['ㅃ'] = "Q"; 92 | hanEngKeyMap['ㅅ'] = "t"; 93 | hanEngKeyMap['ㅆ'] = "T"; 94 | hanEngKeyMap['ㅇ'] = "d"; 95 | hanEngKeyMap['ㅈ'] = "w"; 96 | hanEngKeyMap['ㅉ'] = "W"; 97 | hanEngKeyMap['ㅊ'] = "c"; 98 | hanEngKeyMap['ㅋ'] = "z"; 99 | hanEngKeyMap['ㅌ'] = "x"; 100 | hanEngKeyMap['ㅍ'] = "v"; 101 | hanEngKeyMap['ㅎ'] = "g"; 102 | hanEngKeyMap['ㅏ'] = "k"; 103 | hanEngKeyMap['ㅐ'] = "o"; 104 | hanEngKeyMap['ㅑ'] = "i"; 105 | hanEngKeyMap['ㅒ'] = "O"; 106 | hanEngKeyMap['ㅓ'] = "j"; 107 | 
hanEngKeyMap['ㅔ'] = "p"; 108 | hanEngKeyMap['ㅕ'] = "u"; 109 | hanEngKeyMap['ㅖ'] = "P"; 110 | hanEngKeyMap['ㅗ'] = "h"; 111 | hanEngKeyMap['ㅛ'] = "y"; 112 | hanEngKeyMap['ㅜ'] = "n"; 113 | hanEngKeyMap['ㅠ'] = "b"; 114 | hanEngKeyMap['ㅡ'] = "m"; 115 | hanEngKeyMap['ㅣ'] = "l"; 116 | } 117 | 118 | public String transformHangulToEnglish(String hangul) { 119 | hangul = jamoUtil.decompose(hangul, true); 120 | 121 | StringBuilder englishBuilder = new StringBuilder(); 122 | for (char ch : hangul.toCharArray()) { 123 | if (isHangulCharacter(ch)) { 124 | englishBuilder.append(hanEngKeyMap[ch]); 125 | } else { 126 | englishBuilder.append(ch); 127 | } 128 | } 129 | 130 | return englishBuilder.toString(); 131 | } 132 | 133 | public String transformEnglishToHangul(String english) { 134 | StringBuilder hangulBuilder = new StringBuilder(); 135 | for (char ch : english.toCharArray()) { 136 | if (isEnglishCharacter(ch)) { 137 | hangulBuilder.append(engHanKeyMap[ch]); 138 | } else { 139 | hangulBuilder.append(ch); 140 | } 141 | } 142 | 143 | return jamoUtil.compose(hangulBuilder.toString()); 144 | } 145 | 146 | private boolean isHangulCharacter(char ch) { 147 | return ch >= 'ㄱ' && ch <= 'ㅣ'; 148 | } 149 | 150 | private boolean isEnglishCharacter(char ch) { 151 | return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # 4 | # Copyright 2015 the original author or authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | ## 21 | ## Gradle start up script for UN*X 22 | ## 23 | ############################################################################## 24 | 25 | # Attempt to set APP_HOME 26 | # Resolve links: $0 may be a link 27 | PRG="$0" 28 | # Need this for relative symlinks. 29 | while [ -h "$PRG" ] ; do 30 | ls=`ls -ld "$PRG"` 31 | link=`expr "$ls" : '.*-> \(.*\)$'` 32 | if expr "$link" : '/.*' > /dev/null; then 33 | PRG="$link" 34 | else 35 | PRG=`dirname "$PRG"`"/$link" 36 | fi 37 | done 38 | SAVED="`pwd`" 39 | cd "`dirname \"$PRG\"`/" >/dev/null 40 | APP_HOME="`pwd -P`" 41 | cd "$SAVED" >/dev/null 42 | 43 | APP_NAME="Gradle" 44 | APP_BASE_NAME=`basename "$0"` 45 | 46 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 47 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 48 | 49 | # Use the maximum available, or set MAX_FD != -1 to use that value. 50 | MAX_FD="maximum" 51 | 52 | warn () { 53 | echo "$*" 54 | } 55 | 56 | die () { 57 | echo 58 | echo "$*" 59 | echo 60 | exit 1 61 | } 62 | 63 | # OS specific support (must be 'true' or 'false'). 
64 | cygwin=false 65 | msys=false 66 | darwin=false 67 | nonstop=false 68 | case "`uname`" in 69 | CYGWIN* ) 70 | cygwin=true 71 | ;; 72 | Darwin* ) 73 | darwin=true 74 | ;; 75 | MSYS* | MINGW* ) 76 | msys=true 77 | ;; 78 | NONSTOP* ) 79 | nonstop=true 80 | ;; 81 | esac 82 | 83 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 84 | 85 | 86 | # Determine the Java command to use to start the JVM. 87 | if [ -n "$JAVA_HOME" ] ; then 88 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 89 | # IBM's JDK on AIX uses strange locations for the executables 90 | JAVACMD="$JAVA_HOME/jre/sh/java" 91 | else 92 | JAVACMD="$JAVA_HOME/bin/java" 93 | fi 94 | if [ ! -x "$JAVACMD" ] ; then 95 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 96 | 97 | Please set the JAVA_HOME variable in your environment to match the 98 | location of your Java installation." 99 | fi 100 | else 101 | JAVACMD="java" 102 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 103 | 104 | Please set the JAVA_HOME variable in your environment to match the 105 | location of your Java installation." 106 | fi 107 | 108 | # Increase the maximum file descriptors if we can. 109 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 110 | MAX_FD_LIMIT=`ulimit -H -n` 111 | if [ $? -eq 0 ] ; then 112 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 113 | MAX_FD="$MAX_FD_LIMIT" 114 | fi 115 | ulimit -n $MAX_FD 116 | if [ $? 
-ne 0 ] ; then 117 | warn "Could not set maximum file descriptor limit: $MAX_FD" 118 | fi 119 | else 120 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 121 | fi 122 | fi 123 | 124 | # For Darwin, add options to specify how the application appears in the dock 125 | if $darwin; then 126 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 127 | fi 128 | 129 | # For Cygwin or MSYS, switch paths to Windows format before running java 130 | if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then 131 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 132 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 133 | 134 | JAVACMD=`cygpath --unix "$JAVACMD"` 135 | 136 | # We build the pattern for arguments to be converted via cygpath 137 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 138 | SEP="" 139 | for dir in $ROOTDIRSRAW ; do 140 | ROOTDIRS="$ROOTDIRS$SEP$dir" 141 | SEP="|" 142 | done 143 | OURCYGPATTERN="(^($ROOTDIRS))" 144 | # Add a user-defined pattern to the cygpath arguments 145 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 146 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 147 | fi 148 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 149 | i=0 150 | for arg in "$@" ; do 151 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 152 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 153 | 154 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 155 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 156 | else 157 | eval `echo args$i`="\"$arg\"" 158 | fi 159 | i=`expr $i + 1` 160 | done 161 | case $i in 162 | 0) set -- ;; 163 | 1) set -- "$args0" ;; 164 | 2) set -- "$args0" "$args1" ;; 165 | 3) set -- "$args0" "$args1" "$args2" ;; 166 | 4) set -- "$args0" "$args1" "$args2" "$args3" ;; 167 | 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 168 | 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 169 | 
7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 170 | 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 171 | 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 172 | esac 173 | fi 174 | 175 | # Escape application args 176 | save () { 177 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 178 | echo " " 179 | } 180 | APP_ARGS=`save "$@"` 181 | 182 | # Collect all arguments for the java command, following the shell quoting and substitution rules 183 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 184 | 185 | exec "$JAVACMD" "$@" 186 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | ## 1. 소개 2 | 3 | 엘라스틱서치(ElasticSearch)에서 한글 검색 확장 기능을 위해 만든 토큰 필터 플러그인입니다. 4 | 5 | 이 프로젝트는 elasticsearch 8.1.2으로 작성되었습니다. 버전 차이가 크지 않다면 큰 상관이 없을 수 있지만 혹시 버전 차이로 빌드나 동작에 문제가 발생한다면 `build.gradle`에서 elasticsearch와 org.elasticsearch.gradle:build-tools 버전을 수정해야 합니다. 버전 수정 후 라이브러리 버전 차이에 따른 문제가 발생하면 해당 부분의 코드를 수정 후 빌드하여 사용해야 합니다. 6 | 7 | *빌드 참고 사항* 8 | java17을 사용해서 빌드 되었습니다. java17의 경우 gradle 7.3버전부터 완전히 지원합니다. `build.gradle`에 명시된 elasticsearch와 다른 버전을 사용할 경우 java, gradle 버전이 맞지 않으면 빌드에 실패할 수 있습니다. 9 | Unsupported class file major version 에러가 날 경우, 에러 메시지에 따라서 java 버전을 수정하고 `settings.gradle`에서 gradle 버전을 변경해주면 됩니다. 10 | 11 | 12 |
13 | 14 | 해당 `Readme.md` 후반부의 예제는 설명을 위해 매우 간단한 case들에 적용하였으며 실제 검색에 적용하기 위해서는 더 복잡한 필드 매핑 설계 및 커스텀 분석기 디자인이 필요합니다. 15 | 아래 링크는 이 플러그인을 적용하여 만든 도서 검색 API 프로젝트 github repository 입니다. 16 | [https://github.com/yaincoding/yacobooks-api](https://github.com/yaincoding/yacobooks-api) 17 | 18 |
19 | 20 | 지원하는 토큰 필터 종류는 아래와 같습니다. 21 | 22 | **1) 자모 분리 필터(hanhinsam_jamo)** 23 | 24 | 한글 토큰 문자열을 자음/모음 단위로 분리해줍니다. 한글은 유니코드 특성상 초성/중성/종성이 결합된 형태여서 엘라스틱서치 Term Suggest API를 그대로 사용하기에는 무리가 있습니다. 따라서 자모단위로 분리된 필드를 추가적으로 색인하고 해당 필드를 통해 오타교정을 진행합니다. 25 | 26 | ex) 엘라스틱서치 → ㅇㅔㄹㄹㅏㅅㅡㅌㅣㄱㅅㅓㅊㅣ 27 | 28 | **2) 초성 필터(hanhinsam_chosung)** 29 | 30 | 한글 토큰 문자열의 초성을 추출합니다. 초성 검색에 사용됩니다. 31 | 32 | ex) 엘라스틱서치 → ㅇㄹㅅㅌㅅㅊ 33 | 34 | **3) 한 → 영 변환 필터(hanhinsam_hantoeng)** 35 | 36 | 한글 토큰 문자열을 자모 단위로 분해한 후 키보드 배열의 매칭되는 영어 문자열로 변환합니다. 37 | 38 | ex) 딤ㄴ샻ㄴㄷㅁㄱ초 → elasticsearch 39 | 40 | **4) 영 → 한 변환 필터(hanhinsam_engtohan)** 41 | 42 | 영어 토큰 문자열을 키보드 배열에 매칭되는 한글 문자열로 변환합니다. 43 | 44 | ex) dpffktmxlrtjcl → 엘라스틱서치 45 | 46 |
47 | 48 | ## 2. 빌드 및 설치 49 | 50 | #### 2-1. 빌드 51 | 52 | 프로젝트 clone 후 루트 디렉터리에서 gradle 빌드 실행 53 | 54 | ``` shell 55 | ./gradlew clean assemble 56 | ``` 57 | 58 |
59 | 60 | #### 2-2. 플러그인 zip 압축파일 생성 확인 61 | 62 | build/distributions/hanhinsam-0.1.zip 63 | 64 | ![hanhinsam_zip](./images/hanhinsam_zip.png) 65 | 66 | 1. jar 라이브러리 67 | 2. plugin-descriptor.properties 68 | 69 |
70 | 71 | #### 2-3. 엘라스틱서치에 플러그인 설치 72 | 73 | 엘라스틱서치 설치 디렉터리에서 sudo 권한으로 `bin/elasticsearch-plugin install` 명령을 실행하여 플러그인을 설치합니다. `file://` 뒤에는 빌드된 zip 파일의 절대 경로를 적습니다. 74 | 75 | ``` shell 76 | sudo bin/elasticsearch-plugin install file:///path/to/hanhinsam-0.1.zip 77 | ``` 78 | 79 | ![hanhinsam_install](./images/hanhinsam_install.png) 80 | 81 | 
82 | 83 | docker-elk 등 컨테이너에서 엘라스틱서치를 실행하는 경우 Dockerfile에서 이미지 빌드 시에 플러그인을 설치하도록 작성합니다. 84 | 85 | ``` dockerfile 86 | ... 87 | 88 | COPY plugins/hanhinsam-0.1.zip /plugins/hanhinsam.zip 89 | RUN elasticsearch-plugin install file:///plugins/hanhinsam.zip 90 | ``` 91 | 92 | #### 2-4. 엘라스틱서치 재시작 93 | 94 | 엘라스틱서치를 실행 중이었다면 재시작해야 플러그인이 적용됩니다. 95 | 96 | 
97 | 98 | ## 3. 예제 99 | 100 | **1) 오타 교정** 101 | 102 | 특정 필드 검색 시 오타교정을 할 계획이라면 자모 분리한 문자열을 색인하기 위한 필드를 추가적으로 생성합니다. 해당 필드는 분석이 필요하므로 `text` 타입입니다. 이 필드를 분석하기 위한 분석기를 만들고 필터에 `hanhinsam_jamo` 필터를 적용합니다. Term Suggest API 를 사용하면 해당 필드를 통해 오타 교정이 가능합니다. 103 | 104 | ``` javascript 105 | //인덱스 생성 106 | PUT /spell_test 107 | { 108 | "settings": { 109 | "number_of_shards": 1, 110 | "number_of_replicas": 0, 111 | "index.max_ngram_diff": 10, 112 | "analysis": { 113 | "analyzer": { 114 | "jamo_analyzer": { 115 | "type": "custom", 116 | "tokenizer": "standard", 117 | "filter": [ 118 | "lowercase", 119 | "hanhinsam_jamo" 120 | ] 121 | } 122 | } 123 | } 124 | }, 125 | "mappings": { 126 | "properties": { 127 | "name": { 128 | "type": "keyword", 129 | "copy_to": ["name_jamo"] 130 | }, 131 | "name_jamo": { 132 | "type": "text", 133 | "analyzer": "jamo_analyzer" 134 | } 135 | } 136 | } 137 | } 138 | 139 | //데이터 색인 140 | POST /_bulk 141 | { "index" : { "_index" : "spell_test", "_id" : "1" } } 142 | { "name" : "손오공" } 143 | { "index" : { "_index" : "spell_test", "_id" : "2" } } 144 | { "name" : "엘라스틱서치" } 145 | { "index" : { "_index" : "spell_test", "_id" : "3" } } 146 | { "name" : "아메리카노" } 147 | 148 | //오타교정 검색 테스트 149 | POST /spell_test/_search 150 | { 151 | "suggest": { 152 | "name_suggest": { 153 | "text": "아메리치노", 154 | "term": { 155 | "field": "name_jamo", 156 | "max_edits": 2 157 | } 158 | } 159 | } 160 | } 161 | ``` 162 | 163 | ``` javascript 164 | //응답결과 165 | { 166 | "took" : 7, 167 | "timed_out" : false, 168 | "_shards" : { 169 | "total" : 1, 170 | "successful" : 1, 171 | "skipped" : 0, 172 | "failed" : 0 173 | }, 174 | "hits" : { 175 | "total" : { 176 | "value" : 0, 177 | "relation" : "eq" 178 | }, 179 | "max_score" : null, 180 | "hits" : [ ] 181 | }, 182 | "suggest" : { 183 | "name_suggest" : [ 184 | { 185 | "text" : "ㅇㅏㅁㅔㄹㅣㅊㅣㄴㅗ", 186 | "offset" : 0, 187 | "length" : 5, 188 | "options" : [ 189 | { 190 | "text" : "ㅇㅏㅁㅔㄹㅣㅋㅏㄴㅗ", 191 | "score" : 0.8, 192 | "freq" : 1 193 | } 194 | 
] 195 | } 196 | ] 197 | } 198 | } 199 | ``` 200 | 201 | **2) 한/영 변환 오타 교정** 202 | 203 | 한/영 변환한 문자열을 색인하기 위한 필드를 각각 추가적으로 생성합니다. 해당 필드는 분석이 필요하기 때문에 `text` 타입입니다. 한/영 변환 필터가 적용된 분석기를 만들고 각 분석기를 한/영 변환 필드의 `search_analyzer`로 지정합니다. 204 | 205 | ``` javascript 206 | //인덱스 생성 207 | PUT /haneng_test 208 | { 209 | "settings": { 210 | "number_of_shards": 1, 211 | "number_of_replicas": 0, 212 | "index.max_ngram_diff": 10, 213 | "analysis": { 214 | "analyzer": { 215 | "engtohan_analyzer": { 216 | "type": "custom", 217 | "tokenizer": "standard", 218 | "filter": [ 219 | "lowercase", 220 | "hanhinsam_engtohan" 221 | ] 222 | }, 223 | "hantoeng_analyzer": { 224 | "type": "custom", 225 | "tokenizer": "standard", 226 | "filter": [ 227 | "lowercase", 228 | "hanhinsam_hantoeng" 229 | ] 230 | } 231 | } 232 | } 233 | }, 234 | "mappings": { 235 | "properties": { 236 | "name": { 237 | "type": "keyword", 238 | "copy_to": ["name_hantoeng", "name_engtohan"] 239 | }, 240 | "name_hantoeng": { 241 | "type": "text", 242 | "search_analyzer": "hantoeng_analyzer" 243 | }, 244 | "name_engtohan": { 245 | "type": "text", 246 | "search_analyzer": "engtohan_analyzer" 247 | } 248 | } 249 | } 250 | } 251 | 252 | //데이터 색인 253 | POST /_bulk 254 | { "index" : { "_index" : "haneng_test", "_id" : "1" } } 255 | { "name" : "손오공" } 256 | { "index" : { "_index" : "haneng_test", "_id" : "2" } } 257 | { "name" : "elastic" } 258 | { "index" : { "_index" : "haneng_test", "_id" : "3" } } 259 | { "name" : "아메리카노" } 260 | 261 | //한영 변환 오타교정 검색 테스트 262 | POST /haneng_test/_search 263 | { 264 | "query": { 265 | "match": { 266 | "name_hantoeng": "딤ㄴ샻" 267 | } 268 | } 269 | } 270 | 271 | //영한 변환 오타교정 검색 테스트 272 | POST /haneng_test/_search 273 | { 274 | "query": { 275 | "match": { 276 | "name_engtohan": "thsdhrhd" 277 | } 278 | } 279 | } 280 | ``` 281 | 282 | ``` javascript 283 | //한영 변환 오타교정 검색 테스트 응답결과 284 | { 285 | "took" : 2, 286 | "timed_out" : false, 287 | "_shards" : { 288 | "total" : 1, 289 | "successful" : 1, 290 | 
"skipped" : 0, 291 | "failed" : 0 292 | }, 293 | "hits" : { 294 | "total" : { 295 | "value" : 1, 296 | "relation" : "eq" 297 | }, 298 | "max_score" : 0.9808291, 299 | "hits" : [ 300 | { 301 | "_index" : "haneng_test", 302 | "_type" : "_doc", 303 | "_id" : "2", 304 | "_score" : 0.9808291, 305 | "_source" : { 306 | "name" : "elastic" 307 | } 308 | } 309 | ] 310 | } 311 | } 312 | 313 | //영한 변환 오타교정 검색 테스트 응답결과 314 | { 315 | "took" : 2, 316 | "timed_out" : false, 317 | "_shards" : { 318 | "total" : 1, 319 | "successful" : 1, 320 | "skipped" : 0, 321 | "failed" : 0 322 | }, 323 | "hits" : { 324 | "total" : { 325 | "value" : 1, 326 | "relation" : "eq" 327 | }, 328 | "max_score" : 0.9808291, 329 | "hits" : [ 330 | { 331 | "_index" : "haneng_test", 332 | "_type" : "_doc", 333 | "_id" : "1", 334 | "_score" : 0.9808291, 335 | "_source" : { 336 | "name" : "손오공" 337 | } 338 | } 339 | ] 340 | } 341 | } 342 | ``` 343 | 344 | **3) 초성 검색** 345 | 346 | 초성이 분리된 문자열을 색인하기 위한 `text` 타입의 필드를 추가적으로 생성하고 초성 필터가 적용된 분석기를 만듭니다. 이후 해당 필드를 통해 초성 검색이 가능합니다. 
347 | 348 | ``` javascript 349 | //인덱스 생성 350 | PUT /chosung_test 351 | { 352 | "settings": { 353 | "number_of_shards": 1, 354 | "number_of_replicas": 0, 355 | "index.max_ngram_diff": 10, 356 | "analysis": { 357 | "analyzer": { 358 | "chosung_analyzer": { 359 | "type": "custom", 360 | "tokenizer": "standard", 361 | "filter": [ 362 | "lowercase", 363 | "hanhinsam_chosung" 364 | ] 365 | } 366 | } 367 | } 368 | }, 369 | "mappings": { 370 | "properties": { 371 | "name": { 372 | "type": "keyword", 373 | "copy_to": ["name_chosung"] 374 | }, 375 | "name_chosung": { 376 | "type": "text", 377 | "analyzer": "chosung_analyzer" 378 | } 379 | } 380 | } 381 | } 382 | 383 | //데이터 색인 384 | POST /_bulk 385 | { "index" : { "_index" : "chosung_test", "_id" : "2" } } 386 | { "name" : "엘라스틱서치" } 387 | { "index" : { "_index" : "chosung_test", "_id" : "3" } } 388 | { "name" : "아메리카노" } 389 | 390 | //초성검색 테스트 391 | POST /chosung_test/_search 392 | { 393 | "query": { 394 | "match": { 395 | "name_chosung": "ㅇㄹㅅㅌㅅㅊ" 396 | } 397 | } 398 | } 399 | ``` 400 | 401 | ``` javascript 402 | //응답결과 403 | { 404 | "took" : 1, 405 | "timed_out" : false, 406 | "_shards" : { 407 | "total" : 1, 408 | "successful" : 1, 409 | "skipped" : 0, 410 | "failed" : 0 411 | }, 412 | "hits" : { 413 | "total" : { 414 | "value" : 1, 415 | "relation" : "eq" 416 | }, 417 | "max_score" : 0.6931471, 418 | "hits" : [ 419 | { 420 | "_index" : "chosung_test", 421 | "_type" : "_doc", 422 | "_id" : "2", 423 | "_score" : 0.6931471, 424 | "_source" : { 425 | "name" : "엘라스틱서치" 426 | } 427 | } 428 | ] 429 | } 430 | } 431 | ``` 432 | 433 | **4) 자동완성** 434 | 435 | 자동완성을 위한 `text` 타입의 필드를 추가적으로 생성합니다. 색인 분석기에는 `hanhinsam_jamo` 필터 뒤에 ngram 토큰 필터를 적용하여 부분 문자열이 같이 색인되도록 합니다. 검색 분석기에는 `hanhinsam_jamo` 필터만 적용합니다. 이후 분석을 위해 추가 생성한 필드를 통해 부분일치 검색이 가능하며, 이 기능을 통해 서비스에서는 자동완성 기능을 구현할 수 있게 됩니다. 
436 | 437 | ``` javascript 438 | //인덱스 생성 439 | PUT /ac_test 440 | { 441 | "settings": { 442 | "number_of_shards": 1, 443 | "number_of_replicas": 0, 444 | "index.max_ngram_diff": 30, 445 | "analysis": { 446 | "filter": { 447 | "ngram_filter": { 448 | "type": "ngram", 449 | "min_gram": 1, 450 | "max_gram": 20 451 | } 452 | }, 453 | "analyzer": { 454 | "jamo_analyzer": { 455 | "type": "custom", 456 | "tokenizer": "standard", 457 | "filter": [ 458 | "lowercase", 459 | "hanhinsam_jamo" 460 | ] 461 | }, 462 | "ngram_jamo_analyzer": { 463 | "type": "custom", 464 | "tokenizer": "standard", 465 | "filter": [ 466 | "lowercase", 467 | "hanhinsam_jamo", 468 | "ngram_filter" 469 | ] 470 | } 471 | } 472 | } 473 | }, 474 | "mappings": { 475 | "properties": { 476 | "name": { 477 | "type": "keyword", 478 | "copy_to": "name_ngram" 479 | }, 480 | "name_ngram": { 481 | "type": "text", 482 | "analyzer": "ngram_jamo_analyzer", 483 | "search_analyzer": "jamo_analyzer" 484 | } 485 | } 486 | } 487 | } 488 | 489 | //데이터 색인 490 | POST /_bulk 491 | { "index" : { "_index" : "ac_test", "_id" : "1" } } 492 | { "name" : "손오공" } 493 | { "index" : { "_index" : "ac_test", "_id" : "2" } } 494 | { "name" : "elastic" } 495 | { "index" : { "_index" : "ac_test", "_id" : "3" } } 496 | { "name" : "아메리카노" } 497 | 498 | //자동완성 검색 테스트 499 | POST /ac_test/_search 500 | { 501 | "query": { 502 | "match": { 503 | "name_ngram": "아멜" 504 | } 505 | } 506 | } 507 | ``` 508 | 509 | ``` javascript 510 | //응답결과 511 | { 512 | "took" : 1, 513 | "timed_out" : false, 514 | "_shards" : { 515 | "total" : 1, 516 | "successful" : 1, 517 | "skipped" : 0, 518 | "failed" : 0 519 | }, 520 | "hits" : { 521 | "total" : { 522 | "value" : 1, 523 | "relation" : "eq" 524 | }, 525 | "max_score" : 1.631392, 526 | "hits" : [ 527 | { 528 | "_index" : "ac_test", 529 | "_type" : "_doc", 530 | "_id" : "3", 531 | "_score" : 1.631392, 532 | "_source" : { 533 | "name" : "아메리카노" 534 | } 535 | } 536 | ] 537 | } 538 | } 539 | ``` 
/**
 * Converts between precomposed Hangul syllables ({@code U+AC00 '가'} .. {@code U+D7A3 '힣'}) and
 * sequences of Hangul compatibility jamo.
 *
 * <p>A syllable is encoded as
 * {@code 0xAC00 + (chosung * 21 + jungsung) * 28 + jongsung}, where the three indices are the
 * positions in {@link #CHOSUNG_LIST}, {@link #JUNGSUNG_LIST} and {@link #JONGSUNG_LIST}.
 *
 * <p>All lookup tables are filled once during class initialisation and never modified afterwards;
 * the instance itself holds no state.
 */
public class JamoUtil {

    /** First precomposed syllable, '가'. */
    private static final char FIRST_SYLLABLE = '\uAC00';
    /** Last precomposed syllable, '힣'. */
    private static final char LAST_SYLLABLE = '\uD7A3';

    /** The 19 initial consonants, in Unicode syllable order. */
    private static final char[] CHOSUNG_LIST = {'ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ',
            'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'};

    /** The 21 medial vowels, in Unicode syllable order. */
    private static final char[] JUNGSUNG_LIST = {'ㅏ', 'ㅐ', 'ㅑ', 'ㅒ', 'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ', 'ㅗ',
            'ㅘ', 'ㅙ', 'ㅚ', 'ㅛ', 'ㅜ', 'ㅝ', 'ㅞ', 'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ', 'ㅣ'};

    /**
     * The 28 final-consonant slots, in Unicode syllable order. Slot 0 means "no final consonant";
     * the blank is only a placeholder that keeps the remaining indices aligned.
     */
    private static final char[] JONGSUNG_LIST = {' ', 'ㄱ', 'ㄲ', 'ㄳ', 'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ', 'ㄹ',
            'ㄺ', 'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ', 'ㅁ', 'ㅂ', 'ㅄ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ',
            'ㅌ', 'ㅍ', 'ㅎ'};

    private static final int JUNGSUNG_COUNT = JUNGSUNG_LIST.length;
    private static final int JONGSUNG_COUNT = JONGSUNG_LIST.length;

    // jamo -> index tables, derived from the arrays so the two can never disagree
    private static final Map<Character, Integer> CHOSUNG_MAP = indexTable(CHOSUNG_LIST, 0);
    private static final Map<Character, Integer> JUNGSUNG_MAP = indexTable(JUNGSUNG_LIST, 0);
    // Slot 0 (the blank placeholder) is skipped on purpose: a space in the input is ordinary text
    // and must never be consumed as an "empty" final consonant.
    private static final Map<Character, Integer> JONGSUNG_MAP = indexTable(JONGSUNG_LIST, 1);

    /** Compound jamo -> the two simple jamo it is typed as (consonant clusters and diphthongs). */
    private static final Map<Character, String> LAYER_CHARACTER_MAP = buildLayerCharacterMap();

    // Inverse views of LAYER_CHARACTER_MAP, so composing and de-layering always cover the same set.
    private static final Map<String, Character> DUAL_JUNGSUNG_MAP = invertLayers(JUNGSUNG_MAP);
    private static final Map<String, Character> DUAL_JONGSUNG_MAP = invertLayers(JONGSUNG_MAP);

    public JamoUtil() {
        // nothing to do: all tables are static and already initialised
    }

    /** Maps each jamo in {@code jamo[firstIndex..]} to its array position. */
    private static Map<Character, Integer> indexTable(char[] jamo, int firstIndex) {
        Map<Character, Integer> table = new HashMap<>(jamo.length * 2);
        for (int i = firstIndex; i < jamo.length; i++) {
            table.put(jamo[i], i);
        }
        return table;
    }

    /** Builds the compound-jamo table: 11 final-consonant clusters and 7 compound vowels. */
    private static Map<Character, String> buildLayerCharacterMap() {
        Map<Character, String> layers = new HashMap<>();
        layers.put('ㄳ', "ㄱㅅ");
        layers.put('ㄵ', "ㄴㅈ");
        layers.put('ㄶ', "ㄴㅎ");
        layers.put('ㄺ', "ㄹㄱ");
        layers.put('ㄻ', "ㄹㅁ");
        layers.put('ㄼ', "ㄹㅂ");
        layers.put('ㄽ', "ㄹㅅ");
        layers.put('ㄾ', "ㄹㅌ");
        layers.put('ㄿ', "ㄹㅍ");
        layers.put('ㅀ', "ㄹㅎ");
        layers.put('ㅄ', "ㅂㅅ");
        layers.put('ㅘ', "ㅗㅏ");
        layers.put('ㅙ', "ㅗㅐ");
        layers.put('ㅚ', "ㅗㅣ");
        layers.put('ㅝ', "ㅜㅓ");
        layers.put('ㅞ', "ㅜㅔ");
        layers.put('ㅟ', "ㅜㅣ");
        layers.put('ㅢ', "ㅡㅣ");
        return layers;
    }

    /**
     * Returns "simple pair -> compound jamo" for every compound jamo that is a key of
     * {@code positions} (i.e. only the vowels, or only the final consonants).
     */
    private static Map<String, Character> invertLayers(Map<Character, Integer> positions) {
        Map<String, Character> merged = new HashMap<>();
        for (Map.Entry<Character, String> layer : LAYER_CHARACTER_MAP.entrySet()) {
            if (positions.containsKey(layer.getKey())) {
                merged.put(layer.getValue(), layer.getKey());
            }
        }
        return merged;
    }

    /**
     * Decomposes every Hangul syllable in the input into its jamo; other characters are copied
     * unchanged.
     *
     * @param hangulString text to decompose; must not be {@code null}
     * @param delayer when {@code true}, compound jamo are additionally split into simple jamo
     *        (e.g. {@code 'ㄺ' -> "ㄹㄱ"}, {@code 'ㅘ' -> "ㅗㅏ"})
     * @return the jamo string
     */
    public String decompose(String hangulString, boolean delayer) {
        StringBuilder jasoBuilder = new StringBuilder();
        for (char ch : hangulString.toCharArray()) {
            String jaso = decompose(ch);
            jasoBuilder.append(delayer ? deLayer(jaso) : jaso);
        }
        return jasoBuilder.toString();
    }

    /** Decomposes one character; anything outside '가'..'힣' is returned as-is. */
    private String decompose(char hangul) {
        if (hangul < FIRST_SYLLABLE || hangul > LAST_SYLLABLE) {
            return String.valueOf(hangul);
        }

        int offset = hangul - FIRST_SYLLABLE;
        int chosungIndex = offset / (JUNGSUNG_COUNT * JONGSUNG_COUNT);
        int jungsungIndex = (offset / JONGSUNG_COUNT) % JUNGSUNG_COUNT;
        int jongsungIndex = offset % JONGSUNG_COUNT;

        StringBuilder jasoBuilder = new StringBuilder(3);
        jasoBuilder.append(CHOSUNG_LIST[chosungIndex]);
        jasoBuilder.append(JUNGSUNG_LIST[jungsungIndex]);
        if (jongsungIndex > 0) { // index 0 = syllable without a final consonant
            jasoBuilder.append(JONGSUNG_LIST[jongsungIndex]);
        }
        return jasoBuilder.toString();
    }

    /** Splits consonant clusters ('ㄺ', 'ㄼ', ...) and compound vowels ('ㅘ', 'ㅞ', ...). */
    private String deLayer(String str) {
        StringBuilder jasoBuilder = new StringBuilder();
        for (char ch : str.toCharArray()) {
            String parts = LAYER_CHARACTER_MAP.get(ch);
            if (parts != null) {
                jasoBuilder.append(parts);
            } else {
                jasoBuilder.append(ch);
            }
        }
        return jasoBuilder.toString();
    }

    /**
     * Assembles a jamo sequence into Hangul syllables, the way a two-set keyboard IME would:
     * initial consonant + vowel (+ second vowel forming a compound vowel) (+ final consonant
     * (+ second consonant forming a cluster)). A consonant that is directly followed by a vowel is
     * never taken as a final consonant, because it starts the next syllable. Characters that cannot
     * take part in a syllable (including spaces) are copied unchanged; two stand-alone vowels that
     * form a compound vowel are merged.
     *
     * @param str jamo sequence; must not be {@code null}
     * @return the composed text
     */
    public String compose(String str) {
        final int length = str.length();
        StringBuilder result = new StringBuilder(length);
        int pos = 0;

        while (pos < length) {
            char current = str.charAt(pos);
            Integer chosungIdx = CHOSUNG_MAP.get(current);

            if (chosungIdx == null) {
                // Not an initial consonant: merge a stand-alone vowel pair or copy the character.
                Character compound = pos + 1 < length && JUNGSUNG_MAP.containsKey(current)
                        ? DUAL_JUNGSUNG_MAP.get(str.substring(pos, pos + 2))
                        : null;
                if (compound != null) {
                    result.append(compound.charValue());
                    pos += 2;
                } else {
                    result.append(current);
                    pos++;
                }
                continue;
            }

            Integer vowelIdx = pos + 1 < length ? JUNGSUNG_MAP.get(str.charAt(pos + 1)) : null;
            if (vowelIdx == null) {
                // A consonant without a vowel cannot form a syllable.
                result.append(current);
                pos++;
                continue;
            }
            int jungsungIdx = vowelIdx;
            pos += 2; // consumed initial consonant and first vowel

            // Second vowel that combines with the first one, e.g. ㅗ + ㅏ -> ㅘ.
            if (pos < length) {
                Character compound = DUAL_JUNGSUNG_MAP.get(str.substring(pos - 1, pos + 1));
                if (compound != null) {
                    jungsungIdx = JUNGSUNG_MAP.get(compound);
                    pos++;
                }
            }

            // Optional final consonant, possibly a two-consonant cluster, e.g. ㄹ + ㄱ -> ㄺ.
            int jongsungIdx = 0;
            if (isJongsungAt(str, pos)) {
                jongsungIdx = JONGSUNG_MAP.get(str.charAt(pos));
                pos++;
                if (isJongsungAt(str, pos)) {
                    Character cluster = DUAL_JONGSUNG_MAP.get(str.substring(pos - 1, pos + 1));
                    if (cluster != null) {
                        jongsungIdx = JONGSUNG_MAP.get(cluster);
                        pos++;
                    }
                }
            }

            result.append((char) (FIRST_SYLLABLE
                    + (chosungIdx * JUNGSUNG_COUNT + jungsungIdx) * JONGSUNG_COUNT
                    + jongsungIdx));
        }

        return result.toString();
    }

    /**
     * Tells whether the character at {@code pos} can close the current syllable: it must be a
     * final consonant and must not be followed by a vowel (otherwise it opens the next syllable).
     */
    private static boolean isJongsungAt(String str, int pos) {
        if (pos >= str.length() || !JONGSUNG_MAP.containsKey(str.charAt(pos))) {
            return false;
        }
        return !(pos + 1 < str.length() && JUNGSUNG_MAP.containsKey(str.charAt(pos + 1)));
    }

    /**
     * Extracts the initial consonant of every Hangul syllable; other characters are copied
     * unchanged.
     *
     * @param str text to process; must not be {@code null}
     * @return one output character per input character
     */
    public String chosung(String str) {
        StringBuilder chosungBuilder = new StringBuilder(str.length());
        for (char ch : str.toCharArray()) {
            chosungBuilder.append(decompose(ch).charAt(0));
        }
        return chosungBuilder.toString();
    }
}