├── Gemfile ├── .document ├── .travis.yml ├── .gitignore ├── Rakefile ├── lib ├── gimchi │ ├── patch_1.8.rb │ ├── char.rb │ └── pronouncer.rb └── gimchi.rb ├── test ├── helper.rb ├── romanization.yml ├── test_gimchi.rb └── pronunciation.yml ├── gimchi.gemspec ├── LICENSE.txt ├── CHANGELOG.rdoc ├── crawler └── crawler.rb ├── config └── default.yml ├── README.ko.md └── README.md /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | gemspec 3 | -------------------------------------------------------------------------------- /.document: -------------------------------------------------------------------------------- 1 | lib/**/*.rb 2 | bin/* 3 | - 4 | features/**/*.feature 5 | LICENSE.txt 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | rvm: 3 | - "1.8.7" 4 | - "1.9.2" 5 | - "1.9.3" 6 | - "2.0.0" 7 | - jruby-18mode # JRuby in 1.8 mode 8 | - jruby-19mode # JRuby in 1.9 mode 9 | - rbx-18mode 10 | - rbx-19mode 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | .yardoc 6 | Gemfile.lock 7 | InstalledFiles 8 | _yardoc 9 | coverage 10 | doc/ 11 | lib/bundler/man 12 | pkg 13 | rdoc 14 | spec/reports 15 | test/tmp 16 | test/version_tmp 17 | tmp 18 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require 'rake/testtask' 3 | Rake::TestTask.new(:test) do |test| 4 | test.libs << 'lib' << 'test' 5 | test.pattern = 'test/**/test_*.rb' 6 | test.verbose = true 7 | end 8 | 9 | task :default => :test 10 | 
-------------------------------------------------------------------------------- /lib/gimchi/patch_1.8.rb: -------------------------------------------------------------------------------- 1 | if RUBY_VERSION =~ /^1\.8\./ 2 | $KCODE = 'U' 3 | 4 | class Gimchi 5 | class << self 6 | private 7 | def str_length str 8 | str.scan(/./mu).length 9 | end 10 | end 11 | end#Gimchi 12 | end 13 | -------------------------------------------------------------------------------- /test/helper.rb: -------------------------------------------------------------------------------- 1 | $VERBOSE = true 2 | require 'rubygems' 3 | require 'bundler' 4 | begin 5 | Bundler.setup(:default, :development) 6 | rescue Bundler::BundlerError => e 7 | $stderr.puts e.message 8 | $stderr.puts "Run `bundle install` to install missing gems" 9 | exit e.status_code 10 | end 11 | require 'test/unit' 12 | 13 | $LOAD_PATH.unshift(File.dirname(__FILE__)) 14 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 15 | require 'gimchi' 16 | 17 | class Test::Unit::TestCase 18 | end 19 | -------------------------------------------------------------------------------- /gimchi.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | lib = File.expand_path('../lib', __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | 5 | Gem::Specification.new do |gem| 6 | gem.name = %q{gimchi} 7 | gem.version = "0.2.1" 8 | gem.authors = ["Junegunn Choi"] 9 | gem.email = ["junegunn.c@gmail.com"] 10 | gem.description = %q{A Ruby gem for Korean characters} 11 | gem.summary = %q{A Ruby gem for Korean characters} 12 | gem.homepage = "https://github.com/junegunn/gimchi" 13 | 14 | gem.files = `git ls-files`.split($/).reject { |f| f =~ %r[^viz/] } 15 | gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) } 16 | gem.test_files = gem.files.grep(%r{^(test|spec|features)/}) 17 | gem.require_paths = ["lib"] 18 | gem.license = 
"MIT" 19 | 20 | gem.add_development_dependency 'ansi' 21 | gem.add_development_dependency 'rake' 22 | end 23 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Junegunn Choi 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /CHANGELOG.rdoc: -------------------------------------------------------------------------------- 1 | === 0.2.0 / 2013/03/28 2 | * Completely backward-incompatible release :p 3 | 4 | === 0.1.9 / 2012/02/20 5 | * Bug fix: Failing test on 1.8 6 | * `Gimchi::Korean#dissect` renamed to `Gimchi::Korean#convert` 7 | * `Gimchi::Korean#dissect` completely dissects the given String into an Array of Korean character components in String 8 | 9 | === 0.1.8 / 2011/12/02 10 | * Added `Gimchi::Korean#kchar` 11 | 12 | === 0.1.7 / 2011/10/17 13 | * Bug fix: Fixed reading 0 14 | * Bug fix: Failing test on 1.8 15 | * Bug fix: Fixed 'incompatible encoding regexp' problem on 1.9 16 | 17 | === 0.1.6 / 2011/04/13 18 | * More post substitution for read_number 19 | 20 | === 0.1.5 / 2011/04/12 21 | * Removed possible loss of precision during read_number 22 | * read_number extended to read exponential notation properly. 23 | 24 | === 0.1.4 / 2011/04/08 25 | * Minor improvement in romanization output. -y => y 26 | 27 | === 0.1.3 / 2011/04/08 28 | * Now compatible with Ruby 1.8 29 | 30 | === 0.1.2 / 2011/04/08 31 | * Bug fix in pronouncer.rb. It was undetectable on Ruby 1.9, but detectable on 1.8 32 | 33 | === 0.1.1 / 2011/04/07 34 | * Removed Gimchi::Korean::Char#org 35 | * Code refactoring 36 | * `Gimchi::Korean#romanize` no longer capitalizes the output string 37 | * `Gimchi::Korean#romanize` does not affect non-Korean characters 38 | * yard documentation 39 | 40 | === 0.1.0 / 2011/04/05 41 | * Prototype release. 
42 | 43 | -------------------------------------------------------------------------------- /crawler/crawler.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # encoding: UTF-8 3 | # Junegunn Choi (junegunn.c@gmail.com) 4 | # 2011/04/02- 5 | 6 | # A dirty little script to fetch test sets from http://www.korean.go.kr 7 | 8 | require 'open-uri' 9 | require 'yaml' 10 | 11 | # Crawl romanization test set 12 | rdata = open('http://www.korean.go.kr/09_new/dic/rule/rule_roman_0101.jsp').read. 13 | scan(%r{th>(.*?)}m }. 14 | select { |e| e.length == 2 } 15 | 16 | File.open(File.dirname(__FILE__) + '/../test/romanization.yml', 'w') do | f | 17 | f.puts "---" 18 | 19 | rdata.each do | arr | 20 | f.puts "\"#{arr.first}\": \"#{arr.last}\"" 21 | end 22 | end 23 | 24 | exit 25 | 26 | # Crawl pronunciation test set 27 | m = {} 28 | %w[ 29 | http://www.korean.go.kr/09_new/dic/rule/rule02_0202.jsp 30 | http://www.korean.go.kr/09_new/dic/rule/rule02_0204.jsp 31 | http://www.korean.go.kr/09_new/dic/rule/rule02_0205.jsp 32 | http://www.korean.go.kr/09_new/dic/rule/rule02_0206.jsp 33 | http://www.korean.go.kr/09_new/dic/rule/rule02_0207.jsp 34 | ].each do | url | 35 | open(url).read.scan(/>([^0-9<>);?]+?)\[(.*?)\] ') 37 | m[match[0]] = match[1] 38 | end 39 | end 40 | 41 | File.open(File.dirname(__FILE__) + '/../test/pronunciation.yml', 'w') do | f | 42 | f.puts "---" 43 | m.each do | k, v | 44 | k = k.sub(/.*→/, '').gsub(/-/, '') 45 | v = v.sub(/.*→/, '').gsub(/[\(:ː\)]/, '').split(%r{[/∼]}) 46 | f.puts "\"#{k}\": [#{v.join(', ')}]" 47 | end 48 | end 49 | 50 | -------------------------------------------------------------------------------- /test/romanization.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "똑같은": "Ttok-kkateun" 3 | "반기문": "Ban-gimun" 4 | "방이문": "Bang-imun" 5 | "구미": "Gumi" 6 | "영동": "Yeongdong" 7 | "백암": "Baegam" 8 | "옥천": "Okcheon" 9 | "합덕": "Hapdeok" 10 | 
"호법": "Hobeop" 11 | "월곶[월곧]": "Wolgot" 12 | "벚꽃[벋꼳]": "Beotkkot" 13 | "한밭[한받]": "Hanbat" 14 | "구리": "Guri" 15 | "설악": "Seorak" 16 | "칠곡": "Chilgok" 17 | "임실": "Imsil" 18 | "울릉": "Ulleung" 19 | "대관령[대괄령]": "Daegwallyeong" 20 | "백마[뱅마]": "Baengma" 21 | "신문로[신문노]": "Sinmunno" 22 | "종로[종노]": "Jongno" 23 | "왕십리[왕심니]": "Wangsimni" 24 | "별내[별래]": "Byeollae" 25 | "신라[실라]": "Silla" 26 | "학여울[항녀울]": "Hangnyeoul" 27 | "알약[알략]": "allyak" 28 | "해돋이[해도지]": "haedoji" 29 | "같이[가치]": "gachi" 30 | "맞히다[마치다]": "machida" 31 | "좋고[조코]": "joko" 32 | "놓다[노타]": "nota" 33 | "잡혀[자펴]": "japyeo" 34 | "낳지[나치]": "nachi" 35 | "압구정": "Apgujeong" 36 | "낙동강": "Nakdonggang" 37 | "죽변": "Jukbyeon" 38 | "낙성대": "Nakseongdae" 39 | "합정": "Hapjeong" 40 | "팔당": "Paldang" 41 | "샛별": "saetbyeol" 42 | "울산": "Ulsan" 43 | "중앙": "Jung-ang" 44 | "반구대": "Ban-gudae" 45 | "세운": "Se-un" 46 | "해운대": "Hae-undae" 47 | "부산": "Busan" 48 | "세종": "Sejong" 49 | "한복남": "Han Boknam (Han Bok-nam)" 50 | "홍빛나": "Hong Bitna (Hong Bit-na)" 51 | "강남대로": "Gangnam-daero" 52 | "세종로": "Sejong-ro" 53 | "개나리길": "Gaenari-gil" 54 | "충청북도": "Chungcheongbuk-do" 55 | "제주도": "Jeju-do" 56 | "의정부시": "Uijeongbu-si" 57 | "양주군": "Yangju-gun" 58 | "도봉구": "Dobong-gu" 59 | "신창읍": "Sinchang-eup" 60 | "삼죽면": "Samjuk-myeon" 61 | "인왕리": "Inwang-ri" 62 | "당산동": "Dangsan-dong" 63 | "봉천 1동": "Bongcheon 1(il)-dong" 64 | "종로 2가": "Jongno 2(i)-ga" 65 | "퇴계로 3가": "Toegyero 3(sam)-ga" 66 | "청주시": "Cheongju" 67 | "함평군": "Hampyeong" 68 | "순창읍": "Sunchang" 69 | "남산": "Namsan" 70 | "속리산": "Songnisan" 71 | "금강": "Geumgang" 72 | "독도": "Dokdo" 73 | "경복궁": "Gyeongbokgung" 74 | "무량수전": "Muryangsujeon" 75 | "연화교": "Yeonhwagyo" 76 | "극락전": "Geungnakjeon" 77 | "안압지": "Anapji" 78 | "남한산성": "Namhansanseong" 79 | "화랑대": "Hwarangdae" 80 | "불국사": "Bulguksa" 81 | "현충사": "Hyeonchungsa" 82 | "독립문": "Dongnimmun" 83 | "오죽헌": "Ojukheon" 84 | "촉석루": "Chokseongnu" 85 | "종묘": "Jongmyo" 86 | "다보탑": "Dabotap" 87 | "집": "jib" 88 | "짚": "jip" 89 | "밖": "bakk" 90 | "값": "gabs" 91 | "붓꽃": 
"buskkoch" 92 | "먹는": "meogneun" 93 | "독립": "doglib" 94 | "문리": "munli" 95 | "물엿": "mul-yeos" 96 | "굳이": "gud-i" 97 | "좋다": "johda" 98 | "가곡": "gagog" 99 | "조랑말": "jolangmal" 100 | "없었습니다.": "eobs-eoss-seubnida" 101 | "20%da kkkrl": "isip%da kkkrl" 102 | "안녕하세요": "annyeonghaseyo" 103 | "ㅠㅠ": "Yuyu" 104 | "ㄶ": "Eun" 105 | "ㅋㅋㅋ": "Keukeukeu" 106 | -------------------------------------------------------------------------------- /config/default.yml: -------------------------------------------------------------------------------- 1 | --- 2 | structure: 3 | chosung: [ㄱ, ㄲ, ㄴ, ㄷ, ㄸ, ㄹ, ㅁ, ㅂ, ㅃ, ㅅ, ㅆ, ㅇ, ㅈ, ㅉ, ㅊ, ㅋ, ㅌ, ㅍ, ㅎ] 4 | jungsung: [ㅏ, ㅐ, ㅑ, ㅒ, ㅓ, ㅔ, ㅕ, ㅖ, ㅗ, ㅘ, ㅙ, ㅚ, ㅛ, ㅜ, ㅝ, ㅞ, ㅟ, ㅠ, ㅡ, ㅢ, ㅣ] 5 | jongsung: [ㄱ, ㄲ, ㄳ, ㄴ, ㄵ, ㄶ, ㄷ, ㄹ, ㄺ, ㄻ, ㄼ, ㄽ, ㄾ, ㄿ, ㅀ, ㅁ, ㅂ, ㅄ, ㅅ, 6 | ㅆ, ㅇ, ㅈ, ㅊ, ㅋ, ㅌ, ㅍ, ㅎ] 7 | 8 | fortis map: 9 | ㄱ: ㄲ 10 | ㄷ: ㄸ 11 | ㅂ: ㅃ 12 | ㅅ: ㅆ 13 | ㅈ: ㅉ 14 | 15 | double consonant map: 16 | ㄳ: [ㄱ, ㅅ] 17 | ㄵ: [ㄴ, ㅈ] 18 | ㄶ: [ㄴ, ㅎ] 19 | ㄺ: [ㄹ, ㄱ] 20 | ㄻ: [ㄹ, ㅁ] 21 | ㄼ: [ㄹ, ㅂ] 22 | ㄽ: [ㄹ, ㅅ] 23 | ㄾ: [ㄹ, ㅌ] 24 | ㄿ: [ㄹ, ㅍ] 25 | ㅀ: [ㄹ, ㅎ] 26 | ㅄ: [ㅂ, ㅅ] 27 | 28 | pronouncer: 29 | jongsung sound: 30 | ㄱ: ㄱ 31 | ㄲ: ㄱ 32 | ㄳ: ㄱ 33 | ㄴ: ㄴ 34 | ㄵ: ㄴ 35 | ㄶ: ㄴ 36 | ㄷ: ㄷ 37 | ㄹ: ㄹ 38 | ㄺ: ㄱ 39 | ㄻ: ㅁ 40 | ㄼ: ㄹ 41 | ㄽ: ㄹ 42 | ㄾ: ㅌ 43 | ㄿ: ㅂ 44 | ㅀ: ㄹ 45 | ㅁ: ㅁ 46 | ㅂ: ㅂ 47 | ㅄ: ㅂ 48 | ㅅ: ㄷ 49 | ㅆ: ㄷ 50 | ㅇ: ㅇ 51 | ㅈ: ㄷ 52 | ㅊ: ㄷ 53 | ㅋ: ㄱ 54 | ㅌ: ㄷ 55 | ㅍ: ㅂ 56 | ㅎ: 57 | transformation: 58 | # changing the order affects the quality of the transformation 59 | sequence for 1: 60 | - rule_5_1 61 | - rule_5_3 62 | 63 | sequence for 2: 64 | - rule_16 65 | - rule_17 66 | - rule_18 67 | - rule_19 68 | - rule_5_1 69 | - rule_5_3 70 | - rule_30 71 | - rule_23 72 | - rule_24 73 | - rule_25 74 | - rule_12 75 | - rule_20 76 | - rule_10 77 | - rule_27 78 | - rule_9 79 | - rule_11 80 | - rule_14 81 | - rule_13 82 | - rule_15 83 | blocking rule: 84 | rule_16: [rule_30] 85 | 86 | number: 87 | positive: 플러스 88 | negative: 마이너스 89 | decimal point: 점 90 | units: ["", 만, 억, 조, 경, 해, 자, 양, 구, 
간, 정, 재, 극, 항하사, 아승기, 나유타, 불가사의, 무량대수] 91 | digits: [영, 일, 이, 삼, 사, 오, 육, 칠, 팔, 구] 92 | post substitution: 93 | "^일만": 만 94 | 95 | # 정수형일 때 또다른 표현법 (나이, 시간) 96 | alt notation: 97 | when suffix: 98 | 개: 99 | max: 100 | 명: 101 | max: 102 | 살: 103 | max: 104 | 시: 105 | max: 12 106 | tenfolds: [열, 스물, 서른, 마흔, 쉰, 예순, 일흔, 여든, 아흔, 백] 107 | digits: ["", 한, 두, 세, 네, 다섯, 여섯, 일곱, 여덟, 아홉] 108 | post substitution: 109 | "스물$": 스무 110 | 111 | romanization: 112 | chosung: 113 | ㄱ: g 114 | ㄲ: kk 115 | ㅋ: k 116 | ㄷ: d 117 | ㄸ: tt 118 | ㅌ: t 119 | ㅂ: b 120 | ㅃ: pp 121 | ㅍ: p 122 | ㅈ: j 123 | ㅉ: jj 124 | ㅊ: ch 125 | ㅅ: s 126 | ㅆ: ss 127 | ㅎ: h 128 | ㄴ: n 129 | ㅁ: m 130 | ㄹ: r 131 | ㅇ: "-" 132 | jungsung: 133 | ㅏ: a 134 | ㅓ: eo 135 | ㅗ: o 136 | ㅜ: u 137 | ㅡ: eu 138 | ㅣ: i 139 | ㅐ: ae 140 | ㅔ: e 141 | ㅚ: oe 142 | ㅟ: wi 143 | ㅑ: ya 144 | ㅕ: yeo 145 | ㅛ: yo 146 | ㅠ: yu 147 | ㅒ: yae 148 | ㅖ: ye 149 | ㅘ: wa 150 | ㅙ: wae 151 | ㅝ: wo 152 | ㅞ: we 153 | ㅢ: ui 154 | jongsung: 155 | ㄱ: k 156 | ㄴ: n- 157 | ㄷ: t 158 | ㄹ: l 159 | ㅁ: m 160 | ㅂ: p 161 | ㅇ: ng 162 | post substitution: 163 | # 제2항 [붙임 2]‘ㄹ’은 모음 앞에서는 ‘r’로, 자음 앞이나 어말에서는 164 | # ‘l’로 적는다. 단, ‘ㄹㄹ’은 ‘ll’로 적는다. 165 | lr: ll 166 | "-w": w 167 | "-y": y 168 | kkk: k-kk 169 | ttt: t-tt 170 | ppp: p-pp 171 | "--": "-" 172 | "n-([^gaeiou])": "n\\1" 173 | "-(\\s)": "\\1" 174 | "-$": "" 175 | -------------------------------------------------------------------------------- /lib/gimchi/char.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | 3 | class Gimchi 4 | # Class representing each Korean character. Its three components, 5 | # `chosung', `jungsung' and `jongsung' can be get and set. 6 | # 7 | # `to_s' merges components into a String. `to_a' returns the three components. 8 | class Char 9 | # @return [String] Chosung component of this character. 10 | attr_reader :chosung 11 | # @return [String] Jungsung component of this character. 
12 | attr_reader :jungsung 13 | # @return [String] Jongsung component of this character. 14 | attr_reader :jongsung 15 | 16 | # @param [String] kchar Korean character string 17 | def initialize kchar 18 | raise ArgumentError.new('Not a korean character') unless Gimchi.korean_char? kchar 19 | 20 | if Gimchi.complete_korean_char? kchar 21 | c = kchar.unpack('U').first 22 | n = c - 0xAC00 23 | # '가' ~ '깋' -> 'ㄱ' 24 | n1 = n / (21 * 28) 25 | # '가' ~ '깋'에서의 순서 26 | n = n % (21 * 28) 27 | n2 = n / 28; 28 | n3 = n % 28; 29 | self.chosung = Gimchi.chosungs[n1] 30 | self.jungsung = Gimchi.jungsungs[n2] 31 | self.jongsung = ([nil] + Gimchi.jongsungs)[n3] 32 | elsif Gimchi.chosung? kchar 33 | self.chosung = kchar 34 | elsif Gimchi.jungsung? kchar 35 | self.jungsung = kchar 36 | elsif Gimchi.jongsung? kchar 37 | self.jongsung = kchar 38 | end 39 | end 40 | 41 | # Recombines components into a korean character. 42 | # @return [String] Combined korean character 43 | def to_s 44 | Gimchi.compose chosung, jungsung, jongsung 45 | end 46 | 47 | # Sets the chosung component. 48 | # @param [String] 49 | def chosung= c 50 | raise ArgumentError.new('Invalid chosung component') if 51 | c && Gimchi.chosung?(c) == false 52 | @chosung = c && c.dup.extend(Component).tap { |e| e.kor = Gimchi } 53 | end 54 | 55 | # Sets the jungsung component 56 | # @param [String] 57 | def jungsung= c 58 | raise ArgumentError.new('Invalid jungsung component') if 59 | c && Gimchi.jungsung?(c) == false 60 | @jungsung = c && c.dup.extend(Component).tap { |e| e.kor = Gimchi } 61 | end 62 | 63 | # Sets the jongsung component 64 | # 65 | # @param [String] 66 | def jongsung= c 67 | raise ArgumentError.new('Invalid jongsung component') if 68 | c && Gimchi.jongsung?(c) == false 69 | @jongsung = c && c.dup.extend(Component).tap { |e| e.kor = Gimchi } 70 | end 71 | 72 | # Returns Array of three components. 
73 | # 74 | # @return [Array] Array of three components 75 | def to_a 76 | [chosung, jungsung, jongsung] 77 | end 78 | 79 | # Checks if this is a complete Korean character. 80 | def complete? 81 | chosung.nil? == false && jungsung.nil? == false 82 | end 83 | 84 | # Checks if this is a non-complete Korean character. 85 | # e.g. ㅇ, ㅏ 86 | def partial? 87 | chosung.nil? || jungsung.nil? 88 | end 89 | 90 | def inspect 91 | "#{to_s}(#{to_a.join('/')})" 92 | end 93 | 94 | private 95 | # Three components of Gimchi::Char are extended to support #vowel? and #consonant? method. 96 | module Component 97 | # @return [Korean] Hosting Korean instance 98 | attr_accessor :kor 99 | 100 | # Is this component a vowel? 101 | def vowel? 102 | kor.jungsung? self 103 | end 104 | 105 | # Is this component a consonant? 106 | def consonant? 107 | self != 'ㅇ' && kor.chosung?(self) 108 | end 109 | end#Component 110 | end#Char 111 | end#Gimchi 112 | 113 | -------------------------------------------------------------------------------- /README.ko.md: -------------------------------------------------------------------------------- 1 | # Gimchi [![Build Status](https://travis-ci.org/junegunn/gimchi.png?branch=master)](https://travis-ci.org/junegunn/gimchi) 2 | 3 | ## 개요 4 | 5 | Gimchi는 한글 스트링을 다룹니다. 6 | 국립 국어원 어문 규정에 정의된 한글의 표준 발음법과 7 | 로마자 표기법을 (일부) 구현한 것이 주요 기능입니다. 8 | 9 | 또한 다음의 기능들을 제공합니다. 10 | - 주어진 캐릭터가 한글인지 판단 11 | - 한글을 초성, 중성, 종성으로 분리하고, 이를 다시 합치는 기능 12 | - 숫자 표기를 한글 표현으로 변환 13 | 14 | ## 설치 15 | ``` 16 | gem install gimchi 17 | ``` 18 | 19 | ## 사용법 20 | 21 | ### 초/중/종성 분해/합체 22 | 23 | ```ruby 24 | chosung, jungsung, jongsung = Gimchi.decompose "한" 25 | 26 | Gimchi.compose chosung, jungsung, jongsung # 한 27 | Gimchi.compose chosung, "ㅗ", jongsung # 혼 28 | ``` 29 | 30 | ### 한글 캐릭터 여부 판단 31 | ```ruby 32 | Gimchi.korean_char? 'ㄱ' # true 33 | Gimchi.complete_korean_char? 'ㄱ' # false 34 | 35 | Gimchi.korean_char? 'ㅏ' # true 36 | Gimchi.complete_korean_char? 
'ㅏ' # false 37 | 38 | Gimchi.korean_char? '가' # true 39 | Gimchi.complete_korean_char? '가' # true 40 | 41 | # Alias of korean_char? 42 | Gimchi.kchar? '가' # true 43 | 44 | Gimchi.chosung? 'ㄱ' # true 45 | Gimchi.jungsung? 'ㄱ' # false 46 | Gimchi.jongsung? 'ㄱ' # true 47 | 48 | Gimchi.chosung? 'ㅏ' # false 49 | Gimchi.jungsung? 'ㅏ' # true 50 | Gimchi.jongsung? 'ㅏ' # false 51 | 52 | Gimchi.chosung? 'ㄺ' # false 53 | Gimchi.jungsung? 'ㄺ' # false 54 | Gimchi.jongsung? 'ㄺ' # true 55 | ``` 56 | 57 | ### Gimchi::Char 의 사용 58 | 59 | ```ruby 60 | kc = Gimchi::Char("한") 61 | kc.class # Gimchi::Char 62 | 63 | kc.chosung # "ㅎ" 64 | kc.jungsung # "ㅏ" 65 | kc.jongsung # "ㄴ" 66 | kc.to_a # ["ㅎ", "ㅏ", "ㄴ"] 67 | kc.to_s # "한" 68 | 69 | kc.complete? # true 70 | kc.partial? # false 71 | 72 | Gimchi::Char("ㅏ").partial? # true 73 | 74 | # Modifying its elements 75 | kc.chosung = 'ㄷ' 76 | kc.jongsung = 'ㄹ' 77 | kc.to_s # "달" 78 | kc.complete? # true 79 | kc.partial? # false 80 | 81 | kc.chosung = nil 82 | kc.jongsung = nil 83 | kc.complete? # false 84 | kc.partial? 
# true 85 | ``` 86 | 87 | ### 숫자 읽기 88 | ```ruby 89 | Gimchi.read_number(1999) # "천 구백 구십 구" 90 | Gimchi.read_number(- 100.123) # "마이너스 백점일이삼" 91 | Gimchi.read_number("153,191,100,678.3214") 92 | # "천 오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사" 93 | 94 | # 나이, 시간 ( -살, -시 ) 95 | Gimchi.read_number("20살") # "스무살" 96 | Gimchi.read_number("13 살") # "열세 살" 97 | Gimchi.read_number("7시 30분") # "일곱시 삼십분" 98 | ``` 99 | 100 | ### 표준 발음 (부분 구현) 101 | ```ruby 102 | str = "됐어 됐어 이제 그런 가르침은 됐어 매일 아침 7 시 30 분까지 우릴 조그만 교실로 몰아넣고" 103 | Gimchi.pronounce str 104 | # "돼써 돼써 이제 그런 가르치믄 돼써 매일 아침 일곱 시 삼십 분까지 우릴 조그만 교실로 모라너코" 105 | 106 | Gimchi.pronounce str, :slur => true 107 | # "돼써 돼써 이제 그런 가르치믄 돼써 매이 라치 밀곱 씨 삼십 뿐까지 우릴 조그만 교실로 모라너코" 108 | 109 | Gimchi.pronounce str, :each_char => true 110 | # "됃어 됃어 이제 그런 가르침은 됃어 매일 아침 일곱 시 삼십 분까지 우릴 조그만 교실로 몰아너고" 111 | 112 | Gimchi.pronounce str, :number => false 113 | # "돼써 돼써 이제 그런 가르치믄 돼써 매일 아침 7 시 30 분까지 우릴 조그만 교실로 모라너코" 114 | ``` 115 | 116 | ### 로마자 표기 (부분 구현) 117 | ```ruby 118 | str = "됐어 됐어 이제 그런 가르침은 됐어 매일 아침 7 시 30 분까지 우릴 조그만 교실로 몰아넣고" 119 | 120 | Gimchi.romanize str 121 | # "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo moraneoko" 122 | 123 | Gimchi.romanize str, :slur => true 124 | # "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-i rachi milgop ssi samsip ppunkkaji uril jogeuman gyosillo moraneoko" 125 | 126 | Gimchi.romanize str, :number => false 127 | # "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-il achim 7 si 30 bunkkaji uril jogeuman gyosillo moraneoko" 128 | 129 | Gimchi.romanize str, :as_pronounced => false 130 | # "Dwaet-eo dwaet-eo ije geureon gareuchim-eun dwaet-eo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo mol-aneogo" 131 | ``` 132 | 133 | ## 구현의 한계 134 | 135 | 표준 발음법과 로마자 표기법을 모두 구현하기 위해서는 형태소 분석과 충분한 136 | 사전, 그리고 문맥의 의미 분석이 필요합니다. 이 모든 것이 준비된다고 할지라도 137 | 완벽한 결과를 얻는 것은 불가능합니다. 
138 | 이는 현재 gimchi가 목표로 하는 것이 아니며 gimchi는 간단한 구현으로 어느 수준 139 | 이상의 결과를 얻는 것을 목표로 합니다. 현재 구현의 한계 내에서 정확도를 올리기 140 | 위해 Ad-hoc한 patch 등이 코드에 상당량 포함된 상태인데 이를 정제하고 체계화하는 141 | 노력이 필요합니다. 142 | 143 | ## Contributing 144 | 145 | 1. Fork it 146 | 2. Create your feature branch (`git checkout -b my-new-feature`) 147 | 3. Commit your changes (`git commit -am 'Added some feature'`) 148 | 4. Push to the branch (`git push origin my-new-feature`) 149 | 5. Create new Pull Request 150 | 151 | ## Copyright 152 | 153 | Copyright (c) 2013 Junegunn Choi. See LICENSE.txt for 154 | further details. 155 | 156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gimchi [![Build Status](https://travis-ci.org/junegunn/gimchi.png?branch=master)](https://travis-ci.org/junegunn/gimchi) 2 | 3 | Gimchi is a simple Ruby gem for handling Korean characters. 4 | 5 | Features: 6 | - Decompose a Korean character into its 3 components, namely chosung, jungsung and optional jongsung 7 | - Compose elements back into the Korean character 8 | - Read numbers in Korean 9 | - Pronounce Korean characters 10 | - Romanize Korean characters 11 | 12 | Gimchi (partially) implements the following rules dictated by 13 | The National Institute of The Korean Language (http://www.korean.go.kr) 14 | - Korean Standard Pronunciation 15 | - Korean Romanization 16 | 17 | ## Installation 18 | 19 | ``` 20 | gem install gimchi 21 | ``` 22 | 23 | ## Usage 24 | 25 | ### Composing and decomposing Korean character 26 | 27 | ```ruby 28 | chosung, jungsung, jongsung = Gimchi.decompose "한" 29 | 30 | Gimchi.compose chosung, jungsung, jongsung # 한 31 | Gimchi.compose chosung, "ㅗ", jongsung # 혼 32 | ``` 33 | 34 | ### Inspecting Korean characters 35 | ```ruby 36 | Gimchi.korean_char? 'ㄱ' # true 37 | Gimchi.complete_korean_char? 'ㄱ' # false 38 | 39 | Gimchi.korean_char? 
'ㅏ' # true 40 | Gimchi.complete_korean_char? 'ㅏ' # false 41 | 42 | Gimchi.korean_char? '가' # true 43 | Gimchi.complete_korean_char? '가' # true 44 | 45 | # Alias of korean_char? 46 | Gimchi.kchar? '가' # true 47 | 48 | Gimchi.chosung? 'ㄱ' # true 49 | Gimchi.jungsung? 'ㄱ' # false 50 | Gimchi.jongsung? 'ㄱ' # true 51 | 52 | Gimchi.chosung? 'ㅏ' # false 53 | Gimchi.jungsung? 'ㅏ' # true 54 | Gimchi.jongsung? 'ㅏ' # false 55 | 56 | Gimchi.chosung? 'ㄺ' # false 57 | Gimchi.jungsung? 'ㄺ' # false 58 | Gimchi.jongsung? 'ㄺ' # true 59 | ``` 60 | 61 | ### Using Gimchi::Char 62 | 63 | ```ruby 64 | kc = Gimchi::Char("한") 65 | kc.class # Gimchi::Char 66 | 67 | kc.chosung # "ㅎ" 68 | kc.jungsung # "ㅏ" 69 | kc.jongsung # "ㄴ" 70 | kc.to_a # ["ㅎ", "ㅏ", "ㄴ"] 71 | kc.to_s # "한" 72 | 73 | kc.complete? # true 74 | kc.partial? # false 75 | 76 | Gimchi::Char("ㅏ").partial? # true 77 | 78 | # Modifying its elements 79 | kc.chosung = 'ㄷ' 80 | kc.jongsung = 'ㄹ' 81 | kc.to_s # "달" 82 | kc.complete? # true 83 | kc.partial? # false 84 | 85 | kc.chosung = nil 86 | kc.jongsung = nil 87 | kc.complete? # false 88 | kc.partial? 
# true 89 | ``` 90 | 91 | ### Reading numbers in Korean 92 | ```ruby 93 | Gimchi.read_number(1999) # "천 구백 구십 구" 94 | Gimchi.read_number(- 100.123) # "마이너스 백점일이삼" 95 | Gimchi.read_number("153,191,100,678.3214") 96 | # "천 오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사" 97 | 98 | # Age, Time ( -살, -시 ) 99 | Gimchi.read_number("20살") # "스무살" 100 | Gimchi.read_number("13 살") # "열세 살" 101 | Gimchi.read_number("7시 30분") # "일곱시 삼십분" 102 | ``` 103 | 104 | ### Standard pronunciation (partially implemented) 105 | ```ruby 106 | str = "됐어 됐어 이제 그런 가르침은 됐어 매일 아침 7 시 30 분까지 우릴 조그만 교실로 몰아넣고" 107 | Gimchi.pronounce str 108 | # "돼써 돼써 이제 그런 가르치믄 돼써 매일 아침 일곱 시 삼십 분까지 우릴 조그만 교실로 모라너코" 109 | 110 | Gimchi.pronounce str, :slur => true 111 | # "돼써 돼써 이제 그런 가르치믄 돼써 매이 라치 밀곱 씨 삼십 뿐까지 우릴 조그만 교실로 모라너코" 112 | 113 | Gimchi.pronounce str, :number => false 114 | # "돼써 돼써 이제 그런 가르치믄 돼써 매일 아침 7 시 30 분까지 우릴 조그만 교실로 모라너코" 115 | 116 | Gimchi.pronounce str, :each_char => true 117 | # "됃어 됃어 이제 그런 가르침은 됃어 매일 아침 일곱 시 삼십 분까지 우릴 조그만 교실로 몰아너고" 118 | ``` 119 | 120 | ### Romanization (partially implemented) 121 | ```ruby 122 | str = "됐어 됐어 이제 그런 가르침은 됐어 매일 아침 7 시 30 분까지 우릴 조그만 교실로 몰아넣고" 123 | 124 | Gimchi.romanize str 125 | # "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo moraneoko" 126 | 127 | Gimchi.romanize str, :slur => true 128 | # "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-i rachi milgop ssi samsip ppunkkaji uril jogeuman gyosillo moraneoko" 129 | 130 | Gimchi.romanize str, :number => false 131 | # "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-il achim 7 si 30 bunkkaji uril jogeuman gyosillo moraneoko" 132 | 133 | Gimchi.romanize str, :as_pronounced => false 134 | # "Dwaet-eo dwaet-eo ije geureon gareuchim-eun dwaet-eo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo mol-aneogo" 135 | ``` 136 | 137 | ## Limitation of the implementation 138 | 139 | Unfortunately in order to implement the complete specification of 
Korean 140 | pronunciation and romanization, we need NLP, huge Korean dictionaries and even 141 | semantic analysis of the given string. And even with all that complex 142 | processing, we cannot guarantee 100% accuracy of the output. So yes, that is 143 | definitely not what this gem tries to achieve. Gimchi tries to achieve "some" 144 | level of accuracy with relatively simple code. 145 | 146 | Currently, Gimchi code contains a lot of ad-hoc (possibly invalid) patches 147 | that try to improve the quality of the output, which should be 148 | refactored soon. 149 | 150 | ## Contributing 151 | 152 | 1. Fork it 153 | 2. Create your feature branch (`git checkout -b my-new-feature`) 154 | 3. Commit your changes (`git commit -am 'Added some feature'`) 155 | 4. Push to the branch (`git push origin my-new-feature`) 156 | 5. Create new Pull Request 157 | 158 | ## Copyright 159 | 160 | Copyright (c) 2013 Junegunn Choi. See LICENSE.txt for 161 | further details. 162 | 163 | -------------------------------------------------------------------------------- /test/test_gimchi.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | 3 | $LOAD_PATH.unshift File.dirname(__FILE__) 4 | require 'helper' 5 | require 'yaml' 6 | require 'ansi' 7 | 8 | 9 | class TestGimchi < Test::Unit::TestCase 10 | def test_korean_char 11 | assert_equal true, Gimchi.korean_char?('ㄱ') # true 12 | assert_equal true, Gimchi.kchar?('ㄱ') # true 13 | assert_equal true, Gimchi.korean_char?('ㅏ') # true 14 | assert_equal true, Gimchi.korean_char?('가') # true 15 | assert_equal true, Gimchi.korean_char?('값') # true 16 | assert_equal true, Gimchi.kchar?('값') # true 17 | 18 | assert_equal false, Gimchi.korean_char?('a') # false 19 | assert_equal false, Gimchi.korean_char?('1') # false 20 | assert_raise(ArgumentError) { Gimchi.korean_char?('두자') } 21 | assert_raise(ArgumentError) { Gimchi.kchar?('두자') } 22 | end 23 | 24 | def test_kchar 25 | kc = 
Gimchi::Char('한') 26 | assert_equal Gimchi::Char, kc.class 27 | assert_equal "ㅎ", kc.chosung 28 | assert_equal "ㅏ", kc.jungsung 29 | assert_equal "ㄴ", kc.jongsung 30 | assert_equal ["ㅎ", "ㅏ", "ㄴ"], kc.to_a 31 | assert_equal "한", kc.to_s 32 | assert_equal true, kc.complete? 33 | assert_equal false, kc.partial? 34 | 35 | assert_raise(ArgumentError) { Gimchi::Char('한글') } 36 | assert_raise(ArgumentError) { Gimchi::Char('A') } 37 | 38 | assert_equal true, Gimchi::Char("ㅏ").partial? 39 | end 40 | 41 | def test_complete_korean_char 42 | 43 | assert_equal false, Gimchi.complete_korean_char?('ㄱ') # false 44 | assert_equal false, Gimchi.complete_korean_char?('ㅏ') # false 45 | assert_equal true, Gimchi.complete_korean_char?('가') # true 46 | assert_equal true, Gimchi.complete_korean_char?('값') # true 47 | 48 | assert_equal false, Gimchi.korean_char?('a') # false 49 | assert_equal false, Gimchi.korean_char?('1') # false 50 | assert_raise(ArgumentError) { Gimchi.korean_char?('두자') } 51 | end 52 | 53 | def test_dissect 54 | arr = '이것은 Hangul 입니다.'.each_char.map { |ch| 55 | (Gimchi::Char(ch) rescue [ch]).to_a 56 | }.flatten.compact 57 | 58 | assert_equal ["ㅇ", "ㅣ", "ㄱ", "ㅓ", "ㅅ", "ㅇ", "ㅡ", "ㄴ", " ", 59 | "H", "a", "n", "g", "u", "l", " ", "ㅇ", "ㅣ", "ㅂ", 60 | "ㄴ", "ㅣ", "ㄷ", "ㅏ", "."], arr 61 | end 62 | 63 | def test_convert 64 | arr = '이것은 한글입니다.'.each_char.map { |ch| 65 | Gimchi::Char(ch) rescue ch 66 | } 67 | # [이, 것, 은, " ", 한, 글, 입, 니, 다, "."] 68 | 69 | assert_equal 10, arr.length 70 | assert_equal Gimchi::Char, arr[0].class 71 | assert_equal Gimchi::Char, arr[1].class 72 | assert_equal Gimchi::Char, arr[2].class 73 | 74 | ch = arr[2] 75 | assert_equal 'ㅇ', ch.chosung 76 | assert_equal 'ㅡ', ch.jungsung 77 | assert_equal 'ㄴ', ch.jongsung 78 | assert_equal "은(ㅇ/ㅡ/ㄴ)", ch.inspect 79 | 80 | ch.chosung = 'ㄱ' 81 | ch.jongsung = 'ㅁ' 82 | assert_equal '금', ch.to_s 83 | assert_equal 3, ch.to_a.length 84 | 85 | ch.jongsung = nil 86 | assert_equal '그', ch.to_s 87 | assert_equal 2, 
ch.to_a.compact.length 88 | assert_equal true, ch.complete? 89 | assert_equal false, ch.partial? 90 | 91 | ch.chosung = nil 92 | assert_equal 1, ch.to_a.compact.length 93 | assert_equal false, ch.complete? 94 | assert_equal true, ch.partial? 95 | assert_equal 'ㅡ', ch.to_s 96 | 97 | ch.jungsung = nil 98 | assert_equal 0, ch.to_a.compact.length 99 | assert_equal false, ch.complete? 100 | assert_equal true, ch.partial? 101 | assert_equal '', ch.to_s 102 | 103 | assert_raise(ArgumentError) { ch.chosung = 'ㅡ' } 104 | assert_raise(ArgumentError) { ch.chosung = 'ㄳ' } 105 | assert_raise(ArgumentError) { ch.jungsung = 'ㄱ' } 106 | assert_raise(ArgumentError) { ch.jongsung = 'ㅠ' } 107 | end 108 | 109 | def test_read_number 110 | assert_equal "영", Gimchi.read_number(0) 111 | assert_equal "일", Gimchi.read_number(1) 112 | assert_equal "구", Gimchi.read_number(9) 113 | assert_equal "천 구백 구십 구", Gimchi.read_number(1999) 114 | assert_equal "마이너스 백점일이삼", Gimchi.read_number(- 100.123) 115 | assert_equal "오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사오육칠", 116 | Gimchi.read_number("53,191,100,678.3214567") 117 | assert_equal "영점영영영영영일이삼사오", Gimchi.read_number("1.2345e-06") 118 | assert_equal "일해 이천 삼백 사십 오경", Gimchi.read_number("1.2345e+20") 119 | assert_equal "플러스 일해 이천 삼백 사십 오경", Gimchi.read_number("+ 1.2345e+20") 120 | assert_equal "마이너스 일해 이천 삼백 사십 오경", Gimchi.read_number("- 1.2345e+20") 121 | assert_equal "만 십 이점삼", Gimchi.read_number("100.123e+2") 122 | assert_equal "십만 십 이점삼", Gimchi.read_number("1000.123e+2") 123 | assert_equal "백 일만 십 이점삼", Gimchi.read_number("10100.123e+2") 124 | assert_equal "천 십 이점삼", Gimchi.read_number("10.123e+2") 125 | assert_equal "십점영", Gimchi.read_number("10.0") 126 | assert_equal "플러스 십점영", Gimchi.read_number("+ 10.0") 127 | 128 | # 나이, 시간, 개수, 명 ( -살, -시, -개, -명 ) 129 | assert_equal "나는 이십", Gimchi.read_number("나는 20") 130 | assert_equal "나는 스무살", Gimchi.read_number("나는 20살") 131 | assert_equal "나는 스물네살", Gimchi.read_number("나는 24살") 132 | assert_equal "스무개", 
Gimchi.read_number("20개") 133 | assert_equal "스무 명", Gimchi.read_number("20 명") 134 | assert_equal "이십 칠점일살", Gimchi.read_number("27.1살") 135 | assert_equal "너는 열세 살", Gimchi.read_number("너는 13 살") 136 | assert_equal "백 서른두명", Gimchi.read_number("132명") 137 | assert_equal "이천 오백 아흔아홉개", Gimchi.read_number("2,599개") 138 | assert_equal "지금은 일곱시 삼십분", Gimchi.read_number("지금은 7시 30분") 139 | 140 | # No way! 141 | assert_raise(RangeError) { Gimchi.read_number 10 ** 100 } 142 | end 143 | 144 | def test_pronounce 145 | cnt = 0 146 | s = 0 147 | test_set = YAML.load File.read(File.dirname(__FILE__) + '/pronunciation.yml') 148 | test_set.each do | k, v | 149 | cnt += 1 150 | k = k.gsub(/[-]/, '') 151 | 152 | t1, tfs1 = Gimchi.pronounce(k, :each_char => false, :slur => true, :debug => true) 153 | t2, tfs2 = Gimchi.pronounce(k, :each_char => false, :slur => false, :debug => true) 154 | 155 | path = "" 156 | if (with_slur = v.include?(t1.gsub(/\s/, ''))) || v.include?(t2.gsub(/\s/, '')) 157 | r = ANSI::Code::BLUE + ANSI::Code::BOLD + v.join(' / ') + ANSI::Code::RESET if v.length > 1 158 | path = (with_slur ? tfs1 : tfs2).map { |e| e.sub 'rule_', '' }.join(' > ') 159 | t = with_slur ? 
t1 : t2 160 | s += 1 161 | else 162 | r = ANSI::Code::RED + ANSI::Code::BOLD + v.join(' / ') + ANSI::Code::RESET 163 | t = [t1, t2].join ' | ' 164 | end 165 | puts "#{k} => #{t} (#{Gimchi.romanize t, :as_pronounced => false}) [#{path}] #{r}" 166 | end 167 | puts "#{s} / #{cnt}" 168 | # FIXME 169 | assert s >= 411 170 | end 171 | 172 | def test_romanize_preserve_non_korean 173 | assert_equal 'ttok-kkateun kkk', Gimchi.romanize('똑같은 kkk') 174 | end 175 | 176 | def test_romanize 177 | cnt = 0 178 | s = 0 179 | test_set = YAML.load File.read(File.dirname(__FILE__) + '/romanization.yml') 180 | test_set.each do | k, v | 181 | cnt += 1 182 | rom = Gimchi.romanize k.sub(/\[.*/, '') 183 | if rom.downcase.gsub(/[\s-]/, '') == v.downcase.gsub(/\(.*\)/, '').gsub(/[\s-]/, '') 184 | r = ANSI::Code::BLUE + ANSI::Code::BOLD + rom + ANSI::Code::RESET 185 | s += 1 186 | else 187 | r = ANSI::Code::RED + ANSI::Code::BOLD + rom + ANSI::Code::RESET 188 | end 189 | puts "#{k} => #{r} [#{v}]" 190 | end 191 | puts "#{s} / #{cnt}" 192 | # FIXME 193 | assert s >= 63 194 | end 195 | 196 | def test_cho_jung_jongsung? 
197 | c, j, jo = Gimchi::Char("달").to_a 198 | assert Gimchi.chosung?(c) 199 | assert Gimchi.jungsung?(j) 200 | assert Gimchi.jongsung?(jo) 201 | 202 | assert Gimchi.chosung?( 'ㄱ') 203 | assert !Gimchi.jungsung?('ㄱ') 204 | assert Gimchi.jongsung?('ㄱ') 205 | assert !Gimchi.chosung?( 'ㅏ') 206 | assert Gimchi.jungsung?('ㅏ') 207 | assert !Gimchi.jongsung?('ㅏ') 208 | assert !Gimchi.chosung?( 'ㄺ') 209 | assert !Gimchi.jungsung?('ㄺ') 210 | assert Gimchi.jongsung?('ㄺ') 211 | end 212 | 213 | def test_compose_decompose 214 | ret = Gimchi.decompose("한") 215 | assert ret.is_a?(Array) 216 | assert_equal 'ㅎ', ret[0] 217 | assert_equal 'ㅏ', ret[1] 218 | assert_equal 'ㄴ', ret[2] 219 | 220 | assert_equal '한', Gimchi.compose(*ret) 221 | 222 | ret = Gimchi.decompose("ㅋ") 223 | assert_equal 'ㅋ', ret[0] 224 | assert_equal nil, ret[1] 225 | assert_equal nil, ret[2] 226 | 227 | assert_equal 'ㅋ', Gimchi.compose(*ret) 228 | end 229 | 230 | def test_singleton 231 | assert_raise(NoMethodError) { Gimchi.new } 232 | end 233 | end 234 | -------------------------------------------------------------------------------- /test/pronunciation.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "가져": [가저] 3 | "쪄": [쩌] 4 | "다쳐": [다처] 5 | "계집": [계집, 게집] 6 | "계시다": [계시다, 게시다] 7 | "주의": [주의, 주이] 8 | "협의": [혀븨, 혀비] 9 | "우리의": [우리의, 우리에] 10 | "강의의": [강의의, 강이에] 11 | "져": [저] 12 | "쪄": [쩌] 13 | "쳐": [처] 14 | "다져": [다저] 15 | "살쪄": [살쩌] 16 | "바쳐": [바처] 17 | "계산": [계산, 게산] 18 | "통계": [통계, 통게] 19 | "폐단": [폐단, 페단] 20 | "밀폐": [밀폐, 밀페] 21 | "혜성": [혜성, 헤성] 22 | "은혜": [은혜, 은헤] 23 | " 흰무리": [힌무리] 24 | "희미하다": [히미하다] 25 | "유희": [유히] 26 | "오늬": [오니] 27 | "하늬바람": [하니바람] 28 | "보늬": [보니] 29 | "닦다": [닥따] 30 | "키읔": [키윽] 31 | "키읔과": [키윽꽈] 32 | "옷": [옫] 33 | "웃다": [욷따] 34 | "있다": [읻따] 35 | "젖": [젇] 36 | "빚다": [빋따] 37 | "꽃": [꼳] 38 | "쫓다": [쫃따] 39 | "솥": [솓] 40 | "앞": [압] 41 | "덮다": [덥따] 42 | "박": [박] 43 | "밖": [박] 44 | "부엌": [부억] 45 | "꺾다": [꺽따] 46 | "낫": [낟] 47 | "낮": [낟] 48 | "낯": 
[낟] 49 | "낫다": [낟따] 50 | "낮다": [낟따] 51 | "있었다": [이썯따] 52 | "낱": [낟] 53 | "밭": [받] 54 | "받다": [받따] 55 | "맡다": [맏따] 56 | "뱉다": [밷따] 57 | "집": [집] 58 | "짚": [집] 59 | "집다": [집따] 60 | "곱다": [곱따] 61 | "짚다": [집따] 62 | "넋": [넉] 63 | "넋과": [넉꽈] 64 | "앉다": [안따] 65 | "여덟": [여덜] 66 | "넓다": [널따] 67 | "외곬": [외골] 68 | "핥다": [할따] 69 | "값": [갑] 70 | "없다": [업따] 71 | "밟소": [밥쏘] 72 | "밟지": [밥찌] 73 | "밟게": [밥께] 74 | "밟고": [밥꼬] 75 | "넓둥글다": [넙뚱글다] 76 | "몫": [목] 77 | "몫도": [목또] 78 | "몫까지": [목까지] 79 | "얹다": [언따] 80 | "얹지": [언찌] 81 | "얹고": [언꼬] 82 | "얇다": [얄따] 83 | "얇지": [얄찌] 84 | "얇고": [얄꼬] 85 | "훑다": [훌따] 86 | "훑지": [훌찌] 87 | "훑고": [훌꼬] 88 | "닭": [닥] 89 | "흙과": [흑꽈] 90 | "맑다": [막따] 91 | "늙지": [늑찌] 92 | "삶": [삼] 93 | "젊다": [점따] 94 | "읊고": [읍꼬] 95 | "읊다": [읍따] 96 | "맑게": [말께] 97 | "묽고": [물꼬] 98 | "얽거나": [얼꺼나] 99 | "칡": [칙] 100 | "칡도": [칙또] 101 | "칡까지": [칙까지] 102 | "앎": [암] 103 | "앎도": [암도] 104 | "앎과": [암과] 105 | "닮다": [담따] 106 | "닮지": [담찌] 107 | "닮고": [담꼬] 108 | "읊지": [읍찌] 109 | "맑지": [막찌] 110 | "맑습니다": [막씀니다] 111 | "늙다": [늑따] 112 | "늙습니다": [늑씀니다] 113 | "맑고": [말꼬] 114 | "맑거나": [말꺼나] 115 | " 늙게": [늘께] 116 | "늙고": [늘꼬] 117 | "늙거나": [늘꺼나] 118 | "놓고": [노코] 119 | "좋던": [조턴] 120 | "쌓지": [싸치] 121 | "많고": [만코] 122 | "않던": [안턴] 123 | "닳지": [달치] 124 | "각하": [가카] 125 | "먹히다": [머키다] 126 | "밝히다": [발키다] 127 | "맏형": [마텽] 128 | "좁히다": [조피다] 129 | "넓히다": [널피다] 130 | "꽂히다": [꼬치다] 131 | "앉히다": [안치다] 132 | "옷 한 벌": [오탄벌] 133 | "낮 한때": [나탄때] 134 | "꽃 한 송이": [꼬탄송이] 135 | "숱하다": [수타다] 136 | "닿소": [다쏘] 137 | "많소": [만쏘] 138 | "싫소": [실쏘] 139 | "놓는": [논는] 140 | "쌓네": [싼네] 141 | "않네": [안네] 142 | "않는": [안는] 143 | "뚫네": [뚤레] 144 | "뚫는": [뚤른] 145 | "낳은": [나은] 146 | "놓아": [노아] 147 | "쌓이다": [싸이다] 148 | "많아": [마나] 149 | "않은": [아는] 150 | "닳아": [다라] 151 | "싫어도": [시러도] 152 | "놓던": [노턴] 153 | "놓지": [노치] 154 | "많던": [만턴] 155 | "많지": [만치] 156 | "앓고": [알코] 157 | "앓던": [알턴] 158 | "앓지": [알치] 159 | "국화": [구콰] 160 | "정직하다": [정지카다] 161 | "박하다": [바카다] 162 | "박히다": [바키다] 163 | "읽히다": [일키다] 164 | "굿하다": [구타다] 165 | "잊히다": [이치다] 166 | 
"얹히다": [언치다] 167 | "입학": [이팍] 168 | "급하다": [그파다] 169 | "입히다": [이피다] 170 | "밟히다": [발피다] 171 | "온갖 힘": [온가팀] 172 | "뭇 형벌": [무텽벌] 173 | "몇 할": [며탈] 174 | "밥 한 사발": [바판사발] 175 | "국 한 대접": [구칸대접] 176 | "끊습니다.": [끈씀니다] 177 | "끊사오니": [끈싸오니] 178 | "놓네": [논네] 179 | "놓나": [논나] 180 | "끊는": [끈는] 181 | "끊네": [끈네] 182 | "끊나": [끈나] 183 | "끓는": [끌른] 184 | "끓네": [끌레] 185 | "끓나": [끌라] 186 | "넣은": [너은] 187 | "쌓을": [싸을] 188 | "찧으니까": [찌으니까] 189 | "끊은": [끄는] 190 | "많을": [마늘] 191 | "않으니까": [아느니까] 192 | "옳은": [오른] 193 | "싫을": [시를] 194 | "곯으니까": [고르니까] 195 | "쌓인": [싸인] 196 | "끊일": [끄닐] 197 | "끓이니까": [끄리니까] 198 | "깎아": [까까] 199 | "옷이": [오시] 200 | "있어": [이써] 201 | "낮이": [나지] 202 | "꽂아": [꼬자] 203 | "꽃을": [꼬츨] 204 | "쫓아": [쪼차] 205 | "밭에": [바테] 206 | "앞으로": [아프로] 207 | "덮이다": [더피다] 208 | "부엌이": [부어키] 209 | "낯을": [나츨] 210 | "밭의": [바틔] 211 | "무릎에": [무르페] 212 | "꺾어": [꺼꺼] 213 | "쫓을": [쪼츨] 214 | "같은": [가튼] 215 | "짚으면": [지프면] 216 | "섞여": [서껴] 217 | "높여": [노펴] 218 | "넋이": [넉씨] 219 | "앉아": [안자] 220 | "닭을": [달글] 221 | "젊어": [절머] 222 | "곬이": [골씨] 223 | "핥아": [할타] 224 | "읊어": [을퍼] 225 | "값을": [갑쓸] 226 | "없어": [업써] 227 | "닭이": [달기] 228 | "여덟을": [여덜블] 229 | "삶에": [살메] 230 | "읽어": [일거] 231 | "밟을": [발블] 232 | "옮은": [올믄] 233 | "몫이": [목씨] 234 | "넋을": [넉쓸] 235 | "외곬으로": [외골쓰로] 236 | "값이": [갑씨] 237 | "값에": [갑쎄] 238 | "없이": [업씨] 239 | "없으면": [업쓰면] 240 | "밭 아래": [바다래] 241 | "늪 앞": [느밥] 242 | "젖어미": [저더미] 243 | "맛없다": [마덥따] 244 | "겉옷": [거돋] 245 | "헛웃음": [허두슴] 246 | "꽃 위": [꼬뒤] 247 | "넋 없다": [너겁따] 248 | "닭 앞에": [다가페] 249 | "값어치": [가버치] 250 | "값있는": [가빈는] 251 | "디귿이": [디그시] 252 | "디귿을": [디그슬] 253 | "디귿에": [디그세] 254 | "지읒이": [지으시] 255 | "지읒을": [지으슬] 256 | "지읒에": [지으세] 257 | "치읓이": [치으시] 258 | "치읓을": [치으슬] 259 | "치읓에": [치으세] 260 | "키읔이": [키으기] 261 | "키읔을": [키으글] 262 | "키읔에": [키으게] 263 | "티읕이": [티으시] 264 | "티읕을": [티으슬] 265 | "티읕에": [티으세] 266 | "피읖이": [피으비] 267 | "피읖을": [피으블] 268 | "피읖에": [피으베] 269 | "히읗이": [히으시] 270 | "히읗을": [히으슬] 271 | "히읗에": [히으세] 272 | "곧이듣다": [고지듣따] 273 | "굳이": [구지] 274 | "미닫이": [미다지] 275 | "땀받이": 
[땀바지] 276 | "밭이": [바치] 277 | "벼훑이": [벼훌치] 278 | "굳히다": [구치다] 279 | "닫히다": [다치다] 280 | "묻히다": [무치다] 281 | "먹는": [멍는] 282 | "국물": [궁물] 283 | "깎는": [깡는] 284 | "키읔만": [키응만] 285 | "몫몫이": [몽목씨] 286 | "긁는": [긍는] 287 | "흙만": [흥만] 288 | "닫는": [단는] 289 | "짓는": [진는] 290 | "옷맵시": [온맵씨] 291 | "있는": [인는] 292 | "맞는": [만는] 293 | "젖멍울": [전멍울] 294 | "쫓는": [쫀는] 295 | "꽃망울": [꼰망울] 296 | "붙는": [분는] 297 | "잡는": [잠는] 298 | "밥물": [밤물] 299 | "앞마당": [암마당] 300 | "밟는": [밤는] 301 | "읊는": [음는] 302 | "없는": [엄는] 303 | "책 넣는다": [챙넌는다] 304 | "흙 말리다": [흥말리다] 305 | "옷 맞추다": [온맏추다] 306 | "밥 먹는다": [밤멍는다] 307 | "값 매기다": [감매기다] 308 | "국 마시다": [궁마시다] 309 | "옷 마르다": [온마르다] 310 | "입 놀리다": [임놀리다] 311 | "담력": [담녁] 312 | "침략": [침냑] 313 | "강릉": [강능] 314 | "항로": [항노] 315 | "대통령": [대통녕] 316 | "막론": [망논] 317 | "협력": [혐녁] 318 | "신라": [실라] 319 | "천리": [철리] 320 | "광한루": [광할루] 321 | "물난리": [물랄리] 322 | "줄넘기": [줄럼끼] 323 | "할는지": [할른지] 324 | "닳는": [달른] 325 | "핥네": [할레] 326 | "의견란": [의견난] 327 | "임진란": [임진난] 328 | "생산량": [생산냥] 329 | "결단력": [결딴녁] 330 | "공권력": [공꿘녁] 331 | "동원령": [동원녕] 332 | "상견례": [상견녜] 333 | "횡단로": [횡단노] 334 | "이원론": [이원논] 335 | "입원료": [이붠뇨] 336 | "구근류": [구근뉴] 337 | "갈 놈": [갈롬] 338 | "바람 잦을 날": [바람자즐랄] 339 | "되어": [되어, 되여] 340 | "피어": [피어, 피여] 341 | "국밥": [국빱] 342 | "깎다": [깍따] 343 | "넋받이": [넉빠지] 344 | "삯돈": [삭똔] 345 | "닭장": [닥짱] 346 | "칡범": [칙뻠] 347 | "뻗대다": [뻗때다] 348 | "옷고름": [옫꼬름] 349 | "있던": [읻떤] 350 | "꽂고": [꼳꼬] 351 | "꽃다발": [꼳따발] 352 | "낯설다": [낟썰다] 353 | "밭갈이": [받까리] 354 | "솥전": [솓쩐] 355 | "곱돌": [곱똘] 356 | "덮개": [덥깨] 357 | "옆집": [엽찝] 358 | "넓죽하다": [넙쭈카다] 359 | "읊조리다": [읍쪼리다] 360 | "값지다": [갑찌다] 361 | "신고": [신꼬] 362 | "껴안다": [껴안따] 363 | "앉고": [안꼬] 364 | "삼고": [삼꼬] 365 | "더듬지": [더듬찌] 366 | "젊지": [점찌] 367 | "넓게": [널께] 368 | "훑소": [훌쏘] 369 | "떫지": [떨찌] 370 | "갈등": [갈뜽] 371 | "발동": [발똥] 372 | "절도": [절또] 373 | "말살": [말쌀] 374 | "일시": [일씨] 375 | "갈증": [갈쯩] 376 | "물질": [물찔] 377 | "발전": [발쩐] 378 | "몰상식": [몰쌍식] 379 | "불세출": [불쎄출] 380 | "할 것을": [할꺼슬] 381 | "갈 데가": [갈떼가] 382 | "할 바를": [할빠를] 383 | "할 수는": [할쑤는] 384 | 
"할 적에": [할쩌게] 385 | "갈 곳": [갈꼳] 386 | "할 도리": [할또리] 387 | "만날 사람": [만날싸람] 388 | "할걸": [할껄] 389 | "할밖에": [할빠께] 390 | "할세라": [할쎄라] 391 | "할수록": [할쑤록] 392 | "할지라도": [할찌라도] 393 | "할지언정": [할찌언정] 394 | "할진대": [할찐대] 395 | "할 듯하다": [할뜨타다] 396 | "할 법하다": [할뻐파다] 397 | "할 성싶다": [할썽십따] 398 | "간 사람": [간사람] 399 | "가는 사람": [가는사람] 400 | "가던 사람": [가던사람] 401 | "입는다": [임는다] 402 | "입는데": [임는데] 403 | "입는지": [임는지] 404 | "문고리": [문꼬리] 405 | "눈동자": [눈똥자] 406 | "신바람": [신빠람] 407 | "산새": [산쌔] 408 | "손재주": [손째주] 409 | "길가": [길까] 410 | "물동이": [물똥이] 411 | "발바닥": [발빠닥] 412 | "굴속": [굴쏙] 413 | "술잔": [술짠] 414 | "바람결": [바람껼] 415 | "그믐달": [그믐딸] 416 | "아침밥": [아침빱] 417 | "잠자리": [잠짜리] 418 | "강가": [강까] 419 | "초승달": [초승딸] 420 | "등불": [등뿔] 421 | "창살": [창쌀] 422 | "강줄기": [강쭐기] 423 | "솜이불": [솜니불] 424 | "홑이불": [혼니불] 425 | "막일": [망닐] 426 | "삯일": [상닐] 427 | "맨입": [맨닙] 428 | "꽃잎": [꼰닙] 429 | "내복약": [내봉냑] 430 | "한여름": [한녀름] 431 | "남존여비": [남존녀비] 432 | "신여성": [신녀성] 433 | "색연필": [생년필] 434 | "직행열차": [지캥녈차] 435 | "늑막염": [능망념] 436 | "콩엿": [콩녇] 437 | "담요": [담뇨] 438 | "눈요기": [눈뇨기] 439 | "영업용": [영엄뇽] 440 | "식용유": [시굥뉴] 441 | "국민윤리": [궁민뉼리] 442 | "밤윷": [밤뉻] 443 | "이죽이죽": [이중니죽, 이주기죽] 444 | "야금야금": [야금냐금, 야그먀금] 445 | "검열": [검녈, 거멸] 446 | "욜랑욜랑": [욜랑뇰랑, 욜랑욜랑] 447 | "금융": [금늉, 그뮹] 448 | "들일": [들릴] 449 | "솔잎": [솔립] 450 | "설익다": [설릭따] 451 | "물약": [물략] 452 | "불여우": [불려우] 453 | "서울역": [서울력] 454 | "물엿": [물렫] 455 | "휘발유": [휘발류] 456 | "유들유들": [유들류들] 457 | "한 일": [한닐] 458 | "옷 입다": [온닙따] 459 | "서른여섯": [서른녀섣] 460 | "먹은 엿": [머근녇] 461 | "할 일": [할릴] 462 | "잘 입다": [잘립따] 463 | "스물여섯": [스물려섣] 464 | "먹을 엿": [머글렫] 465 | "송별연": [송벼련] 466 | "등용문": [등용문] 467 | "절약": [저략] 468 | "월요일": [워료일] 469 | "목요일": [모교일] 470 | "금요일": [그묘일] 471 | "냇가": [내까, 낻까] 472 | "샛길": [새낄, 샏낄] 473 | "빨랫돌": [빨래똘, 빨랟똘] 474 | "콧등": [코뜽, 콛뜽] 475 | "깃발": [기빨, 긷빨] 476 | "대팻밥": [대패빱, 대팯빱] 477 | "햇살": [해쌀, 핻쌀] 478 | "뱃속": [배쏙, 밷쏙] 479 | "뱃전": [배쩐, 밷쩐] 480 | "고갯짓": [고개찓, 고갣찓] 481 | "콧날": [콘날] 482 | "아랫니": [아랜니] 483 | "툇마루": [퇸마루] 484 | "뱃머리": [밴머리] 485 | "베갯잇": [베갠닏] 486 | "깻잎": 
[깬닙] 487 | "나뭇잎": [나문닙] 488 | "도리깻열": [도리깬녈] 489 | "뒷윷": [뒨뉻] 490 | "뒷일": [뒨닐] 491 | -------------------------------------------------------------------------------- /lib/gimchi.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # encoding: UTF-8 3 | # Junegunn Choi (junegunn.c@gmail.com) 4 | 5 | require 'yaml' 6 | require 'set' 7 | require 'gimchi/char' 8 | require 'gimchi/pronouncer' 9 | 10 | class Gimchi 11 | class << self 12 | attr_reader :chosungs, :jungsungs, :jongsungs 13 | 14 | def Char ch 15 | Gimchi::Char.new(ch) 16 | end 17 | 18 | # Decompose a Korean character into 3 components 19 | # @param [String] ch Korean character 20 | # @return [Array] 21 | def decompose ch 22 | Gimchi::Char.new(ch).to_a 23 | end 24 | 25 | # Compose 3 elements into a Korean character String 26 | # @param [String] chosung 27 | # @param [String] jungsung 28 | # @param [String] jongsung 29 | # @return [String] 30 | def compose chosung, jungsung = nil, jongsung = nil 31 | if chosung.nil? && jungsung.nil? 32 | "" 33 | elsif chosung && jungsung 34 | n1, n2, n3 = 35 | n1 = chosungs.index(chosung) || 0 36 | n2 = jungsungs.index(jungsung) || 0 37 | n3 = ([nil] + jongsungs).index(jongsung) || 0 38 | [ 0xAC00 + n1 * (21 * 28) + n2 * 28 + n3 ].pack('U') 39 | else 40 | chosung || jungsung 41 | end 42 | end 43 | 44 | # @param [String] ch 45 | # @return [Boolean] 46 | def chosung? ch 47 | @chosung_set.include? ch 48 | end 49 | 50 | # @param [String] ch 51 | # @return [Boolean] 52 | def jungsung? ch 53 | @jungsung_set.include? ch 54 | end 55 | 56 | # @param [String] ch 57 | # @return [Boolean] 58 | def jongsung? ch 59 | @jongsung_set.include? ch 60 | end 61 | 62 | # Checks if the given character is a korean character. 63 | # @param [String] ch A string of size 1 64 | def korean_char? 
ch 65 | raise ArgumentError.new('Lengthy input') if str_length(ch) > 1 66 | 67 | complete_korean_char?(ch) || @all.include?(ch) 68 | end 69 | alias kchar? korean_char? 70 | 71 | # Checks if the given character is a "complete" Korean character. 72 | # A "complete" Korean character must have chosung and jungsung, with optional jongsung. 73 | # @param [String] ch A string of size 1 74 | def complete_korean_char? ch 75 | raise ArgumentError.new('Lengthy input') if str_length(ch) > 1 76 | 77 | # Range of Korean characters in Unicode 2.0: AC00(가) ~ D7A3(힣) 78 | ch.unpack('U').all? { | c | c >= 0xAC00 && c <= 0xD7A3 } 79 | end 80 | 81 | # @deprecated 82 | # @private 83 | def kchar ch 84 | Gimchi::Char.new(ch) 85 | end 86 | 87 | # Reads numeric expressions in the Korean way. 88 | # @param [String, Number] str Numeric type or String containing numeric expressions 89 | # @return [String] Output string 90 | def read_number str 91 | str.to_s.gsub(/(([+-]\s*)?[0-9,]*,*[0-9]+(\.[0-9]+(e[+-][0-9]+)?)?)(\s*.)?/) { 92 | read_number_sub($1, $5) 93 | } 94 | end 95 | 96 | # Returns the pronunciation of the given string containing Korean characters. 97 | # Takes optional options hash. 98 | # 99 | # @param [String] str Input string 100 | # @param [Hash] options Options 101 | # @option options [Boolean] each_char Each character of the string is pronounced respectively. 102 | # @option options [Boolean] slur Strings separated by whitespaces are processed again as if they were contiguous. 103 | # @option options [Boolean] number Numeric parts of the string are also pronounced in Korean. 104 | # @option options [Array] except Allows you to skip certain transformations. 
105 | # @return [String] Output string; when :debug is true, returns the output string together with the list of applied transformations 106 | def pronounce str, options = {} 107 | options = { 108 | :each_char => false, 109 | :slur => false, 110 | :number => true, 111 | :except => [], 112 | :debug => false 113 | }.merge options 114 | 115 | str = read_number(str) if options[:number] 116 | 117 | result, transforms = @pronouncer.send :pronounce!, str, options 118 | 119 | if options[:debug] 120 | return result, transforms 121 | else 122 | return result 123 | end 124 | end 125 | 126 | # Returns the romanization (alphabetical notation) of the given Korean string. 127 | # http://en.wikipedia.org/wiki/Korean_romanization 128 | # @param [String] str Input Korean string 129 | # @param [Hash] options Options 130 | # @option options [Boolean] as_pronounced If true, the string is romanized as pronounced; if false, pronounce is called with :each_char => true 131 | # @option options [Boolean] number Whether to read numeric expressions in the string 132 | # @option options [Boolean] slur Same as :slur in pronounce 133 | # @return [String] Output string in Roman Alphabet 134 | # @see Gimchi.pronounce 135 | def romanize str, options = {} 136 | options = { 137 | :as_pronounced => true, 138 | :number => true, 139 | :slur => false 140 | }.merge options 141 | 142 | rdata = @config[:romanization] 143 | post_subs = rdata[:post_substitution] 144 | rdata = [rdata[:chosung], rdata[:jungsung], rdata[:jongsung]] 145 | 146 | str = pronounce str, 147 | :each_char => !options[:as_pronounced], 148 | :number => options[:number], 149 | :slur => options[:slur], 150 | # 제1항 [붙임 1] ‘ㅢ’는 ‘ㅣ’로 소리 나더라도 ‘ui’로 적는다. 151 | :except => %w[rule_5_3] 152 | dash = rdata[0]["ㅇ"] 153 | romanization = "" 154 | 155 | romanize_chunk = lambda do |chunk| 156 | chunk.each_char.map { |ch| Gimchi::Char.new(ch) rescue ch }.each do |kc| 157 | kc.to_a.each_with_index do |comp, idx| 158 | next if comp.nil? 159 | comp = rdata[idx][comp] || comp 160 | comp = comp[1..-1] if comp[0, 1] == dash && 161 | (romanization.empty? 
|| romanization[-1, 1] =~ /\s/) 162 | romanization += comp 163 | end 164 | end 165 | 166 | return post_subs.keys.inject(romanization) { | output, pattern | 167 | output.gsub(pattern, post_subs[pattern]) 168 | } 169 | end 170 | 171 | k_chunk = "" 172 | str.each_char do | c | 173 | if korean_char? c 174 | k_chunk += c 175 | else 176 | unless k_chunk.empty? 177 | romanization = romanize_chunk.call k_chunk 178 | k_chunk = "" 179 | end 180 | romanization += c 181 | end 182 | end 183 | romanization = romanize_chunk.call k_chunk unless k_chunk.empty? 184 | romanization 185 | end 186 | 187 | private 188 | CONFIG_FILE_PATH = File.expand_path('../../config/default.yml', __FILE__) 189 | 190 | def str_length str 191 | str.length 192 | end 193 | 194 | def read_number_sub num, next_char 195 | nconfig = @config[:number] 196 | 197 | if num == '0' 198 | return nconfig[:digits].first 199 | end 200 | 201 | num = num.gsub(',', '') 202 | next_char = next_char.to_s 203 | is_float = num.match(/[\.e]/) != nil 204 | 205 | # Alternative notation for integers with proper suffix 206 | alt = false 207 | if is_float == false && 208 | nconfig[:alt_notation][:when_suffix].keys.include?(next_char.strip) 209 | max = nconfig[:alt_notation][:when_suffix][next_char.strip][:max] 210 | 211 | if max.nil? 
|| num.to_i <= max 212 | alt = true 213 | end 214 | end 215 | 216 | # Sign 217 | sign = [] 218 | negative = false 219 | if num =~ /^-/ 220 | num = num.sub(/^-\s*/, '') 221 | sign << nconfig[:negative] 222 | negative = true 223 | elsif num =~ /^\+/ 224 | num = num.sub(/^\+\s*/, '') 225 | sign << nconfig[:positive] 226 | end 227 | 228 | if is_float 229 | below = nconfig[:decimal_point] 230 | below = nconfig[:digits][0] + below if num.to_f < 1 231 | 232 | if md = num.match(/(.*)e(.*)/) 233 | dp = md[1].index('.') 234 | num = md[1].tr '.', '' 235 | exp = md[2].to_i 236 | 237 | dp += exp 238 | if dp > num.length 239 | num = num.ljust(dp, '0') 240 | num = num.sub(/^0+([1-9])/, "\\1") 241 | 242 | below = "" 243 | elsif dp < 0 244 | num = '0.' + '0' * (-dp) + num 245 | else 246 | num[dp, 1] = '.' + num[dp, 1] 247 | end 248 | end 249 | num.sub(/.*\./, '').each_char do | char | 250 | below += nconfig[:digits][char.to_i] 251 | end if num.include? '.' 252 | num = num.sub(/\..*/, '') 253 | else 254 | below = "" 255 | end 256 | 257 | tokens = [] 258 | unit_idx = -1 259 | num = num.to_i 260 | while num > 0 261 | v = num % 10000 262 | 263 | unit_idx += 1 264 | if v > 0 265 | if alt == false || unit_idx >= 1 266 | str = "" 267 | # Cannot use hash as they're unordered in 1.8 268 | [[1000, '천'], 269 | [100, '백'], 270 | [10, '십']].each do | arr | 271 | u, sub_unit = arr 272 | str += (nconfig[:digits][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0 273 | v %= u 274 | end 275 | str += nconfig[:digits][v] if v > 0 276 | 277 | raise RangeError, "number too large" unless nconfig[:units][unit_idx] 278 | tokens << str.sub(/ $/, '') + nconfig[:units][unit_idx] 279 | else 280 | str = "" 281 | tenfolds = nconfig[:alt_notation][:tenfolds] 282 | digits = nconfig[:alt_notation][:digits] 283 | alt_post_subs = nconfig[:alt_notation][:post_substitution] 284 | 285 | # Likewise. 
286 | [[1000, '천'], 287 | [100, '백']].each do |u, sub_unit| 288 | str += (nconfig[:digits][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0 289 | v %= u 290 | end 291 | 292 | str += tenfolds[(v / 10) - 1] if v / 10 > 0 293 | v %= 10 294 | str += digits[v] if v > 0 295 | 296 | alt_post_subs.each do |p, s| 297 | str.gsub!(p, s) 298 | end if alt 299 | tokens << str.sub(/ $/, '') + nconfig[:units][unit_idx] 300 | end 301 | end 302 | num /= 10000 303 | end 304 | 305 | tokens += sign unless sign.empty? 306 | ret = tokens.reverse.join(' ') + below + next_char 307 | nconfig[:post_substitution].each do |p, s| 308 | ret.gsub!(p, s) 309 | end 310 | ret 311 | end 312 | 313 | # @private 314 | def setup 315 | symbolize_keys = lambda do |val| 316 | case val 317 | when Hash 318 | {}.tap do |h| 319 | val.each do |k, v| 320 | k = k.gsub(' ', '_').to_sym if k =~ /[a-z0-9 ]/ 321 | h[k] = symbolize_keys.call v 322 | end 323 | end 324 | when Array 325 | val.map { |v| symbolize_keys.call v } 326 | else 327 | val 328 | end 329 | end 330 | @config = symbolize_keys.call YAML.load(File.read CONFIG_FILE_PATH) 331 | 332 | [ 333 | @config[:romanization][:post_substitution], 334 | @config[:number][:post_substitution], 335 | @config[:number][:alt_notation][:post_substitution] 336 | ].each do |r| 337 | r.keys.each do |k| 338 | r[Regexp.compile k.to_s] = r.delete k 339 | end 340 | end 341 | @config.freeze 342 | 343 | @pronouncer = Gimchi::Pronouncer.send :new, @config[:pronouncer], @config[:structure] 344 | 345 | @chosungs = @config[:structure][:chosung] 346 | @jungsungs = @config[:structure][:jungsung] 347 | @jongsungs = @config[:structure][:jongsung] 348 | @chosung_set = Set[*@chosungs] 349 | @jungsung_set = Set[*@jungsungs] 350 | @jongsung_set = Set[*@jongsungs] 351 | @all = @chosung_set + @jungsung_set + @jongsung_set 352 | end 353 | end 354 | private 355 | def initialize 356 | raise NoMethodError, "Gimchi is a singleton class" 357 | end 358 | end#Gimchi 359 | 360 | require 
'gimchi/patch_1.8' 361 | 362 | Gimchi.send :setup 363 | -------------------------------------------------------------------------------- /lib/gimchi/pronouncer.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | 3 | class Gimchi 4 | # Private class. 5 | # Partial implementation of the Korean pronunciation rules specified at 6 | # http://www.korean.go.kr/ 7 | # @private 8 | class Pronouncer 9 | private 10 | def initialize pconfig, structure 11 | @pconfig = pconfig 12 | @structure = structure 13 | end 14 | 15 | def pronounce! str, options = {} 16 | @sequence = @pconfig[:transformation][ 17 | "sequence_for_#{options[:each_char] ? '1' : '2'}".to_sym] - options[:except] 18 | 19 | # Dissecting 20 | @chars = str.each_char.map { |c| Gimchi::Char.new(c) rescue c } 21 | @orig_chars = @chars.dup 22 | 23 | # Padding 24 | @chars.each { |c| pad c } 25 | 26 | # Two-phase processing 27 | # - For `slur' 28 | applied = [] 29 | 2.times do | phase | 30 | @chars = @chars.reject { |c| c =~ /\s/ } if phase == 1 # slur-phase 31 | 32 | # Per-character copies (dup) kept as a backup of the state before this phase's transformations 33 | @initial_chars = @chars.map { |c| c.dup } 34 | 35 | # Transform one by one 36 | applied += (0...@chars.length).inject([]) { | arr, i | arr + transform(i); } 37 | 38 | # Post-processing (actually just for :each_char option) 39 | @chars.select { |c| c.is_a?(Gimchi::Char) && c.jongsung }.each do | c | 40 | c.jongsung = @pconfig[:jongsung_sound][c.jongsung] 41 | end 42 | 43 | break unless options[:slur] 44 | end 45 | 46 | return @orig_chars.join, applied 47 | end 48 | 49 | private 50 | def transform idx 51 | @cursor = idx 52 | kc = @chars[@cursor] 53 | 54 | # Not Korean 55 | return [] unless kc.is_a? Gimchi::Char 56 | 57 | # Setting up variables for fast lookup 58 | @kc = kc 59 | @next_kc = (nkc = @chars[@cursor + 1]).is_a?(Gimchi::Char) ? 
nkc : nil 60 | @kc_org = @initial_chars[@cursor] 61 | @next_kc_org = (nkco = @initial_chars[@cursor + 1]).is_a?(Gimchi::Char) ? nkco : nil 62 | 63 | # Cannot properly pronounce 64 | return [] if @kc.chosung.nil? && @kc.jungsung.nil? && @kc.jongsung.nil? 65 | 66 | applied = [] 67 | not_todo = [] 68 | blocking_rule = @pconfig[:transformation][:blocking_rule] 69 | @sequence.each do | rule | 70 | next if not_todo.include?(rule) 71 | 72 | if self.send(rule) 73 | applied << rule 74 | not_todo += blocking_rule[rule] if blocking_rule.has_key?(rule) 75 | end 76 | end 77 | applied 78 | end 79 | 80 | def pad c 81 | return unless c.is_a? Gimchi::Char 82 | 83 | c.chosung = 'ㅇ' if c.chosung.nil? 84 | c.jungsung = 'ㅡ' if c.jungsung.nil? 85 | end 86 | 87 | # shortcut 88 | def fortis_map 89 | @structure[:fortis_map] 90 | end 91 | 92 | # shortcut 93 | def double_consonant_map 94 | @structure[:double_consonant_map] 95 | end 96 | 97 | # 제5항: ‘ㅑ ㅒ ㅕ ㅖ ㅘ ㅙ ㅛ ㅝ ㅞ ㅠ ㅢ’는 이중 모음으로 발음한다. 98 | # 다만 1. 용언의 활용형에 나타나는 ‘져, 쪄, 쳐’는 [저, 쩌, 처]로 발음한다. 99 | # 다만 3. 자음을 첫소리로 가지고 있는 음절의 ‘ㅢ’는 [ㅣ]로 발음한다. 100 | def rule_5_1 101 | if %w[져 쪄 쳐].include? @kc.to_s 102 | @kc.jungsung = 'ㅓ' 103 | 104 | true 105 | end 106 | end 107 | 108 | def rule_5_3 109 | if @kc.jungsung == 'ㅢ' && @kc_org.chosung.consonant? 110 | @kc.jungsung = 'ㅣ' 111 | 112 | true 113 | end 114 | end 115 | 116 | # 제9항: 받침 ‘ㄲ, ㅋ’, ‘ㅅ, ㅆ, ㅈ, ㅊ, ㅌ’, ‘ㅍ’은 어말 또는 자음 앞에서 117 | # 각각 대표음 [ㄱ, ㄷ, ㅂ]으로 발음한다. 118 | def rule_9 119 | map = { 120 | %w[ㄲ ㅋ] => 'ㄱ', 121 | %w[ㅅ ㅆ ㅈ ㅊ ㅌ] => 'ㄷ', 122 | %w[ㅍ] => 'ㅂ' 123 | } 124 | if map.keys.flatten.include?(@kc.jongsung) && (@next_kc.nil? || @next_kc.chosung.consonant?) 125 | @kc.jongsung = map[ map.keys.find { |e| e.include? @kc.jongsung } ] 126 | 127 | true 128 | end 129 | end 130 | 131 | # 제10항: 겹받침 ‘ㄳ’, ‘ㄵ’, ‘ㄼ, ㄽ, ㄾ’, ‘ㅄ’은 어말 또는 자음 앞에서 132 | # 각각 [ㄱ, ㄴ, ㄹ, ㅂ]으로 발음한다. 
133 | def rule_10 134 | map = { 135 | %w[ㄳ] => 'ㄱ', 136 | %w[ㄵ] => 'ㄴ', 137 | %w[ㄼ ㄽ ㄾ] => 'ㄹ', 138 | %w[ㅄ] => 'ㅂ' 139 | } 140 | if map.keys.flatten.include?(@kc.jongsung) && (@next_kc.nil? || @next_kc.chosung.consonant?) 141 | # Exceptions 142 | if @next_kc && ( 143 | (@kc.to_s == '밟' && @next_kc.chosung.consonant?) || 144 | (@kc.to_s == '넓' && @next_kc && %w[적 죽 둥].include?(@next_kc_org.to_s))) # PATCH 145 | @kc.jongsung = 'ㅂ' 146 | else 147 | @kc.jongsung = map[ map.keys.find { |e| e.include? @kc.jongsung } ] 148 | end 149 | 150 | true 151 | end 152 | end 153 | 154 | # 제11항: 겹받침 ‘ㄺ, ㄻ, ㄿ’은 어말 또는 자음 앞에서 각각 [ㄱ, ㅁ, ㅂ]으로 발음한다. 155 | def rule_11 156 | map = { 157 | 'ㄺ' => 'ㄱ', 158 | 'ㄻ' => 'ㅁ', 159 | 'ㄿ' => 'ㅂ' 160 | } 161 | if map.keys.include?(@kc.jongsung) && (@next_kc.nil? || @next_kc.chosung.consonant?) 162 | # 다만, 용언의 어간 말음 ‘ㄺ’은 ‘ㄱ’ 앞에서 [ㄹ]로 발음한다. 163 | # - 용언 여부 판단은?: 중성으로 판단 (PATCH) 164 | if @next_kc && @kc.jongsung == 'ㄺ' && 165 | @next_kc_org.chosung == 'ㄱ' && 166 | %w[맑 얽 섥 밝 늙 묽 넓].include?(@kc.to_s) # PATCH 167 | @kc.jongsung = 'ㄹ' 168 | else 169 | @kc.jongsung = map[@kc.jongsung] 170 | end 171 | 172 | true 173 | end 174 | end 175 | 176 | # 제12항: 받침 ‘ㅎ’의 발음은 다음과 같다. 177 | # 1. ‘ㅎ(ㄶ, ㅀ)’ 뒤에 ‘ㄱ, ㄷ, ㅈ’이 결합되는 경우에는, 뒤 음절 첫소리와 178 | # 합쳐서 [ㅋ, ㅌ, ㅊ]으로 발음한다. 179 | # [붙임 1]받침 ‘ㄱ(ㄺ), ㄷ, ㅂ(ㄼ), ㅈ(ㄵ)’이 뒤 음절 첫소리 ‘ㅎ’과 180 | # 결합되는 경우에도, 역시 두 음을 합쳐서 [ㅋ, ㅌ, ㅍ, ㅊ]으로 발음한다. 181 | # [붙임 2]규정에 따라 ‘ㄷ’으로 발음되는 ‘ㅅ, ㅈ, ㅊ, ㅌ’의 경우에도 이에 준한다. 182 | # 183 | # 2. ‘ㅎ(ㄶ, ㅀ)’ 뒤에 ‘ㅅ’이 결합되는 경우에는, ‘ㅅ’을 [ㅆ]으로 발음한다. 184 | # 185 | # 3. ‘ㅎ’ 뒤에 ‘ㄴ’이 결합되는 경우에는, [ㄴ]으로 발음한다. 186 | # [붙임]‘ㄶ, ㅀ’ 뒤에 ‘ㄴ’이 결합되는 경우에는, ‘ㅎ’을 발음하지 않는다. 187 | # 188 | # 4. ‘ㅎ(ㄶ, ㅀ)’ 뒤에 모음으로 시작된 어미나 접미사가 결합되는 경우에는, ‘ㅎ’을 발음하지 않는다. 189 | def rule_12 190 | return if @next_kc.nil? 
191 | 192 | map_12_1 = { 193 | 'ㄱ' => 'ㅋ', 194 | 'ㄷ' => 'ㅌ', 195 | 'ㅈ' => 'ㅊ' } 196 | if %w[ㅎ ㄶ ㅀ].include?(@kc.jongsung) 197 | # 12-1 198 | if map_12_1.keys.include?(@next_kc.chosung) 199 | @next_kc.chosung = map_12_1[@next_kc.chosung] 200 | @kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first 201 | 202 | # 12-2 203 | elsif @next_kc.chosung == 'ㅅ' 204 | @kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first 205 | @next_kc.chosung = 'ㅆ' 206 | 207 | # 12-3 208 | elsif @next_kc.chosung == 'ㄴ' 209 | if dc = double_consonant_map[@kc.jongsung] 210 | @kc.jongsung = dc.first 211 | else 212 | @kc.jongsung = 'ㄴ' 213 | end 214 | 215 | # 12-4 216 | elsif @next_kc.chosung == 'ㅇ' 217 | @kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first 218 | end 219 | 220 | true 221 | end 222 | 223 | # 12-1 붙임 224 | if @next_kc.chosung == 'ㅎ' 225 | map_jongsung = { 226 | # 붙임 1 227 | 'ㄱ' => [nil, 'ㅋ'], 228 | 'ㄺ' => ['ㄹ', 'ㅋ'], 229 | 'ㄷ' => [nil, 'ㅌ'], 230 | 'ㅂ' => [nil, 'ㅍ'], 231 | 'ㄼ' => ['ㄹ', 'ㅍ'], 232 | 'ㅈ' => [nil, 'ㅊ'], 233 | 'ㄵ' => ['ㄴ', 'ㅊ'], 234 | 235 | # 붙임 2 236 | 'ㅅ' => [nil, 'ㅌ'], 237 | #'ㅈ' => [nil, 'ㅌ'], # FIXME: 붙임2의 모순 238 | 'ㅊ' => [nil, 'ㅌ'], 239 | 'ㅌ' => [nil, 'ㅌ'], 240 | } 241 | if trans1 = map_jongsung[@kc.jongsung] 242 | @kc.jongsung = trans1.first 243 | @next_kc.chosung = trans1.last 244 | 245 | true 246 | end 247 | end 248 | end 249 | 250 | # 제13항: 홑받침이나 쌍받침이 모음으로 시작된 조사나 어미, 접미사와 251 | # 결합되는 경우에는, 제 음가대로 뒤 음절 첫소리로 옮겨 발음한다. 252 | def rule_13 253 | return if @kc.jongsung.nil? || @kc.jongsung == 'ㅇ' || @next_kc.nil? || @next_kc.chosung != 'ㅇ' 254 | @next_kc.chosung = @kc.jongsung 255 | @kc.jongsung = nil 256 | 257 | true 258 | end 259 | 260 | # 제14항: 겹받침이 모음으로 시작된 조사나 어미, 접미사와 결합되는 경우에는, 261 | # 뒤엣것만을 뒤 음절 첫소리로 옮겨 발음한다.(이 경우, ‘ㅅ’은 된소리로 발음함.) 262 | # 263 | def rule_14 264 | return if @kc.jongsung.nil? || @kc.jongsung == 'ㅇ' || @next_kc.nil? 
|| @next_kc.chosung != 'ㅇ' 265 | if consonants = double_consonant_map[@kc.jongsung] 266 | consonants[1] = 'ㅆ' if consonants[1] == 'ㅅ' 267 | @kc.jongsung, @next_kc.chosung = consonants 268 | 269 | true 270 | end 271 | end 272 | 273 | # 제15항: 받침 뒤에 모음 ‘ㅏ, ㅓ, ㅗ, ㅜ, ㅟ’들로 시작되는 __실질 형태소__가 연결되는 274 | # 경우에는, 대표음으로 바꾸어서 뒤 음절 첫소리로 옮겨 발음한다. 275 | def rule_15 276 | return if @kc.jongsung.nil? || @kc.jongsung == 'ㅇ' || @next_kc.nil? || @next_kc.chosung != 'ㅇ' 277 | 278 | if false && %w[ㅏ ㅓ ㅗ ㅜ ㅟ].include?(@next_kc.jungsung) && 279 | %[ㅆ ㄲ ㅈ ㅊ ㄵ ㄻ ㄾ ㄿ ㄺ].include?(@kc.jongsung) == false # PATCH 280 | @next_kc.chosung = @pconfig[:jongsung_sound][ @kc.jongsung ] 281 | @kc.jongsung = nil 282 | 283 | true 284 | end 285 | end 286 | 287 | # 제16항: 한글 자모의 이름은 그 받침소리를 연음하되, ‘ㄷ, ㅈ, ㅊ, ㅋ, ㅌ, 288 | # ㅍ, ㅎ’의 경우에는 특별히 다음과 같이 발음한다. 289 | def rule_16 290 | return if @next_kc.nil? 291 | 292 | map = {'디귿' => '디긋', 293 | '지읒' => '지읏', 294 | '치읓' => '치읏', 295 | '키읔' => '키윽', 296 | '티읕' => '티읏', 297 | '피읖' => '피읍', 298 | '히읗' => '히읏'} 299 | 300 | word = @kc.to_s + @next_kc.to_s 301 | if map.keys.include? word 302 | new_char = Gimchi::Char.new(map[word].scan(/./mu)[1]) 303 | @next_kc.chosung = new_char.chosung 304 | @next_kc.jongsung = new_char.jongsung 305 | 306 | true 307 | end 308 | end 309 | 310 | # 제17항: 받침 ‘ㄷ, ㅌ(ㄾ)’이 조사나 접미사의 모음 ‘ㅣ’와 결합되는 경우에는, 311 | # [ㅈ, ㅊ]으로 바꾸어서 뒤 음절 첫소리로 옮겨 발음한다. 312 | # 313 | # [붙임] ‘ㄷ’ 뒤에 접미사 ‘히’가 결합되어 ‘티’를 이루는 것은 [치]로 발음한다. 314 | def rule_17 315 | return if @next_kc.nil? || %w[ㄷ ㅌ ㄾ].include?(@kc.jongsung) == false 316 | 317 | if @next_kc.to_s == '이' 318 | @next_kc.chosung = @kc.jongsung == 'ㄷ' ? 
'ㅈ' : 'ㅊ' 319 | @kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first 320 | 321 | true 322 | elsif @next_kc.to_s == '히' 323 | @next_kc.chosung = 'ㅊ' 324 | @kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first 325 | 326 | true 327 | end 328 | end 329 | 330 | # 제18항: 받침 ‘ㄱ(ㄲ, ㅋ, ㄳ, ㄺ), ㄷ(ㅅ, ㅆ, ㅈ, ㅊ, ㅌ, ㅎ), ㅂ(ㅍ, ㄼ, 331 | # ㄿ, ㅄ)’은 ‘ㄴ, ㅁ’ 앞에서 [ㅇ, ㄴ, ㅁ]으로 발음한다. 332 | def rule_18 333 | map = { 334 | %w[ㄱ ㄲ ㅋ ㄳ ㄺ] => 'ㅇ', 335 | %w[ㄷ ㅅ ㅆ ㅈ ㅊ ㅌ ㅎ] => 'ㄴ', 336 | %w[ㅂ ㅍ ㄼ ㄿ ㅄ] => 'ㅁ' 337 | } 338 | if @next_kc && map.keys.flatten.include?(@kc.jongsung) && %w[ㄴ ㅁ].include?(@next_kc.chosung) 339 | @kc.jongsung = map[ map.keys.find { |e| e.include? @kc.jongsung } ] 340 | 341 | true 342 | end 343 | end 344 | 345 | # 제19항: 받침 ‘ㅁ, ㅇ’ 뒤에 연결되는 ‘ㄹ’은 [ㄴ]으로 발음한다. 346 | # [붙임]받침 ‘ㄱ, ㅂ’ 뒤에 연결되는 ‘ㄹ’도 [ㄴ]으로 발음한다. 347 | def rule_19 348 | if @next_kc && @next_kc.chosung == 'ㄹ' && %w[ㅁ ㅇ ㄱ ㅂ].include?(@kc.jongsung) 349 | @next_kc.chosung = 'ㄴ' 350 | 351 | case @kc.jongsung 352 | when 'ㄱ' then @kc.jongsung = 'ㅇ' 353 | when 'ㅂ' then @kc.jongsung = 'ㅁ' 354 | end 355 | 356 | true 357 | end 358 | end 359 | 360 | # 제20항: ‘ㄴ’은 ‘ㄹ’의 앞이나 뒤에서 [ㄹ]로 발음한다. 361 | def rule_20 362 | return if @next_kc.nil? 363 | 364 | to = if %w[견란 진란 산량 단력 권력 원령 견례 365 | 문로 단로 원론 원료 근류].include?(@kc_org.to_s + @next_kc_org.to_s) 366 | 'ㄴ' 367 | else 368 | 'ㄹ' 369 | end 370 | 371 | if @kc.jongsung == 'ㄹ' && @next_kc.chosung == 'ㄴ' 372 | @kc.jongsung = @next_kc.chosung = to 373 | 374 | true 375 | elsif @kc.jongsung == 'ㄴ' && @next_kc.chosung == 'ㄹ' 376 | @kc.jongsung = @next_kc.chosung = to 377 | 378 | true 379 | end 380 | end 381 | 382 | # 제23항: 받침 ‘ㄱ(ㄲ, ㅋ, ㄳ, ㄺ), ㄷ(ㅅ, ㅆ, ㅈ, ㅊ, ㅌ), ㅂ(ㅍ, ㄼ, ㄿ,ㅄ)’ 383 | # 뒤에 연결되는 ‘ㄱ, ㄷ, ㅂ, ㅅ, ㅈ’은 된소리로 발음한다. 384 | def rule_23 385 | return if @next_kc.nil? 
386 | if fortis_map.keys.include?(@next_kc.chosung) && 387 | %w[ㄱ ㄲ ㅋ ㄳ ㄺ ㄷ ㅅ ㅆ ㅈ ㅊ ㅌ ㅂ ㅍ ㄼ ㄿ ㅄ].include?(@kc.jongsung) 388 | @next_kc.chosung = fortis_map[@next_kc.chosung] 389 | 390 | true 391 | end 392 | end 393 | 394 | # 제24항: 어간 받침 ‘ㄴ(ㄵ), ㅁ(ㄻ)’ 뒤에 결합되는 어미의 첫소리 ‘ㄱ, ㄷ, ㅅ, ㅈ’은 된소리로 발음한다. 395 | # 다만, 피동, 사동의 접미사 ‘-기-’는 된소리로 발음하지 않는다. 396 | # 용언 어간에만 적용. 397 | def rule_24 398 | return if @next_kc.nil? || 399 | @next_kc.to_s == '기' # FIXME 피동/사동 여부 판단 불가. e.g. 줄넘기 400 | 401 | # FIXME 용언 여부를 판단. 정확한 판단 불가. 402 | return unless case @kc.jongsung 403 | when 'ㄵ' 404 | %w[앉 얹].include? @kc.to_s 405 | when 'ㄻ' 406 | %w[젊 닮].include? @kc.to_s 407 | else 408 | false # XXX 일반적인 경우 사전 없이 판단 불가 409 | end 410 | 411 | if %w[ㄱ ㄷ ㅅ ㅈ].include?(@next_kc.chosung) && 412 | %w[ㄴ ㄵ ㅁ ㄻ ㄼ ㄾ].include?(@kc.jongsung) 413 | @next_kc.chosung = fortis_map[@next_kc.chosung] 414 | 415 | true 416 | end 417 | end 418 | 419 | # 제25항: 어간 받침 ‘ㄼ, ㄾ’ 뒤에 결합되는 어미의 첫소리 ‘ㄱ, ㄷ, ㅅ, ㅈ’은 420 | # 된소리로 발음한다. 421 | def rule_25 422 | return if @next_kc.nil? 423 | 424 | if %w[ㄱ ㄷ ㅅ ㅈ].include?(@next_kc.chosung) && 425 | %w[ㄼ ㄾ].include?(@kc.jongsung) 426 | @next_kc.chosung = fortis_map[@next_kc.chosung] 427 | 428 | true 429 | end 430 | end 431 | 432 | # 제26항: 한자어에서, ‘ㄹ’ 받침 뒤에 연결되는 ‘ㄷ, ㅅ, ㅈ’은 된소리로 발음한다. 433 | def rule_26 434 | # TODO 435 | end 436 | 437 | # 제27항: __관형사형__ ‘-(으)ㄹ’ 뒤에 연결되는 ‘ㄱ, ㄷ, ㅂ, ㅅ, ㅈ’은 된소리로 발음한다. 438 | # - ‘-(으)ㄹ’로 시작되는 어미의 경우에도 이에 준한다. 439 | def rule_27 440 | # FIXME: NOT PROPERLY IMPLEMENTED 441 | return if @next_kc.nil? 442 | 443 | # 비교적 확률이 높은 경우들에 대해서만 처리. "일" 은 제외. 444 | if %w[할 갈 날 볼 을 앨 말 힐].include?(@kc.to_s) && # @kc.jongsung == 'ㄹ' && 445 | %w[ㄱ ㄷ ㅂ ㅅ ㅈ].include?(@next_kc.chosung) 446 | @next_kc.chosung = fortis_map[@next_kc.chosung] 447 | true 448 | end 449 | end 450 | 451 | # 제26항: 한자어에서, ‘ㄹ’ 받침 뒤에 연결되는 ‘ㄷ, ㅅ, ㅈ’은 된소리로 발음한다. 452 | # 제28항: 표기상으로는 사이시옷이 없더라도, 관형격 기능을 지니는 사이시옷이 453 | # 있어야 할(휴지가 성립되는) 합성어의 경우에는, 뒤 단어의 첫소리 ‘ㄱ, ㄷ, 454 | # ㅂ, ㅅ, ㅈ’을 된소리로 발음한다. 
455 | def rule_26_28 456 | # TODO 457 | end 458 | 459 | # 제29항: 합성어 및 파생어에서, 앞 단어나 접두사의 끝이 자음이고 뒤 단어나 460 | # 접미사의 첫음절이 ‘이, 야, 여, 요, 유’인 경우에는, ‘ㄴ’ 음을 첨가하여 461 | # [니, 냐, 녀, 뇨, 뉴]로 발음한다. 462 | def rule_29 463 | # TODO 464 | end 465 | 466 | # 제30항: 사이시옷이 붙은 단어는 다음과 같이 발음한다. 467 | # 1. ‘ㄱ, ㄷ, ㅂ, ㅅ, ㅈ’으로 시작하는 단어 앞에 사이시옷이 올 때는 이들 468 | # 자음만을 된소리로 발음하는 것을 원칙으로 하되, 사이시옷을 [ㄷ]으로 469 | # 발음하는 것도 허용한다. 470 | # 2. 사이시옷 뒤에 ‘ㄴ, ㅁ’이 결합되는 경우에는 [ㄴ]으로 발음한다. 471 | # 3. 사이시옷 뒤에 ‘이’ 음이 결합되는 경우에는 [ㄴㄴ]으로 발음한다. 472 | def rule_30 473 | return if @next_kc.nil? || @kc.jongsung != 'ㅅ' 474 | 475 | if %w[ㄱ ㄷ ㅂ ㅅ ㅈ].include? @next_kc.chosung 476 | @kc.jongsung = 'ㄷ' # or nil 477 | @next_kc.chosung = fortis_map[@next_kc.chosung] 478 | 479 | true 480 | elsif %w[ㄴ ㅁ].include? @next_kc.chosung 481 | @kc.jongsung = 'ㄴ' 482 | 483 | true 484 | elsif @next_kc.chosung == 'ㅇ' && 485 | %w[ㅣ ㅒ ㅖ ㅑ ㅕ ㅛ ㅠ].include?(@next_kc.jungsung) && 486 | @next_kc.jongsung # PATCH 487 | @kc.jongsung = @next_kc.chosung = 'ㄴ' 488 | 489 | true 490 | end 491 | end 492 | end#Pronouncer 493 | end#Gimchi 494 | --------------------------------------------------------------------------------