├── .document ├── .gitignore ├── LICENSE ├── README.rdoc ├── Rakefile ├── VERSION ├── lib └── korean-string.rb └── test ├── helper.rb └── test_korean-string.rb /.document: -------------------------------------------------------------------------------- 1 | README.rdoc 2 | lib/**/*.rb 3 | bin/* 4 | features/**/*.feature 5 | LICENSE 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## MAC OS 2 | .DS_Store 3 | 4 | ## TEXTMATE 5 | *.tmproj 6 | tmtags 7 | 8 | ## EMACS 9 | *~ 10 | \#* 11 | .\#* 12 | 13 | ## VIM 14 | *.swp 15 | 16 | ## PROJECT::GENERAL 17 | coverage 18 | rdoc 19 | pkg 20 | 21 | ## PROJECT::SPECIFIC 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 Ben Humphreys 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.rdoc: -------------------------------------------------------------------------------- 1 | = korean-string 2 | 3 | Split Korean characters into individual components, and join components together to create characters. 4 | 5 | You could use it to make some weird conjugation rules. Go wild. 6 | 7 | == Methods 8 | 9 | === String.split_ko 10 | 11 | Return an array of arrays of Korean character components 12 | 13 | require 'korean-string' 14 | '읽어싶'.split_ko 15 | => [["ㅇ", "ㅣ", "ㄺ"], ["ㅇ", "ㅓ"], ["ㅅ", "ㅣ", "ㅍ"]] 16 | 17 | === Array.join_ko 18 | 19 | Accepts an array of character pieces 20 | 21 | require 'korean-string' 22 | [["ㅇ", "ㅣ", "ㄺ"], ["ㅇ", "ㅓ"], ["ㅅ", "ㅣ", "ㅍ"]].join_ko 23 | => '읽어싶' 24 | 25 | 26 | == Todo 27 | 28 | Come up with other useful methods? 29 | 30 | 31 | == Copyright 32 | 33 | Copyright (c) 2010 Ben Humphreys. See LICENSE for details. 34 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'rake' 3 | 4 | begin 5 | require 'jeweler' 6 | Jeweler::Tasks.new do |gem| 7 | gem.name = "korean-string" 8 | gem.summary = %Q{Korean string join and split} 9 | gem.description = %Q{Split Korean characters to individual compontents, join components together to create characters} 10 | gem.email = "benhumphreys@gmail.com" 11 | gem.homepage = "http://github.com/bhumphreys/korean-string" 12 | gem.authors = ["Ben Humphreys"] 13 | #gem.add_development_dependency "thoughtbot-shoulda", ">= 0" 14 | # gem is a Gem::Specification... 
# encoding: utf-8
#
# Originally transliterate-hacked from Perl from
# http://blog.naver.com/PostView.nhn?blogId=mokomoji&logNo=130013133481
#
# For the theory of why this works, check out the W3C spec on Korean encoding
# http://www.w3c.or.kr/i18n/hangul-i18n/ko-code.html
# (Thanks to @ntrolls for this)

# $KCODE only has an effect on Ruby 1.8, so it is not touched on 1.9 or later.
$KCODE = 'UTF8' if RUBY_VERSION < '1.9'

# Code points of the initial consonants, in the order the syllable block
# uses them. The position in this array is the "initial" index of a syllable.
CHOSUNG = [
  # ㄱ     ㄲ      ㄴ      ㄷ      ㄸ      ㄹ      ㅁ      ㅂ
  0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
  # ㅃ     ㅅ      ㅆ      ㅇ      ㅈ      ㅉ      ㅊ      ㅋ
  0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b,
  # ㅌ     ㅍ      ㅎ
  0x314c, 0x314d, 0x314e
].freeze

# Code points of the vowels; the position is the "medial" index of a syllable.
JWUNGSUNG = [
  # ㅏ     ㅐ      ㅑ      ㅒ      ㅓ      ㅔ      ㅕ      ㅖ
  0x314f, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156,
  # ㅗ     ㅘ      ㅙ      ㅚ      ㅛ      ㅜ      ㅝ      ㅞ
  0x3157, 0x3158, 0x3159, 0x315a, 0x315b, 0x315c, 0x315d, 0x315e,
  # ㅟ     ㅠ      ㅡ      ㅢ      ㅣ
  0x315f, 0x3160, 0x3161, 0x3162, 0x3163
].freeze

# Code points of the final consonants; the position is the "final" index of a
# syllable. Index 0 is the placeholder for "no final consonant", which is why
# every letter sits one slot later than a naive count would suggest.
JONGSUNG = [
  # (none) ㄱ      ㄲ      ㄳ      ㄴ      ㄵ      ㄶ      ㄷ
  0,      0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137,
  # ㄹ     ㄺ      ㄻ      ㄼ      ㄽ      ㄾ      ㄿ      ㅀ
  0x3139, 0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f, 0x3140,
  # ㅁ     ㅂ      ㅄ      ㅅ      ㅆ      ㅇ      ㅈ      ㅊ
  0x3141, 0x3142, 0x3144, 0x3145, 0x3146, 0x3147, 0x3148, 0x314a,
  # ㅋ     ㅌ      ㅍ      ㅎ
  0x314b, 0x314c, 0x314d, 0x314e
].freeze

# Code-point arithmetic shared by String#split_ko and Array#join_ko.
#
# A precomposed syllable is laid out as
#   SYLLABLE_FIRST + initial * (vowels * finals) + vowel * finals + final
# where the three indices are positions in CHOSUNG, JWUNGSUNG and JONGSUNG.
module KoreanString
  # First and last code points of the precomposed syllable range.
  SYLLABLE_FIRST = 0xAC00
  SYLLABLE_LAST  = 0xD7A3

  module_function

  # Breaks one code point into the code points of its components.
  #
  # codepoint - Integer code point.
  #
  # Returns an Array of two or three Integers (initial, vowel and, when the
  # syllable has one, final) for a precomposed syllable, or a one-element
  # Array holding +codepoint+ unchanged for anything outside that range.
  def decompose(codepoint)
    return [codepoint] unless codepoint.between?(SYLLABLE_FIRST, SYLLABLE_LAST)

    # Integer divmod keeps the arithmetic exact; no detour through floats.
    lead, rest = (codepoint - SYLLABLE_FIRST).divmod(JWUNGSUNG.size * JONGSUNG.size)
    vowel, tail = rest.divmod(JONGSUNG.size)

    parts = [CHOSUNG[lead], JWUNGSUNG[vowel]]
    parts << JONGSUNG[tail] unless tail.zero? # slot 0 means "no final"
    parts
  end

  # Builds the text for one group of pieces.
  #
  # jamo - Array of Strings: initial, vowel and optional final. Only the
  #        first three entries are looked at. A group with fewer than two
  #        entries is not a syllable and is returned as plain text, which is
  #        what String#split_ko produces for non-Korean characters.
  #
  # Returns a String.
  # Raises RuntimeError when a piece is not found in its lookup table.
  def compose(jamo)
    return jamo.join if jamo.size < 2

    lead_piece, vowel_piece, tail_piece = jamo
    lead  = jamo_index(CHOSUNG, lead_piece, 'initial consonant')
    vowel = jamo_index(JWUNGSUNG, vowel_piece, 'vowel')
    # A missing or empty third piece means the syllable has no final.
    tail = if tail_piece.nil? || tail_piece.empty?
             0
           else
             jamo_index(JONGSUNG, tail_piece, 'final consonant')
           end

    codepoint = SYLLABLE_FIRST +
                lead * (JWUNGSUNG.size * JONGSUNG.size) +
                vowel * JONGSUNG.size +
                tail
    [codepoint].pack('U')
  end

  # Looks up the table position of the first character of +piece+.
  #
  # table - one of CHOSUNG, JWUNGSUNG, JONGSUNG.
  # piece - String whose first character is looked up.
  # role  - String naming the slot, used only in the error message.
  #
  # Returns the Integer index.
  # Raises RuntimeError when the character is not in +table+.
  def jamo_index(table, piece, role)
    offset = table.index(piece.unpack('U*').first)
    raise "#{piece.inspect} is not a valid #{role}" if offset.nil?

    offset
  end
end

# Not wrapping the core extensions in a module... not sure if that's a
# terrible idea
class String
  # Splits every character of the string into its components.
  #
  # Returns an Array with one entry per character. Each entry is an Array of
  # one-character Strings: [initial, vowel] or [initial, vowel, final] for a
  # precomposed Korean syllable, and [character] for anything else.
  #
  #   '읽어'.split_ko  # => [["ㅇ", "ㅣ", "ㄺ"], ["ㅇ", "ㅓ"]]
  def split_ko
    unpack('U*').map do |codepoint|
      KoreanString.decompose(codepoint).map { |part| [part].pack('U') }
    end
  end
end

class Array
  # We've got our sploded array of korean bits
  # need to put them back into Real Words
  #
  # Accepts either shape:
  #
  #   %w(ㅇ ㅣ ㄹ).join_ko                       # => "일"   (one syllable)
  #   [["ㅇ", "ㅣ", "ㄺ"], ["ㅇ", "ㅓ"]].join_ko  # => "읽어" (String#split_ko output)
  #
  # In the nested shape each inner array becomes one character, and
  # single-piece groups are copied through as they are, so
  # <tt>str.split_ko.join_ko == str</tt>.
  #
  # Returns a String.
  # Raises RuntimeError when a piece is not a valid initial, vowel or final.
  def join_ko
    if any? { |piece| piece.is_a?(Array) }
      map { |piece| KoreanString.compose(piece.is_a?(Array) ? piece : [piece]) }.join
    else
      KoreanString.compose(self)
    end
  end
end