├── .gitignore ├── CHANGES.txt ├── INSTALL.txt ├── ISSUES.txt ├── LICENSE.txt ├── README.rdoc ├── Rakefile ├── bench ├── case_mappings.rb ├── char_name.rb ├── each_grapheme.rb ├── each_word.rb ├── grep.rb ├── normalization.rb └── titlecase.rb ├── cdata ├── canonical_decomposition_map ├── case_ignorable_set ├── casefold_c_map ├── casefold_f_map ├── casefold_s_map ├── cat_set_titlecase ├── combining_class_map ├── compatibility_decomposition_map ├── composition_exclusion_set ├── cond_lc_map ├── cond_tc_map ├── cond_uc_map ├── east_asian_width_property_per_cp ├── east_asian_width_property_ranges ├── general_category_aliases ├── general_category_per_cp ├── general_category_ranges ├── grapheme_break_property ├── jamo_short_names ├── name_aliases ├── names ├── prop_set_default_ignorable ├── prop_set_lowercase ├── prop_set_uppercase ├── simple_lc_map ├── simple_tc_map ├── simple_uc_map ├── soft_dotted_set ├── special_lc_map ├── special_tc_map ├── special_uc_map ├── white_space_set └── word_break_property ├── data ├── CaseFolding.txt ├── DerivedCombiningClass.txt ├── DerivedCoreProperties.txt ├── DerivedNormalizationProps.txt ├── EastAsianWidth.txt ├── GraphemeBreakProperty.txt ├── GraphemeBreakTest.html ├── GraphemeBreakTest.txt ├── Jamo.txt ├── NameAliases.txt ├── NormalizationTest.txt ├── PropList.txt ├── PropertyValueAliases.txt ├── README.txt ├── SpecialCasing.txt ├── UnicodeData.txt ├── WordBreakProperty.txt ├── WordBreakTest.html ├── WordBreakTest.txt ├── compile.rb └── tr44-10.html ├── install.rb ├── lib ├── unicode_utils.rb └── unicode_utils │ ├── canonical_decomposition.rb │ ├── canonical_equivalents_q.rb │ ├── case_ignorable_char_q.rb │ ├── cased_char_q.rb │ ├── casefold.rb │ ├── char_display_width.rb │ ├── char_name.rb │ ├── char_type.rb │ ├── code_point_type.rb │ ├── codepoint.rb │ ├── combining_class.rb │ ├── compatibility_decomposition.rb │ ├── conditional_casing.rb │ ├── debug.rb │ ├── default_ignorable_char_q.rb │ ├── display_width.rb │ ├── downcase.rb │ 
├── each_grapheme.rb │ ├── each_word.rb │ ├── east_asian_width.rb │ ├── gc.rb │ ├── general_category.rb │ ├── graphic_char_q.rb │ ├── grep.rb │ ├── hangul_syllable_decomposition.rb │ ├── jamo_short_name.rb │ ├── lowercase_char_q.rb │ ├── name_alias.rb │ ├── name_aliases.rb │ ├── nfc.rb │ ├── nfd.rb │ ├── nfkc.rb │ ├── nfkd.rb │ ├── read_cdata.rb │ ├── sid.rb │ ├── simple_casefold.rb │ ├── simple_downcase.rb │ ├── simple_upcase.rb │ ├── soft_dotted_char_q.rb │ ├── titlecase.rb │ ├── titlecase_char_q.rb │ ├── u.rb │ ├── upcase.rb │ ├── uppercase_char_q.rb │ ├── version.rb │ └── white_space_char_q.rb ├── test ├── coverage.rb ├── dreilaendereck.txt ├── dreilaendereck_cf.txt ├── dreilaendereck_lc.txt ├── dreilaendereck_uc.txt ├── suite.rb ├── test_case_mappings.rb ├── test_codepoint.rb ├── test_each_grapheme.rb ├── test_each_word.rb ├── test_grep.rb ├── test_normalization.rb ├── test_unicode_6_0_0.rb ├── test_unicode_6_1_0.rb ├── test_unicode_6_2_0.rb └── test_unicode_utils.rb └── unicode_utils.gemspec /.gitignore: -------------------------------------------------------------------------------- 1 | tmp 2 | pkg 3 | doc 4 | Session.vim 5 | .*.swp 6 | *~ 7 | .test-result 8 | /coverage 9 | /aux 10 | -------------------------------------------------------------------------------- /CHANGES.txt: -------------------------------------------------------------------------------- 1 | == 1.4.0, 2012-09-30 2 | 3 | Updated to Unicode 6.2.0. 4 | 5 | * UnicodeUtils.debug accepts single Integer code point 6 | 7 | New methods in UnicodeUtils: 8 | 9 | * white_space_char? 10 | 11 | All tests pass with jruby-1.7.0.RC1. Not all tests pass with 12 | MRI 1.9.3p194 due to unexpected behaviour of String#<< with 13 | UTF-16 strings. As long as you use only UTF-8, there's no problem. 14 | 15 | == 1.3.0, 2012-03-07 16 | 17 | Updated to Unicode 6.1.0. 
18 | 19 | New methods in UnicodeUtils: 20 | 21 | * code_point_type 22 | * name_aliases 23 | * sid (string identifier) 24 | 25 | New constants in UnicodeUtils: 26 | 27 | * UNICODE_VERSION 28 | 29 | == 1.2.2, 2011-11-27 30 | 31 | New methods in UnicodeUtils: 32 | 33 | * east_asian_width 34 | * display_width 35 | * default_ignorable_char_q 36 | * gc 37 | * graphic_char_q 38 | * general_category 39 | * char_type 40 | * char_display_width 41 | * debug 42 | 43 | == 1.1.2, 2011-11-18 44 | 45 | Updated to Unicode 6.0.0. No additions to API. 46 | 47 | == 1.0.0, 2009-01-30 48 | 49 | First release, conforms to Unicode 5.1.0. 50 | -------------------------------------------------------------------------------- /INSTALL.txt: -------------------------------------------------------------------------------- 1 | == Installing UnicodeUtils 2 | 3 | The easiest way to install UnicodeUtils is with RubyGems: 4 | 5 | $ gem install unicode_utils 6 | 7 | === Manual installation 8 | 9 | Two kinds of files must be installed: 10 | 11 | 1. The library code. All files under lib/ and 12 | lib/unicode_utils/ with suffix .rb. 13 | The whole tree under lib/ must be on the load path. 14 | 15 | 2. The compiled Unicode data files under cdata/. UnicodeUtils 16 | loads them from the UnicodeUtils::CDATA_DIR directory, 17 | which is defined in read_cdata.rb. 18 | 19 | The best strategy is to copy the library files to Ruby's +sitelibdir+. 20 | You can get that by running: 21 | 22 | $ ruby -r rbconfig -e "puts RbConfig::CONFIG['sitelibdir']" 23 | 24 | Then copy all files under cdata/ to 25 | <sitelibdir>/unicode_utils. And last but not least, change 26 | the definition of +CDATA_DIR+ in 27 | <sitelibdir>/unicode_utils/read_cdata.rb to 28 | File.absolute_path(File.dirname(\_\_FILE\_\_)). 
29 | 30 | In fact, UnicodeUtils comes with an install.rb script that does 31 | all that: 32 | 33 | $ ruby install.rb install 34 | 35 | or: 36 | 37 | $ ruby install.rb install /some/other/dir 38 | -------------------------------------------------------------------------------- /ISSUES.txt: -------------------------------------------------------------------------------- 1 | = Issues 2 | 3 | == "code point" vs. "codepoint" 4 | 5 | The Unicode standard consistently uses "code point". On the other 6 | hand, Ruby's String class has an "each_codepoint" method. 7 | 8 | Beginning with version 1.3.0, UnicodeUtils will use "code point" 9 | and the related Ruby symbol names "code_point", "CODE_POINT" and 10 | "CodePoint" throughout. 11 | 12 | The only exception is the "UnicodeUtils::Codepoint" class, which 13 | predates UnicodeUtils 1.3.0. 14 | 15 | == char_name 16 | 17 | Unfortunately deviates from the Unicode Name property. 18 | 19 | Possible course of action: 20 | * Add consistent way to access all Unicode properties 21 | E.g. UnicodeUtils::General_Category[code_point], 22 | UnicodeUtils::Name[code_point], ... 23 | * Deprecate char_name 24 | 25 | == Encoding of string property values 26 | 27 | The encoding of string property values is currently undocumented. 28 | 29 | Possible course of action: 30 | * Use the same encoding for all string property values, preferably UTF-8 31 | * Document it 32 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2008-2012, Stefan Lang 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 
10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following 12 | disclaimer in the documentation and/or other materials 13 | provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 18 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 19 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 25 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /README.rdoc: -------------------------------------------------------------------------------- 1 | = Unicode Utils - Unicode algorithms for Ruby 1.9 2 | 3 | UnicodeUtils implements Unicode algorithms for case conversion, 4 | normalization, text segmentation and more in pure Ruby code. 5 | 6 | == Installation 7 | 8 | Install with RubyGems: 9 | 10 | gem install unicode_utils 11 | 12 | Or get the source from Github: http://github.com/lang/unicode_utils 13 | and follow the instructions in INSTALL.txt. 14 | 15 | UnicodeUtils works with Ruby 1.9.1 or later. 16 | 17 | == Synopsis 18 | 19 | require "unicode_utils/upcase" 20 | 21 | UnicodeUtils.upcase("weiß") => "WEISS" 22 | 23 | UnicodeUtils.upcase("i", :tr) => "İ" 24 | 25 | Start with the UnicodeUtils module in the API documentation for 26 | complete documentation. 
27 | 28 | == License 29 | 30 | unicode_utils is licensed under the BSD license. Read the file 31 | LICENSE.txt in the unicode_utils package for details. 32 | 33 | == Links 34 | 35 | Online documentation:: http://unicode-utils.rubyforge.org 36 | Source code:: http://github.com/lang/unicode_utils 37 | Rubyforge project:: http://rubyforge.org/projects/unicode-utils 38 | Home of the Unicode Consortium:: http://unicode.org 39 | 40 | == Who? 41 | 42 | UnicodeUtils is written by Stefan Lang. You can contact me at 43 | langstefan AT gmx.at. Contributions welcome! 44 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | require "#{File.dirname(__FILE__)}/lib/unicode_utils/version" 4 | 5 | suffix = ENV["SUFFIX"] 6 | 7 | gem_filename = "unicode_utils-#{UnicodeUtils::VERSION}.gem" 8 | 9 | task "default" => "quick-test" 10 | 11 | desc "Run unit tests." 12 | task "test" do 13 | ruby "-I lib test/suite.rb" 14 | end 15 | 16 | desc "Quick test run." 17 | task "quick-test" do 18 | ruby "-I lib -I . test/test_unicode_utils.rb" 19 | end 20 | 21 | desc "Run tests and generate coverage report." 22 | task "coverage" do 23 | ruby "-I lib test/coverage.rb" 24 | end 25 | 26 | desc "Build unicode_utils gem." 27 | task "gem" do 28 | sh "gem#{suffix} build unicode_utils.gemspec" 29 | mkdir "pkg" unless File.directory? "pkg" 30 | mv gem_filename, "pkg" 31 | end 32 | 33 | desc "Run rdoc to generate html documentation." 34 | task "doc" do 35 | sh "rdoc#{suffix} -o doc --charset=UTF-8 --title=UnicodeUtils --main=README.rdoc lib README.rdoc INSTALL.txt CHANGES.txt LICENSE.txt" 36 | end 37 | 38 | desc "Publish doc/ on unicode-utils.rubyfore.org. " + 39 | "Note: scp will prompt for rubyforge password." 
40 | task "publish-doc" => "doc" do 41 | sh "scp -i ~/.ssh/id_rsa_s0 -r doc/* langi@rubyforge.org:/var/www/gforge-projects/unicode-utils/" 42 | end 43 | 44 | desc "Compile Unicode data files from data/ to cdata/." 45 | task "compile-data" do 46 | ruby "data/compile.rb" 47 | end 48 | 49 | desc "Remove generated packages and documentation." 50 | task "clean" do 51 | rm_r "pkg" if File.exist? "pkg" 52 | rm_r "doc" if File.exist? "doc" 53 | end 54 | -------------------------------------------------------------------------------- /bench/case_mappings.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "benchmark" 4 | 5 | require "unicode_utils/upcase" 6 | require "unicode_utils/downcase" 7 | require "unicode_utils/casefold" 8 | 9 | TXT_DIR = File.join(File.dirname(__FILE__), "..", "test") 10 | 11 | def read_txt(filename) 12 | File.read(File.join(TXT_DIR, filename), mode: "r:UTF-8:-") 13 | end 14 | 15 | german_text = read_txt("dreilaendereck.txt") 16 | long_german_text = german_text * 100 17 | 18 | Benchmark.bm(35) do |x| 19 | x.report "String#upcase" do 20 | 100.times { german_text.upcase } 21 | end 22 | x.report "upcase, no language" do 23 | 100.times { UnicodeUtils.upcase(german_text) } 24 | end 25 | x.report "upcase, :de" do 26 | 100.times { UnicodeUtils.upcase(german_text, :de) } 27 | end 28 | x.report "upcase, :tr" do 29 | 100.times { UnicodeUtils.upcase(german_text, :tr) } 30 | end 31 | x.report "long text: String#upcase" do 32 | 1.times { long_german_text.upcase } 33 | end 34 | x.report "long text: upcase, no language" do 35 | 1.times { UnicodeUtils.upcase(long_german_text) } 36 | end 37 | x.report "long text: upcase, :de" do 38 | 1.times { UnicodeUtils.upcase(long_german_text, :de) } 39 | end 40 | x.report "long text: upcase, :tr" do 41 | 1.times { UnicodeUtils.upcase(long_german_text, :tr) } 42 | end 43 | 44 | x.report "String#downcase" do 45 | 100.times { german_text.downcase } 46 | 
end 47 | x.report "downcase, no language" do 48 | 100.times { UnicodeUtils.downcase(german_text) } 49 | end 50 | x.report "downcase, :de" do 51 | 100.times { UnicodeUtils.downcase(german_text, :de) } 52 | end 53 | x.report "downcase, :tr" do 54 | 100.times { UnicodeUtils.downcase(german_text, :tr) } 55 | end 56 | x.report "long text: String#downcase" do 57 | 1.times { long_german_text.downcase } 58 | end 59 | x.report "long text: downcase, no language" do 60 | 1.times { UnicodeUtils.downcase(long_german_text) } 61 | end 62 | x.report "long text: downcase, :de" do 63 | 1.times { UnicodeUtils.downcase(long_german_text, :de) } 64 | end 65 | x.report "long text: downcase, :tr" do 66 | 1.times { UnicodeUtils.downcase(long_german_text, :tr) } 67 | end 68 | 69 | x.report "casefold" do 70 | 100.times { UnicodeUtils.casefold(german_text) } 71 | end 72 | x.report "long text: casefold" do 73 | 1.times { UnicodeUtils.casefold(long_german_text) } 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /bench/char_name.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "benchmark" 4 | 5 | require "unicode_utils/char_name" 6 | require "unicode_utils/codepoint" 7 | 8 | def all_char_names 9 | UnicodeUtils::Codepoint::RANGE.each { |code_point| 10 | UnicodeUtils.char_name(code_point) 11 | } 12 | end 13 | 14 | def cjk_char_names 15 | [0x3400..0x4DB5, 0x4E00..0x9FC3, 0x20000..0x2A6D6].each { |range| 16 | range.each { |code_point| 17 | UnicodeUtils.char_name(code_point) 18 | } 19 | } 20 | end 21 | 22 | def hangul_syllable_char_names 23 | (0xAC00..0xD7A3).each { |code_point| 24 | UnicodeUtils.char_name(code_point) 25 | } 26 | end 27 | 28 | def name_map_lookup(code_point) 29 | UnicodeUtils::NAME_MAP[code_point] 30 | end 31 | 32 | puts "UnicodeUtils.char_name benchmarks" 33 | 34 | Benchmark.bm { |x| 35 | x.report("baseline") { 36 | 
UnicodeUtils::Codepoint::RANGE.each { |code_point| 37 | name_map_lookup(code_point) 38 | } 39 | } 40 | x.report("all code points") { 41 | all_char_names 42 | } 43 | x.report("CJK UNIFIED IDEOGRAPH") { 44 | cjk_char_names 45 | } 46 | x.report("HANGUL SYLLABLE") { 47 | hangul_syllable_char_names 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /bench/each_grapheme.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "benchmark" 4 | 5 | require "unicode_utils/each_grapheme" 6 | 7 | TXT_DIR = File.join(File.dirname(__FILE__), "..", "test") 8 | 9 | def read_txt(filename) 10 | File.read(File.join(TXT_DIR, filename), mode: "r:UTF-8:-") 11 | end 12 | 13 | german_text = read_txt("dreilaendereck.txt") 14 | long_german_text = german_text * 50 15 | 16 | Benchmark.bmbm do |x| 17 | x.report "each_grapheme" do 18 | 50.times { UnicodeUtils.each_grapheme(german_text) { |g| g } } 19 | end 20 | x.report "each_grapheme, long text" do 21 | 1.times { UnicodeUtils.each_grapheme(long_german_text) { |g| g } } 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /bench/each_word.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "benchmark" 4 | 5 | require "unicode_utils/each_word" 6 | 7 | TXT_DIR = File.join(File.dirname(__FILE__), "..", "test") 8 | 9 | def read_txt(filename) 10 | File.read(File.join(TXT_DIR, filename), mode: "r:UTF-8:-") 11 | end 12 | 13 | german_text = read_txt("dreilaendereck.txt") 14 | long_german_text = german_text * 30 15 | 16 | Benchmark.bm(35) do |x| 17 | x.report "each_word" do 18 | 30.times { UnicodeUtils.each_word(german_text) { |w| w } } 19 | end 20 | x.report "each_word, long text" do 21 | 1.times { UnicodeUtils.each_word(long_german_text) { |w| w } } 22 | end 23 | end 24 | 
-------------------------------------------------------------------------------- /bench/grep.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "benchmark" 4 | 5 | require "unicode_utils/grep" 6 | 7 | Benchmark.bm { |x| 8 | x.report("angstrom") { 9 | UnicodeUtils.grep(/angstrom/) 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /bench/normalization.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "benchmark" 4 | 5 | require "unicode_utils/nfd" 6 | require "unicode_utils/nfkd" 7 | require "unicode_utils/nfc" 8 | require "unicode_utils/nfkc" 9 | 10 | TXT_DIR = File.join(File.dirname(__FILE__), "..", "test") 11 | 12 | def read_txt(filename) 13 | File.read(File.join(TXT_DIR, filename), mode: "r:UTF-8:-") 14 | end 15 | 16 | german_text = read_txt("dreilaendereck.txt") 17 | long_german_text = german_text * 100 18 | 19 | Benchmark.bmbm do |x| 20 | x.report "nfd" do 21 | 100.times { UnicodeUtils.nfd(german_text) } 22 | end 23 | x.report "nfd, long text" do 24 | 1.times { UnicodeUtils.nfd(long_german_text) } 25 | end 26 | x.report "nfkd" do 27 | 100.times { UnicodeUtils.nfkd(german_text) } 28 | end 29 | x.report "nfkd, long text" do 30 | 1.times { UnicodeUtils.nfkd(long_german_text) } 31 | end 32 | x.report "nfc" do 33 | 100.times { UnicodeUtils.nfc(german_text) } 34 | end 35 | x.report "nfc, long text" do 36 | 1.times { UnicodeUtils.nfc(long_german_text) } 37 | end 38 | x.report "nfkc" do 39 | 100.times { UnicodeUtils.nfkc(german_text) } 40 | end 41 | x.report "nfkc, long text" do 42 | 1.times { UnicodeUtils.nfkc(long_german_text) } 43 | end 44 | end 45 | -------------------------------------------------------------------------------- /bench/titlecase.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 
2 | 3 | require "benchmark" 4 | 5 | require "unicode_utils/titlecase" 6 | 7 | TXT_DIR = File.join(File.dirname(__FILE__), "..", "test") 8 | 9 | def read_txt(filename) 10 | File.read(File.join(TXT_DIR, filename), mode: "r:UTF-8:-") 11 | end 12 | 13 | german_text = read_txt("dreilaendereck.txt") 14 | long_german_text = german_text * 30 15 | 16 | Benchmark.bm(35) do |x| 17 | x.report "titlecase" do 18 | 30.times { UnicodeUtils.titlecase(german_text) } 19 | end 20 | x.report "titlecase, long text" do 21 | 1.times { UnicodeUtils.titlecase(long_german_text) } 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /cdata/case_ignorable_set: -------------------------------------------------------------------------------- 1 | 00003a0000b70003870005f400202700fe1300fe5500ff1a00005e0000600000a80000ad0000af0000b40000b80002b00002b10002b20002b30002b40002b50002b60002b70002b80002b90002ba0002bb0002bc0002bd0002be0002bf0002c00002c10002c20002c30002c40002c50002c60002c70002c80002c90002ca0002cb0002cc0002cd0002ce0002cf0002d00002d10002d20002d30002d40002d50002d60002d70002d80002d90002da0002db0002dc0002dd0002de0002df0002e00002e10002e20002e30002e40002e50002e60002e70002e80002e90002ea0002eb0002ec0002ed0002ee0002ef0002f00002f10002f20002f30002f40002f50002f60002f70002f80002f90002fa0002fb0002fc0002fd0002fe0002ff00030000030100030200030300030400030500030600030700030800030900030a00030b00030c00030d00030e00030f00031000031100031200031300031400031500031600031700031800031900031a00031b00031c00031d00031e00031f00032000032100032200032300032400032500032600032700032800032900032a00032b00032c00032d00032e00032f00033000033100033200033300033400033500033600033700033800033900033a00033b00033c00033d00033e00033f00034000034100034200034300034400034500034600034700034800034900034a00034b00034c00034d00034e00034f00035000035100035200035300035400035500035600035700035800035900035a00035b00035c00035d00035e00035f00036000036100036200036300036400036500036600036700036800036900036a00036b00036c
00036d00036e00036f00037400037500037a00038400038500048300048400048500048600048700048800048900055900059100059200059300059400059500059600059700059800059900059a00059b00059c00059d00059e00059f0005a00005a10005a20005a30005a40005a50005a60005a70005a80005a90005aa0005ab0005ac0005ad0005ae0005af0005b00005b10005b20005b30005b40005b50005b60005b70005b80005b90005ba0005bb0005bc0005bd0005bf0005c10005c20005c40005c50005c700060000060100060200060300060400061000061100061200061300061400061500061600061700061800061900061a00064000064b00064c00064d00064e00064f00065000065100065200065300065400065500065600065700065800065900065a00065b00065c00065d00065e00065f0006700006d60006d70006d80006d90006da0006db0006dc0006dd0006df0006e00006e10006e20006e30006e40006e50006e60006e70006e80006ea0006eb0006ec0006ed00070f00071100073000073100073200073300073400073500073600073700073800073900073a00073b00073c00073d00073e00073f00074000074100074200074300074400074500074600074700074800074900074a0007a60007a70007a80007a90007aa0007ab0007ac0007ad0007ae0007af0007b00007eb0007ec0007ed0007ee0007ef0007f00007f10007f20007f30007f40007f50007fa00081600081700081800081900081a00081b00081c00081d00081e00081f00082000082100082200082300082400082500082600082700082800082900082a00082b00082c00082d00085900085a00085b0008e40008e50008e60008e70008e80008e90008ea0008eb0008ec0008ed0008ee0008ef0008f00008f10008f20008f30008f40008f50008f60008f70008f80008f90008fa0008fb0008fc0008fd0008fe00090000090100090200093a00093c00094100094200094300094400094500094600094700094800094d0009510009520009530009540009550009560009570009620009630009710009810009bc0009c10009c20009c30009c40009cd0009e20009e3000a01000a02000a3c000a41000a42000a47000a48000a4b000a4c000a4d000a51000a70000a71000a75000a81000a82000abc000ac1000ac2000ac3000ac4000ac5000ac7000ac8000acd000ae2000ae3000b01000b3c000b3f000b41000b42000b43000b44000b4d000b56000b62000b63000b82000bc0000bcd000c3e000c3f000c40000c46000c47000c48000c4a000c4b000c4c000c4d000c55000c56000c62000c63000cbc000cbf000cc6000ccc000ccd000ce2000ce3000d41000d42000d43000d4400
0d4d000d62000d63000dca000dd2000dd3000dd4000dd6000e31000e34000e35000e36000e37000e38000e39000e3a000e46000e47000e48000e49000e4a000e4b000e4c000e4d000e4e000eb1000eb4000eb5000eb6000eb7000eb8000eb9000ebb000ebc000ec6000ec8000ec9000eca000ecb000ecc000ecd000f18000f19000f35000f37000f39000f71000f72000f73000f74000f75000f76000f77000f78000f79000f7a000f7b000f7c000f7d000f7e000f80000f81000f82000f83000f84000f86000f87000f8d000f8e000f8f000f90000f91000f92000f93000f94000f95000f96000f97000f99000f9a000f9b000f9c000f9d000f9e000f9f000fa0000fa1000fa2000fa3000fa4000fa5000fa6000fa7000fa8000fa9000faa000fab000fac000fad000fae000faf000fb0000fb1000fb2000fb3000fb4000fb5000fb6000fb7000fb8000fb9000fba000fbb000fbc000fc600102d00102e00102f00103000103200103300103400103500103600103700103900103a00103d00103e00105800105900105e00105f00106000107100107200107300107400108200108500108600108d00109d0010fc00135d00135e00135f0017120017130017140017320017330017340017520017530017720017730017b40017b50017b70017b80017b90017ba0017bb0017bc0017bd0017c60017c90017ca0017cb0017cc0017cd0017ce0017cf0017d00017d10017d20017d30017d70017dd00180b00180c00180d0018430018a900192000192100192200192700192800193200193900193a00193b001a17001a18001a56001a58001a59001a5a001a5b001a5c001a5d001a5e001a60001a62001a65001a66001a67001a68001a69001a6a001a6b001a6c001a73001a74001a75001a76001a77001a78001a79001a7a001a7b001a7c001a7f001aa7001b00001b01001b02001b03001b34001b36001b37001b38001b39001b3a001b3c001b42001b6b001b6c001b6d001b6e001b6f001b70001b71001b72001b73001b80001b81001ba2001ba3001ba4001ba5001ba8001ba9001bab001be6001be8001be9001bed001bef001bf0001bf1001c2c001c2d001c2e001c2f001c30001c31001c32001c33001c36001c37001c78001c79001c7a001c7b001c7c001c7d001cd0001cd1001cd2001cd4001cd5001cd6001cd7001cd8001cd9001cda001cdb001cdc001cdd001cde001cdf001ce0001ce2001ce3001ce4001ce5001ce6001ce7001ce8001ced001cf4001d2c001d2d001d2e001d2f001d30001d31001d32001d33001d34001d35001d36001d37001d38001d39001d3a001d3b001d3c001d3d001d3e001d3f001d40001d41001d42001d43001d44001d45001d46001d47001d48001d
49001d4a001d4b001d4c001d4d001d4e001d4f001d50001d51001d52001d53001d54001d55001d56001d57001d58001d59001d5a001d5b001d5c001d5d001d5e001d5f001d60001d61001d62001d63001d64001d65001d66001d67001d68001d69001d6a001d78001d9b001d9c001d9d001d9e001d9f001da0001da1001da2001da3001da4001da5001da6001da7001da8001da9001daa001dab001dac001dad001dae001daf001db0001db1001db2001db3001db4001db5001db6001db7001db8001db9001dba001dbb001dbc001dbd001dbe001dbf001dc0001dc1001dc2001dc3001dc4001dc5001dc6001dc7001dc8001dc9001dca001dcb001dcc001dcd001dce001dcf001dd0001dd1001dd2001dd3001dd4001dd5001dd6001dd7001dd8001dd9001dda001ddb001ddc001ddd001dde001ddf001de0001de1001de2001de3001de4001de5001de6001dfc001dfd001dfe001dff001fbd001fbf001fc0001fc1001fcd001fce001fcf001fdd001fde001fdf001fed001fee001fef001ffd001ffe00200b00200c00200d00200e00200f00202a00202b00202c00202d00202e00206000206100206200206300206400206a00206b00206c00206d00206e00206f00207100207f00209000209100209200209300209400209500209600209700209800209900209a00209b00209c0020d00020d10020d20020d30020d40020d50020d60020d70020d80020d90020da0020db0020dc0020dd0020de0020df0020e00020e10020e20020e30020e40020e50020e60020e70020e80020e90020ea0020eb0020ec0020ed0020ee0020ef0020f0002c7c002c7d002cef002cf0002cf1002d6f002d7f002de0002de1002de2002de3002de4002de5002de6002de7002de8002de9002dea002deb002dec002ded002dee002def002df0002df1002df2002df3002df4002df5002df6002df7002df8002df9002dfa002dfb002dfc002dfd002dfe002dff002e2f00300500302a00302b00302c00302d00303100303200303300303400303500303b00309900309a00309b00309c00309d00309e0030fc0030fd0030fe00a01500a4f800a4f900a4fa00a4fb00a4fc00a4fd00a60c00a66f00a67000a67100a67200a67400a67500a67600a67700a67800a67900a67a00a67b00a67c00a67d00a67f00a69f00a6f000a6f100a70000a70100a70200a70300a70400a70500a70600a70700a70800a70900a70a00a70b00a70c00a70d00a70e00a70f00a71000a71100a71200a71300a71400a71500a71600a71700a71800a71900a71a00a71b00a71c00a71d00a71e00a71f00a72000a72100a77000a78800a78900a78a00a7f800a7f900a80200a80600a80b00a82500a82600a8c400a8e000a8e100a8e2
00a8e300a8e400a8e500a8e600a8e700a8e800a8e900a8ea00a8eb00a8ec00a8ed00a8ee00a8ef00a8f000a8f100a92600a92700a92800a92900a92a00a92b00a92c00a92d00a94700a94800a94900a94a00a94b00a94c00a94d00a94e00a94f00a95000a95100a98000a98100a98200a9b300a9b600a9b700a9b800a9b900a9bc00a9cf00aa2900aa2a00aa2b00aa2c00aa2d00aa2e00aa3100aa3200aa3500aa3600aa4300aa4c00aa7000aab000aab200aab300aab400aab700aab800aabe00aabf00aac100aadd00aaec00aaed00aaf300aaf400aaf600abe500abe800abed00fb1e00fbb200fbb300fbb400fbb500fbb600fbb700fbb800fbb900fbba00fbbb00fbbc00fbbd00fbbe00fbbf00fbc000fbc100fe0000fe0100fe0200fe0300fe0400fe0500fe0600fe0700fe0800fe0900fe0a00fe0b00fe0c00fe0d00fe0e00fe0f00fe2000fe2100fe2200fe2300fe2400fe2500fe2600feff00ff3e00ff4000ff7000ff9e00ff9f00ffe300fff900fffa00fffb0101fd010a01010a02010a03010a05010a06010a0c010a0d010a0e010a0f010a38010a39010a3a010a3f01100101103801103901103a01103b01103c01103d01103e01103f0110400110410110420110430110440110450110460110800110810110b30110b40110b50110b60110b90110ba0110bd01110001110101110201112701112801112901112a01112b01112d01112e01112f0111300111310111320111330111340111800111810111b60111b70111b80111b90111ba0111bb0111bc0111bd0111be0116ab0116ad0116b00116b10116b20116b30116b40116b50116b7016f8f016f90016f91016f92016f93016f94016f95016f96016f97016f98016f99016f9a016f9b016f9c016f9d016f9e016f9f01d16701d16801d16901d17301d17401d17501d17601d17701d17801d17901d17a01d17b01d17c01d17d01d17e01d17f01d18001d18101d18201d18501d18601d18701d18801d18901d18a01d18b01d1aa01d1ab01d1ac01d1ad01d24201d24301d2440e00010e00200e00210e00220e00230e00240e00250e00260e00270e00280e00290e002a0e002b0e002c0e002d0e002e0e002f0e00300e00310e00320e00330e00340e00350e00360e00370e00380e00390e003a0e003b0e003c0e003d0e003e0e003f0e00400e00410e00420e00430e00440e00450e00460e00470e00480e00490e004a0e004b0e004c0e004d0e004e0e004f0e00500e00510e00520e00530e00540e00550e00560e00570e00580e00590e005a0e005b0e005c0e005d0e005e0e005f0e00600e00610e00620e00630e00640e00650e00660e00670e00680e00690e006a0e006b0e006c0e006d0e006e0e006f0e00700e00710e
00720e00730e00740e00750e00760e00770e00780e00790e007a0e007b0e007c0e007d0e007e0e007f0e01000e01010e01020e01030e01040e01050e01060e01070e01080e01090e010a0e010b0e010c0e010d0e010e0e010f0e01100e01110e01120e01130e01140e01150e01160e01170e01180e01190e011a0e011b0e011c0e011d0e011e0e011f0e01200e01210e01220e01230e01240e01250e01260e01270e01280e01290e012a0e012b0e012c0e012d0e012e0e012f0e01300e01310e01320e01330e01340e01350e01360e01370e01380e01390e013a0e013b0e013c0e013d0e013e0e013f0e01400e01410e01420e01430e01440e01450e01460e01470e01480e01490e014a0e014b0e014c0e014d0e014e0e014f0e01500e01510e01520e01530e01540e01550e01560e01570e01580e01590e015a0e015b0e015c0e015d0e015e0e015f0e01600e01610e01620e01630e01640e01650e01660e01670e01680e01690e016a0e016b0e016c0e016d0e016e0e016f0e01700e01710e01720e01730e01740e01750e01760e01770e01780e01790e017a0e017b0e017c0e017d0e017e0e017f0e01800e01810e01820e01830e01840e01850e01860e01870e01880e01890e018a0e018b0e018c0e018d0e018e0e018f0e01900e01910e01920e01930e01940e01950e01960e01970e01980e01990e019a0e019b0e019c0e019d0e019e0e019f0e01a00e01a10e01a20e01a30e01a40e01a50e01a60e01a70e01a80e01a90e01aa0e01ab0e01ac0e01ad0e01ae0e01af0e01b00e01b10e01b20e01b30e01b40e01b50e01b60e01b70e01b80e01b90e01ba0e01bb0e01bc0e01bd0e01be0e01bf0e01c00e01c10e01c20e01c30e01c40e01c50e01c60e01c70e01c80e01c90e01ca0e01cb0e01cc0e01cd0e01ce0e01cf0e01d00e01d10e01d20e01d30e01d40e01d50e01d60e01d70e01d80e01d90e01da0e01db0e01dc0e01dd0e01de0e01df0e01e00e01e10e01e20e01e30e01e40e01e50e01e60e01e70e01e80e01e90e01ea0e01eb0e01ec0e01ed0e01ee0e01ef -------------------------------------------------------------------------------- /cdata/casefold_c_map: -------------------------------------------------------------------------------- 1 | 
00004100006100004200006200004300006300004400006400004500006500004600006600004700006700004800006800004900006900004a00006a00004b00006b00004c00006c00004d00006d00004e00006e00004f00006f00005000007000005100007100005200007200005300007300005400007400005500007500005600007600005700007700005800007800005900007900005a00007a0000b50003bc0000c00000e00000c10000e10000c20000e20000c30000e30000c40000e40000c50000e50000c60000e60000c70000e70000c80000e80000c90000e90000ca0000ea0000cb0000eb0000cc0000ec0000cd0000ed0000ce0000ee0000cf0000ef0000d00000f00000d10000f10000d20000f20000d30000f30000d40000f40000d50000f50000d60000f60000d80000f80000d90000f90000da0000fa0000db0000fb0000dc0000fc0000dd0000fd0000de0000fe00010000010100010200010300010400010500010600010700010800010900010a00010b00010c00010d00010e00010f00011000011100011200011300011400011500011600011700011800011900011a00011b00011c00011d00011e00011f00012000012100012200012300012400012500012600012700012800012900012a00012b00012c00012d00012e00012f00013200013300013400013500013600013700013900013a00013b00013c00013d00013e00013f00014000014100014200014300014400014500014600014700014800014a00014b00014c00014d00014e00014f00015000015100015200015300015400015500015600015700015800015900015a00015b00015c00015d00015e00015f00016000016100016200016300016400016500016600016700016800016900016a00016b00016c00016d00016e00016f0001700001710001720001730001740001750001760001770001780000ff00017900017a00017b00017c00017d00017e00017f00007300018100025300018200018300018400018500018600025400018700018800018900025600018a00025700018b00018c00018e0001dd00018f00025900019000025b00019100019200019300026000019400026300019600026900019700026800019800019900019c00026f00019d00027200019f0002750001a00001a10001a20001a30001a40001a50001a60002800001a70001a80001a90002830001ac0001ad0001ae0002880001af0001b00001b100028a0001b200028b0001b30001b40001b50001b60001b70002920001b80001b90001bc0001bd0001c40001c60001c50001c60001c70001c90001c80001c90001ca0001cc0001cb0001cc0001cd0001ce0001cf0001d00001d10001d20001d30001d40001d500
01d60001d70001d80001d90001da0001db0001dc0001de0001df0001e00001e10001e20001e30001e40001e50001e60001e70001e80001e90001ea0001eb0001ec0001ed0001ee0001ef0001f10001f30001f20001f30001f40001f50001f60001950001f70001bf0001f80001f90001fa0001fb0001fc0001fd0001fe0001ff00020000020100020200020300020400020500020600020700020800020900020a00020b00020c00020d00020e00020f00021000021100021200021300021400021500021600021700021800021900021a00021b00021c00021d00021e00021f00022000019e00022200022300022400022500022600022700022800022900022a00022b00022c00022d00022e00022f00023000023100023200023300023a002c6500023b00023c00023d00019a00023e002c6600024100024200024300018000024400028900024500028c00024600024700024800024900024a00024b00024c00024d00024e00024f0003450003b90003700003710003720003730003760003770003860003ac0003880003ad0003890003ae00038a0003af00038c0003cc00038e0003cd00038f0003ce0003910003b10003920003b20003930003b30003940003b40003950003b50003960003b60003970003b70003980003b80003990003b900039a0003ba00039b0003bb00039c0003bc00039d0003bd00039e0003be00039f0003bf0003a00003c00003a10003c10003a30003c30003a40003c40003a50003c50003a60003c60003a70003c70003a80003c80003a90003c90003aa0003ca0003ab0003cb0003c20003c30003cf0003d70003d00003b20003d10003b80003d50003c60003d60003c00003d80003d90003da0003db0003dc0003dd0003de0003df0003e00003e10003e20003e30003e40003e50003e60003e70003e80003e90003ea0003eb0003ec0003ed0003ee0003ef0003f00003ba0003f10003c10003f40003b80003f50003b50003f70003f80003f90003f20003fa0003fb0003fd00037b0003fe00037c0003ff00037d00040000045000040100045100040200045200040300045300040400045400040500045500040600045600040700045700040800045800040900045900040a00045a00040b00045b00040c00045c00040d00045d00040e00045e00040f00045f00041000043000041100043100041200043200041300043300041400043400041500043500041600043600041700043700041800043800041900043900041a00043a00041b00043b00041c00043c00041d00043d00041e00043e00041f00043f0004200004400004210004410004220004420004230004430004240004440004250004450004260004460004270004470004280004480004
2900044900042a00044a00042b00044b00042c00044c00042d00044d00042e00044e00042f00044f00046000046100046200046300046400046500046600046700046800046900046a00046b00046c00046d00046e00046f00047000047100047200047300047400047500047600047700047800047900047a00047b00047c00047d00047e00047f00048000048100048a00048b00048c00048d00048e00048f00049000049100049200049300049400049500049600049700049800049900049a00049b00049c00049d00049e00049f0004a00004a10004a20004a30004a40004a50004a60004a70004a80004a90004aa0004ab0004ac0004ad0004ae0004af0004b00004b10004b20004b30004b40004b50004b60004b70004b80004b90004ba0004bb0004bc0004bd0004be0004bf0004c00004cf0004c10004c20004c30004c40004c50004c60004c70004c80004c90004ca0004cb0004cc0004cd0004ce0004d00004d10004d20004d30004d40004d50004d60004d70004d80004d90004da0004db0004dc0004dd0004de0004df0004e00004e10004e20004e30004e40004e50004e60004e70004e80004e90004ea0004eb0004ec0004ed0004ee0004ef0004f00004f10004f20004f30004f40004f50004f60004f70004f80004f90004fa0004fb0004fc0004fd0004fe0004ff00050000050100050200050300050400050500050600050700050800050900050a00050b00050c00050d00050e00050f00051000051100051200051300051400051500051600051700051800051900051a00051b00051c00051d00051e00051f00052000052100052200052300052400052500052600052700053100056100053200056200053300056300053400056400053500056500053600056600053700056700053800056800053900056900053a00056a00053b00056b00053c00056c00053d00056d00053e00056e00053f00056f00054000057000054100057100054200057200054300057300054400057400054500057500054600057600054700057700054800057800054900057900054a00057a00054b00057b00054c00057c00054d00057d00054e00057e00054f00057f0005500005800005510005810005520005820005530005830005540005840005550005850005560005860010a0002d000010a1002d010010a2002d020010a3002d030010a4002d040010a5002d050010a6002d060010a7002d070010a8002d080010a9002d090010aa002d0a0010ab002d0b0010ac002d0c0010ad002d0d0010ae002d0e0010af002d0f0010b0002d100010b1002d110010b2002d120010b3002d130010b4002d140010b5002d150010b6002d160010b7002d170010b8002d180010b9002d19
0010ba002d1a0010bb002d1b0010bc002d1c0010bd002d1d0010be002d1e0010bf002d1f0010c0002d200010c1002d210010c2002d220010c3002d230010c4002d240010c5002d250010c7002d270010cd002d2d001e00001e01001e02001e03001e04001e05001e06001e07001e08001e09001e0a001e0b001e0c001e0d001e0e001e0f001e10001e11001e12001e13001e14001e15001e16001e17001e18001e19001e1a001e1b001e1c001e1d001e1e001e1f001e20001e21001e22001e23001e24001e25001e26001e27001e28001e29001e2a001e2b001e2c001e2d001e2e001e2f001e30001e31001e32001e33001e34001e35001e36001e37001e38001e39001e3a001e3b001e3c001e3d001e3e001e3f001e40001e41001e42001e43001e44001e45001e46001e47001e48001e49001e4a001e4b001e4c001e4d001e4e001e4f001e50001e51001e52001e53001e54001e55001e56001e57001e58001e59001e5a001e5b001e5c001e5d001e5e001e5f001e60001e61001e62001e63001e64001e65001e66001e67001e68001e69001e6a001e6b001e6c001e6d001e6e001e6f001e70001e71001e72001e73001e74001e75001e76001e77001e78001e79001e7a001e7b001e7c001e7d001e7e001e7f001e80001e81001e82001e83001e84001e85001e86001e87001e88001e89001e8a001e8b001e8c001e8d001e8e001e8f001e90001e91001e92001e93001e94001e95001e9b001e61001ea0001ea1001ea2001ea3001ea4001ea5001ea6001ea7001ea8001ea9001eaa001eab001eac001ead001eae001eaf001eb0001eb1001eb2001eb3001eb4001eb5001eb6001eb7001eb8001eb9001eba001ebb001ebc001ebd001ebe001ebf001ec0001ec1001ec2001ec3001ec4001ec5001ec6001ec7001ec8001ec9001eca001ecb001ecc001ecd001ece001ecf001ed0001ed1001ed2001ed3001ed4001ed5001ed6001ed7001ed8001ed9001eda001edb001edc001edd001ede001edf001ee0001ee1001ee2001ee3001ee4001ee5001ee6001ee7001ee8001ee9001eea001eeb001eec001eed001eee001eef001ef0001ef1001ef2001ef3001ef4001ef5001ef6001ef7001ef8001ef9001efa001efb001efc001efd001efe001eff001f08001f00001f09001f01001f0a001f02001f0b001f03001f0c001f04001f0d001f05001f0e001f06001f0f001f07001f18001f10001f19001f11001f1a001f12001f1b001f13001f1c001f14001f1d001f15001f28001f20001f29001f21001f2a001f22001f2b001f23001f2c001f24001f2d001f25001f2e001f26001f2f001f27001f38001f30001f39001f31001f3a001f32001f3b001f33001f3c001f34001f3d001f35001f3e00
1f36001f3f001f37001f48001f40001f49001f41001f4a001f42001f4b001f43001f4c001f44001f4d001f45001f59001f51001f5b001f53001f5d001f55001f5f001f57001f68001f60001f69001f61001f6a001f62001f6b001f63001f6c001f64001f6d001f65001f6e001f66001f6f001f67001fb8001fb0001fb9001fb1001fba001f70001fbb001f71001fbe0003b9001fc8001f72001fc9001f73001fca001f74001fcb001f75001fd8001fd0001fd9001fd1001fda001f76001fdb001f77001fe8001fe0001fe9001fe1001fea001f7a001feb001f7b001fec001fe5001ff8001f78001ff9001f79001ffa001f7c001ffb001f7d0021260003c900212a00006b00212b0000e500213200214e00216000217000216100217100216200217200216300217300216400217400216500217500216600217600216700217700216800217800216900217900216a00217a00216b00217b00216c00217c00216d00217d00216e00217e00216f00217f0021830021840024b60024d00024b70024d10024b80024d20024b90024d30024ba0024d40024bb0024d50024bc0024d60024bd0024d70024be0024d80024bf0024d90024c00024da0024c10024db0024c20024dc0024c30024dd0024c40024de0024c50024df0024c60024e00024c70024e10024c80024e20024c90024e30024ca0024e40024cb0024e50024cc0024e60024cd0024e70024ce0024e80024cf0024e9002c00002c30002c01002c31002c02002c32002c03002c33002c04002c34002c05002c35002c06002c36002c07002c37002c08002c38002c09002c39002c0a002c3a002c0b002c3b002c0c002c3c002c0d002c3d002c0e002c3e002c0f002c3f002c10002c40002c11002c41002c12002c42002c13002c43002c14002c44002c15002c45002c16002c46002c17002c47002c18002c48002c19002c49002c1a002c4a002c1b002c4b002c1c002c4c002c1d002c4d002c1e002c4e002c1f002c4f002c20002c50002c21002c51002c22002c52002c23002c53002c24002c54002c25002c55002c26002c56002c27002c57002c28002c58002c29002c59002c2a002c5a002c2b002c5b002c2c002c5c002c2d002c5d002c2e002c5e002c60002c61002c6200026b002c63001d7d002c6400027d002c67002c68002c69002c6a002c6b002c6c002c6d000251002c6e000271002c6f000250002c70000252002c72002c73002c75002c76002c7e00023f002c7f000240002c80002c81002c82002c83002c84002c85002c86002c87002c88002c89002c8a002c8b002c8c002c8d002c8e002c8f002c90002c91002c92002c93002c94002c95002c96002c97002c98002c99002c9a002c9b002c9c002c9d002c9e002c9f002c
a0002ca1002ca2002ca3002ca4002ca5002ca6002ca7002ca8002ca9002caa002cab002cac002cad002cae002caf002cb0002cb1002cb2002cb3002cb4002cb5002cb6002cb7002cb8002cb9002cba002cbb002cbc002cbd002cbe002cbf002cc0002cc1002cc2002cc3002cc4002cc5002cc6002cc7002cc8002cc9002cca002ccb002ccc002ccd002cce002ccf002cd0002cd1002cd2002cd3002cd4002cd5002cd6002cd7002cd8002cd9002cda002cdb002cdc002cdd002cde002cdf002ce0002ce1002ce2002ce3002ceb002cec002ced002cee002cf2002cf300a64000a64100a64200a64300a64400a64500a64600a64700a64800a64900a64a00a64b00a64c00a64d00a64e00a64f00a65000a65100a65200a65300a65400a65500a65600a65700a65800a65900a65a00a65b00a65c00a65d00a65e00a65f00a66000a66100a66200a66300a66400a66500a66600a66700a66800a66900a66a00a66b00a66c00a66d00a68000a68100a68200a68300a68400a68500a68600a68700a68800a68900a68a00a68b00a68c00a68d00a68e00a68f00a69000a69100a69200a69300a69400a69500a69600a69700a72200a72300a72400a72500a72600a72700a72800a72900a72a00a72b00a72c00a72d00a72e00a72f00a73200a73300a73400a73500a73600a73700a73800a73900a73a00a73b00a73c00a73d00a73e00a73f00a74000a74100a74200a74300a74400a74500a74600a74700a74800a74900a74a00a74b00a74c00a74d00a74e00a74f00a75000a75100a75200a75300a75400a75500a75600a75700a75800a75900a75a00a75b00a75c00a75d00a75e00a75f00a76000a76100a76200a76300a76400a76500a76600a76700a76800a76900a76a00a76b00a76c00a76d00a76e00a76f00a77900a77a00a77b00a77c00a77d001d7900a77e00a77f00a78000a78100a78200a78300a78400a78500a78600a78700a78b00a78c00a78d00026500a79000a79100a79200a79300a7a000a7a100a7a200a7a300a7a400a7a500a7a600a7a700a7a800a7a900a7aa00026600ff2100ff4100ff2200ff4200ff2300ff4300ff2400ff4400ff2500ff4500ff2600ff4600ff2700ff4700ff2800ff4800ff2900ff4900ff2a00ff4a00ff2b00ff4b00ff2c00ff4c00ff2d00ff4d00ff2e00ff4e00ff2f00ff4f00ff3000ff5000ff3100ff5100ff3200ff5200ff3300ff5300ff3400ff5400ff3500ff5500ff3600ff5600ff3700ff5700ff3800ff5800ff3900ff5900ff3a00ff5a01040001042801040101042901040201042a01040301042b01040401042c01040501042d01040601042e01040701042f01040801043001040901043101040a01043201040b01043301040c010434
01040d01043501040e01043601040f01043701041001043801041101043901041201043a01041301043b01041401043c01041501043d01041601043e01041701043f01041801044001041901044101041a01044201041b01044301041c01044401041d01044501041e01044601041f01044701042001044801042101044901042201044a01042301044b01042401044c01042501044d01042601044e01042701044f -------------------------------------------------------------------------------- /cdata/casefold_f_map: -------------------------------------------------------------------------------- 1 | 0000df000073000073xxxxxx000130000069000307xxxxxx0001490002bc00006exxxxxx0001f000006a00030cxxxxxx0003900003b9000308000301xxxxxx0003b00003c5000308000301xxxxxx000587000565000582xxxxxx001e96000068000331xxxxxx001e97000074000308xxxxxx001e9800007700030axxxxxx001e9900007900030axxxxxx001e9a0000610002bexxxxxx001e9e000073000073xxxxxx001f500003c5000313xxxxxx001f520003c5000313000300xxxxxx001f540003c5000313000301xxxxxx001f560003c5000313000342xxxxxx001f80001f000003b9xxxxxx001f81001f010003b9xxxxxx001f82001f020003b9xxxxxx001f83001f030003b9xxxxxx001f84001f040003b9xxxxxx001f85001f050003b9xxxxxx001f86001f060003b9xxxxxx001f87001f070003b9xxxxxx001f88001f000003b9xxxxxx001f89001f010003b9xxxxxx001f8a001f020003b9xxxxxx001f8b001f030003b9xxxxxx001f8c001f040003b9xxxxxx001f8d001f050003b9xxxxxx001f8e001f060003b9xxxxxx001f8f001f070003b9xxxxxx001f90001f200003b9xxxxxx001f91001f210003b9xxxxxx001f92001f220003b9xxxxxx001f93001f230003b9xxxxxx001f94001f240003b9xxxxxx001f95001f250003b9xxxxxx001f96001f260003b9xxxxxx001f97001f270003b9xxxxxx001f98001f200003b9xxxxxx001f99001f210003b9xxxxxx001f9a001f220003b9xxxxxx001f9b001f230003b9xxxxxx001f9c001f240003b9xxxxxx001f9d001f250003b9xxxxxx001f9e001f260003b9xxxxxx001f9f001f270003b9xxxxxx001fa0001f600003b9xxxxxx001fa1001f610003b9xxxxxx001fa2001f620003b9xxxxxx001fa3001f630003b9xxxxxx001fa4001f640003b9xxxxxx001fa5001f650003b9xxxxxx001fa6001f660003b9xxxxxx001fa7001f670003b9xxxxxx001fa8001f600003b9xxxxxx001fa9001f610003b9xxxxxx001faa001f620003b9xxxxxx001fab001f630003
b9xxxxxx001fac001f640003b9xxxxxx001fad001f650003b9xxxxxx001fae001f660003b9xxxxxx001faf001f670003b9xxxxxx001fb2001f700003b9xxxxxx001fb30003b10003b9xxxxxx001fb40003ac0003b9xxxxxx001fb60003b1000342xxxxxx001fb70003b10003420003b9xxxxxx001fbc0003b10003b9xxxxxx001fc2001f740003b9xxxxxx001fc30003b70003b9xxxxxx001fc40003ae0003b9xxxxxx001fc60003b7000342xxxxxx001fc70003b70003420003b9xxxxxx001fcc0003b70003b9xxxxxx001fd20003b9000308000300xxxxxx001fd30003b9000308000301xxxxxx001fd60003b9000342xxxxxx001fd70003b9000308000342xxxxxx001fe20003c5000308000300xxxxxx001fe30003c5000308000301xxxxxx001fe40003c1000313xxxxxx001fe60003c5000342xxxxxx001fe70003c5000308000342xxxxxx001ff2001f7c0003b9xxxxxx001ff30003c90003b9xxxxxx001ff40003ce0003b9xxxxxx001ff60003c9000342xxxxxx001ff70003c90003420003b9xxxxxx001ffc0003c90003b9xxxxxx00fb00000066000066xxxxxx00fb01000066000069xxxxxx00fb0200006600006cxxxxxx00fb03000066000066000069xxxxxx00fb0400006600006600006cxxxxxx00fb05000073000074xxxxxx00fb06000073000074xxxxxx00fb13000574000576xxxxxx00fb14000574000565xxxxxx00fb1500057400056bxxxxxx00fb1600057e000576xxxxxx00fb1700057400056dxxxxxx -------------------------------------------------------------------------------- /cdata/casefold_s_map: -------------------------------------------------------------------------------- 1 | 001e9e0000df001f88001f80001f89001f81001f8a001f82001f8b001f83001f8c001f84001f8d001f85001f8e001f86001f8f001f87001f98001f90001f99001f91001f9a001f92001f9b001f93001f9c001f94001f9d001f95001f9e001f96001f9f001f97001fa8001fa0001fa9001fa1001faa001fa2001fab001fa3001fac001fa4001fad001fa5001fae001fa6001faf001fa7001fbc001fb3001fcc001fc3001ffc001ff3 -------------------------------------------------------------------------------- /cdata/cat_set_titlecase: -------------------------------------------------------------------------------- 1 | 
0001c50001c80001cb0001f2001f88001f89001f8a001f8b001f8c001f8d001f8e001f8f001f98001f99001f9a001f9b001f9c001f9d001f9e001f9f001fa8001fa9001faa001fab001fac001fad001fae001faf001fbc001fcc001ffc -------------------------------------------------------------------------------- /cdata/combining_class_map: -------------------------------------------------------------------------------- 1 | 0003340100033501000336010003370100033801001cd401001ce201001ce301001ce401001ce501001ce601001ce701001ce8010020d2010020d3010020d8010020d9010020da010020e5010020e6010020ea010020eb01010a390101d1670101d1680101d1690100093c070009bc07000a3c07000abc07000b3c07000cbc0700103707001b3407001be607001c370700a9b3070110ba070116b7070030990800309a0800094d090009cd09000a4d09000acd09000b4d09000bcd09000c4d09000ccd09000d4d09000dca09000e3a09000f84090010390900103a0900171409001734090017d209001a6009001b4409001baa09001bab09001bf209001bf309002d7f0900a8060900a8c40900a9530900a9c00900aaf60900abed09010a3f09011046090110b90901113309011134090111c0090116b6090005b00a0005b10b0005b20c0005b30d0005b40e0005b50f0005b6100005b7110005b8120005c7120005b9130005ba130005bb140005bc150005bd160005bf170005c1180005c21900fb1e1a00064b1b0008f01b00064c1c0008f11c00064d1d0008f21d0006181e00064e1e0006191f00064f1f00061a200006502000065121000652220006702300071124000c5554000c565b000e3867000e3967000e486b000e496b000e4a6b000e4b6b000eb876000eb976000ec87a000ec97a000eca7a000ecb7a000f7181000f7282000f7a82000f7b82000f7c82000f7d82000f8082000f7484000321ca000322ca000327ca000328ca001dd0ca001dced600031bd8000f39d801d165d801d166d801d16ed801d16fd801d170d801d171d801d172d800302ada000316dc000317dc000318dc000319dc00031cdc00031ddc00031edc00031fdc000320dc000323dc000324dc000325dc000326dc000329dc00032adc00032bdc00032cdc00032ddc00032edc00032fdc000330dc000331dc000332dc000333dc000339dc00033adc00033bdc00033cdc000347dc000348dc000349dc00034ddc00034edc000353dc000354dc000355dc000356dc000359dc00035adc000591dc000596dc00059bdc0005a2dc0005a3dc0005a4dc0005a5dc0005a6dc0005a7dc0005aadc0005c5dc000655dc000
656dc00065cdc00065fdc0006e3dc0006eadc0006eddc000731dc000734dc000737dc000738dc000739dc00073bdc00073cdc00073edc000742dc000744dc000746dc000748dc0007f2dc000859dc00085adc00085bdc0008e6dc0008e9dc0008eddc0008eedc0008efdc0008f6dc0008f9dc0008fadc000952dc000f18dc000f19dc000f35dc000f37dc000fc6dc00108ddc00193bdc001a18dc001a7fdc001b6cdc001cd5dc001cd6dc001cd7dc001cd8dc001cd9dc001cdcdc001cdddc001cdedc001cdfdc001ceddc001dc2dc001dcadc001dcfdc001dfddc001dffdc0020e8dc0020ecdc0020eddc0020eedc0020efdc00a92bdc00a92cdc00a92ddc00aab4dc0101fddc010a0ddc010a3adc01d17bdc01d17cdc01d17ddc01d17edc01d17fdc01d180dc01d181dc01d182dc01d18adc01d18bdc00059ade0005adde001939de00302dde00302ee000302fe001d16de20005aee40018a9e400302be4000300e6000301e6000302e6000303e6000304e6000305e6000306e6000307e6000308e6000309e600030ae600030be600030ce600030de600030ee600030fe6000310e6000311e6000312e6000313e6000314e600033de600033ee600033fe6000340e6000341e6000342e6000343e6000344e6000346e600034ae600034be600034ce6000350e6000351e6000352e6000357e600035be6000363e6000364e6000365e6000366e6000367e6000368e6000369e600036ae600036be600036ce600036de600036ee600036fe6000483e6000484e6000485e6000486e6000487e6000592e6000593e6000594e6000595e6000597e6000598e6000599e600059ce600059de600059ee600059fe60005a0e60005a1e60005a8e60005a9e60005abe60005ace60005afe60005c4e6000610e6000611e6000612e6000613e6000614e6000615e6000616e6000617e6000653e6000654e6000657e6000658e6000659e600065ae600065be600065de600065ee60006d6e60006d7e60006d8e60006d9e60006dae60006dbe60006dce60006dfe60006e0e60006e1e60006e2e60006e4e60006e7e60006e8e60006ebe60006ece6000730e6000732e6000733e6000735e6000736e600073ae600073de600073fe6000740e6000741e6000743e6000745e6000747e6000749e600074ae60007ebe60007ece60007ede60007eee60007efe60007f0e60007f1e60007f3e6000816e6000817e6000818e6000819e600081be600081ce600081de600081ee600081fe6000820e6000821e6000822e6000823e6000825e6000826e6000827e6000829e600082ae600082be600082ce600082de60008e4e60008e5e60008e7e60008e8e60008eae60008ebe60008ece60008f3e60008f4e60008f5e6000
8f7e60008f8e60008fbe60008fce60008fde60008fee6000951e6000953e6000954e6000f82e6000f83e6000f86e6000f87e600135de600135ee600135fe60017dde600193ae6001a17e6001a75e6001a76e6001a77e6001a78e6001a79e6001a7ae6001a7be6001a7ce6001b6be6001b6de6001b6ee6001b6fe6001b70e6001b71e6001b72e6001b73e6001cd0e6001cd1e6001cd2e6001cdae6001cdbe6001ce0e6001cf4e6001dc0e6001dc1e6001dc3e6001dc4e6001dc5e6001dc6e6001dc7e6001dc8e6001dc9e6001dcbe6001dcce6001dd1e6001dd2e6001dd3e6001dd4e6001dd5e6001dd6e6001dd7e6001dd8e6001dd9e6001ddae6001ddbe6001ddce6001ddde6001ddee6001ddfe6001de0e6001de1e6001de2e6001de3e6001de4e6001de5e6001de6e6001dfee60020d0e60020d1e60020d4e60020d5e60020d6e60020d7e60020dbe60020dce60020e1e60020e7e60020e9e60020f0e6002cefe6002cf0e6002cf1e6002de0e6002de1e6002de2e6002de3e6002de4e6002de5e6002de6e6002de7e6002de8e6002de9e6002deae6002debe6002dece6002dede6002deee6002defe6002df0e6002df1e6002df2e6002df3e6002df4e6002df5e6002df6e6002df7e6002df8e6002df9e6002dfae6002dfbe6002dfce6002dfde6002dfee6002dffe600a66fe600a674e600a675e600a676e600a677e600a678e600a679e600a67ae600a67be600a67ce600a67de600a69fe600a6f0e600a6f1e600a8e0e600a8e1e600a8e2e600a8e3e600a8e4e600a8e5e600a8e6e600a8e7e600a8e8e600a8e9e600a8eae600a8ebe600a8ece600a8ede600a8eee600a8efe600a8f0e600a8f1e600aab0e600aab2e600aab3e600aab7e600aab8e600aabee600aabfe600aac1e600fe20e600fe21e600fe22e600fe23e600fe24e600fe25e600fe26e6010a0fe6010a38e6011100e6011101e6011102e601d185e601d186e601d187e601d188e601d189e601d1aae601d1abe601d1ace601d1ade601d242e601d243e601d244e6000315e800031ae8000358e800302ce800035ce900035fe9000362e9001dfce900035dea00035eea000360ea000361ea001dcdea000345f0 -------------------------------------------------------------------------------- /cdata/composition_exclusion_set: -------------------------------------------------------------------------------- 1 | 
00034000034100034300034400037400037e00038700095800095900095a00095b00095c00095d00095e00095f0009dc0009dd0009df000a33000a36000a59000a5a000a5b000a5e000b5c000b5d000f43000f4d000f52000f57000f5c000f69000f73000f75000f76000f78000f81000f93000f9d000fa2000fa7000fac000fb9001f71001f73001f75001f77001f79001f7b001f7d001fbb001fbe001fc9001fcb001fd3001fdb001fe3001feb001fee001fef001ff9001ffb001ffd00200000200100212600212a00212b00232900232a002adc00f90000f90100f90200f90300f90400f90500f90600f90700f90800f90900f90a00f90b00f90c00f90d00f90e00f90f00f91000f91100f91200f91300f91400f91500f91600f91700f91800f91900f91a00f91b00f91c00f91d00f91e00f91f00f92000f92100f92200f92300f92400f92500f92600f92700f92800f92900f92a00f92b00f92c00f92d00f92e00f92f00f93000f93100f93200f93300f93400f93500f93600f93700f93800f93900f93a00f93b00f93c00f93d00f93e00f93f00f94000f94100f94200f94300f94400f94500f94600f94700f94800f94900f94a00f94b00f94c00f94d00f94e00f94f00f95000f95100f95200f95300f95400f95500f95600f95700f95800f95900f95a00f95b00f95c00f95d00f95e00f95f00f96000f96100f96200f96300f96400f96500f96600f96700f96800f96900f96a00f96b00f96c00f96d00f96e00f96f00f97000f97100f97200f97300f97400f97500f97600f97700f97800f97900f97a00f97b00f97c00f97d00f97e00f97f00f98000f98100f98200f98300f98400f98500f98600f98700f98800f98900f98a00f98b00f98c00f98d00f98e00f98f00f99000f99100f99200f99300f99400f99500f99600f99700f99800f99900f99a00f99b00f99c00f99d00f99e00f99f00f9a000f9a100f9a200f9a300f9a400f9a500f9a600f9a700f9a800f9a900f9aa00f9ab00f9ac00f9ad00f9ae00f9af00f9b000f9b100f9b200f9b300f9b400f9b500f9b600f9b700f9b800f9b900f9ba00f9bb00f9bc00f9bd00f9be00f9bf00f9c000f9c100f9c200f9c300f9c400f9c500f9c600f9c700f9c800f9c900f9ca00f9cb00f9cc00f9cd00f9ce00f9cf00f9d000f9d100f9d200f9d300f9d400f9d500f9d600f9d700f9d800f9d900f9da00f9db00f9dc00f9dd00f9de00f9df00f9e000f9e100f9e200f9e300f9e400f9e500f9e600f9e700f9e800f9e900f9ea00f9eb00f9ec00f9ed00f9ee00f9ef00f9f000f9f100f9f200f9f300f9f400f9f500f9f600f9f700f9f800f9f900f9fa00f9fb00f9fc00f9fd00f9fe00f9ff00fa0000fa0100fa0200fa0300fa0400fa0500
fa0600fa0700fa0800fa0900fa0a00fa0b00fa0c00fa0d00fa1000fa1200fa1500fa1600fa1700fa1800fa1900fa1a00fa1b00fa1c00fa1d00fa1e00fa2000fa2200fa2500fa2600fa2a00fa2b00fa2c00fa2d00fa2e00fa2f00fa3000fa3100fa3200fa3300fa3400fa3500fa3600fa3700fa3800fa3900fa3a00fa3b00fa3c00fa3d00fa3e00fa3f00fa4000fa4100fa4200fa4300fa4400fa4500fa4600fa4700fa4800fa4900fa4a00fa4b00fa4c00fa4d00fa4e00fa4f00fa5000fa5100fa5200fa5300fa5400fa5500fa5600fa5700fa5800fa5900fa5a00fa5b00fa5c00fa5d00fa5e00fa5f00fa6000fa6100fa6200fa6300fa6400fa6500fa6600fa6700fa6800fa6900fa6a00fa6b00fa6c00fa6d00fa7000fa7100fa7200fa7300fa7400fa7500fa7600fa7700fa7800fa7900fa7a00fa7b00fa7c00fa7d00fa7e00fa7f00fa8000fa8100fa8200fa8300fa8400fa8500fa8600fa8700fa8800fa8900fa8a00fa8b00fa8c00fa8d00fa8e00fa8f00fa9000fa9100fa9200fa9300fa9400fa9500fa9600fa9700fa9800fa9900fa9a00fa9b00fa9c00fa9d00fa9e00fa9f00faa000faa100faa200faa300faa400faa500faa600faa700faa800faa900faaa00faab00faac00faad00faae00faaf00fab000fab100fab200fab300fab400fab500fab600fab700fab800fab900faba00fabb00fabc00fabd00fabe00fabf00fac000fac100fac200fac300fac400fac500fac600fac700fac800fac900faca00facb00facc00facd00face00facf00fad000fad100fad200fad300fad400fad500fad600fad700fad800fad900fb1d00fb1f00fb2a00fb2b00fb2c00fb2d00fb2e00fb2f00fb3000fb3100fb3200fb3300fb3400fb3500fb3600fb3800fb3900fb3a00fb3b00fb3c00fb3e00fb4000fb4100fb4300fb4400fb4600fb4700fb4800fb4900fb4a00fb4b00fb4c00fb4d00fb4e01d15e01d15f01d16001d16101d16201d16301d16401d1bb01d1bc01d1bd01d1be01d1bf01d1c002f80002f80102f80202f80302f80402f80502f80602f80702f80802f80902f80a02f80b02f80c02f80d02f80e02f80f02f81002f81102f81202f81302f81402f81502f81602f81702f81802f81902f81a02f81b02f81c02f81d02f81e02f81f02f82002f82102f82202f82302f82402f82502f82602f82702f82802f82902f82a02f82b02f82c02f82d02f82e02f82f02f83002f83102f83202f83302f83402f83502f83602f83702f83802f83902f83a02f83b02f83c02f83d02f83e02f83f02f84002f84102f84202f84302f84402f84502f84602f84702f84802f84902f84a02f84b02f84c02f84d02f84e02f84f02f85002f85102f85202f85302f85402f85502f85602f85702f8
5802f85902f85a02f85b02f85c02f85d02f85e02f85f02f86002f86102f86202f86302f86402f86502f86602f86702f86802f86902f86a02f86b02f86c02f86d02f86e02f86f02f87002f87102f87202f87302f87402f87502f87602f87702f87802f87902f87a02f87b02f87c02f87d02f87e02f87f02f88002f88102f88202f88302f88402f88502f88602f88702f88802f88902f88a02f88b02f88c02f88d02f88e02f88f02f89002f89102f89202f89302f89402f89502f89602f89702f89802f89902f89a02f89b02f89c02f89d02f89e02f89f02f8a002f8a102f8a202f8a302f8a402f8a502f8a602f8a702f8a802f8a902f8aa02f8ab02f8ac02f8ad02f8ae02f8af02f8b002f8b102f8b202f8b302f8b402f8b502f8b602f8b702f8b802f8b902f8ba02f8bb02f8bc02f8bd02f8be02f8bf02f8c002f8c102f8c202f8c302f8c402f8c502f8c602f8c702f8c802f8c902f8ca02f8cb02f8cc02f8cd02f8ce02f8cf02f8d002f8d102f8d202f8d302f8d402f8d502f8d602f8d702f8d802f8d902f8da02f8db02f8dc02f8dd02f8de02f8df02f8e002f8e102f8e202f8e302f8e402f8e502f8e602f8e702f8e802f8e902f8ea02f8eb02f8ec02f8ed02f8ee02f8ef02f8f002f8f102f8f202f8f302f8f402f8f502f8f602f8f702f8f802f8f902f8fa02f8fb02f8fc02f8fd02f8fe02f8ff02f90002f90102f90202f90302f90402f90502f90602f90702f90802f90902f90a02f90b02f90c02f90d02f90e02f90f02f91002f91102f91202f91302f91402f91502f91602f91702f91802f91902f91a02f91b02f91c02f91d02f91e02f91f02f92002f92102f92202f92302f92402f92502f92602f92702f92802f92902f92a02f92b02f92c02f92d02f92e02f92f02f93002f93102f93202f93302f93402f93502f93602f93702f93802f93902f93a02f93b02f93c02f93d02f93e02f93f02f94002f94102f94202f94302f94402f94502f94602f94702f94802f94902f94a02f94b02f94c02f94d02f94e02f94f02f95002f95102f95202f95302f95402f95502f95602f95702f95802f95902f95a02f95b02f95c02f95d02f95e02f95f02f96002f96102f96202f96302f96402f96502f96602f96702f96802f96902f96a02f96b02f96c02f96d02f96e02f96f02f97002f97102f97202f97302f97402f97502f97602f97702f97802f97902f97a02f97b02f97c02f97d02f97e02f97f02f98002f98102f98202f98302f98402f98502f98602f98702f98802f98902f98a02f98b02f98c02f98d02f98e02f98f02f99002f99102f99202f99302f99402f99502f99602f99702f99802f99902f99a02f99b02f99c02f99d02f99e02f99f02f9a002f9a102f9a202f9a302f9a402f9a5
02f9a602f9a702f9a802f9a902f9aa02f9ab02f9ac02f9ad02f9ae02f9af02f9b002f9b102f9b202f9b302f9b402f9b502f9b602f9b702f9b802f9b902f9ba02f9bb02f9bc02f9bd02f9be02f9bf02f9c002f9c102f9c202f9c302f9c402f9c502f9c602f9c702f9c802f9c902f9ca02f9cb02f9cc02f9cd02f9ce02f9cf02f9d002f9d102f9d202f9d302f9d402f9d502f9d602f9d702f9d802f9d902f9da02f9db02f9dc02f9dd02f9de02f9df02f9e002f9e102f9e202f9e302f9e402f9e502f9e602f9e702f9e802f9e902f9ea02f9eb02f9ec02f9ed02f9ee02f9ef02f9f002f9f102f9f202f9f302f9f402f9f502f9f602f9f702f9f802f9f902f9fa02f9fb02f9fc02f9fd02f9fe02f9ff02fa0002fa0102fa0202fa0302fa0402fa0502fa0602fa0702fa0802fa0902fa0a02fa0b02fa0c02fa0d02fa0e02fa0f02fa1002fa1102fa1202fa1302fa1402fa1502fa1602fa1702fa1802fa1902fa1a02fa1b02fa1c02fa1d -------------------------------------------------------------------------------- /cdata/cond_lc_map: -------------------------------------------------------------------------------- 1 | 000049;000069,000307;lt;More_Above 2 | 000049;000131;az;Not_Before_Dot 3 | 000049;000131;tr;Not_Before_Dot 4 | 00004a;00006a,000307;lt;More_Above 5 | 000069;000069;az; 6 | 000069;000069;tr; 7 | 0000cc;000069,000307,000300;lt; 8 | 0000cd;000069,000307,000301;lt; 9 | 000128;000069,000307,000303;lt; 10 | 00012e;00012f,000307;lt;More_Above 11 | 000130;000069;az; 12 | 000130;000069;tr; 13 | 000307;000307;lt;After_Soft_Dotted 14 | 000307;;az;After_I 15 | 000307;;tr;After_I 16 | 0003a3;0003c2;;Final_Sigma 17 | -------------------------------------------------------------------------------- /cdata/cond_tc_map: -------------------------------------------------------------------------------- 1 | 0003a3;0003a3;;Final_Sigma 2 | 000307;;lt;After_Soft_Dotted 3 | 000049;000049;lt;More_Above 4 | 00004a;00004a;lt;More_Above 5 | 00012e;00012e;lt;More_Above 6 | 0000cc;0000cc;lt; 7 | 0000cd;0000cd;lt; 8 | 000128;000128;lt; 9 | 000130;000130;tr; 10 | 000130;000130;az; 11 | 000307;000307;tr;After_I 12 | 000307;000307;az;After_I 13 | 000049;000049;tr;Not_Before_Dot 14 | 
000049;000049;az;Not_Before_Dot 15 | 000069;000130;tr; 16 | 000069;000130;az; 17 | -------------------------------------------------------------------------------- /cdata/cond_uc_map: -------------------------------------------------------------------------------- 1 | 000049;000049;az;Not_Before_Dot 2 | 000049;000049;lt;More_Above 3 | 000049;000049;tr;Not_Before_Dot 4 | 00004a;00004a;lt;More_Above 5 | 000069;000130;az; 6 | 000069;000130;tr; 7 | 0000cc;0000cc;lt; 8 | 0000cd;0000cd;lt; 9 | 000128;000128;lt; 10 | 00012e;00012e;lt;More_Above 11 | 000130;000130;az; 12 | 000130;000130;tr; 13 | 000307;000307;az;After_I 14 | 000307;000307;tr;After_I 15 | 000307;;lt;After_Soft_Dotted 16 | 0003a3;0003a3;;Final_Sigma 17 | -------------------------------------------------------------------------------- /cdata/east_asian_width_property_ranges: -------------------------------------------------------------------------------- 1 | 003400004dbf3004e00009fff300ac0000d7a3300e00000f8ff102000002f7ff302fa1e02fffd303000003fffd30f00000ffffd110000010fffd1 -------------------------------------------------------------------------------- /cdata/general_category_aliases: -------------------------------------------------------------------------------- 1 | C;Other 2 | Cc;Control 3 | Cf;Format 4 | Cn;Unassigned 5 | Co;Private_Use 6 | Cs;Surrogate 7 | L;Letter 8 | LC;Cased_Letter 9 | Ll;Lowercase_Letter 10 | Lm;Modifier_Letter 11 | Lo;Other_Letter 12 | Lt;Titlecase_Letter 13 | Lu;Uppercase_Letter 14 | M;Mark 15 | Mc;Spacing_Mark 16 | Me;Enclosing_Mark 17 | Mn;Nonspacing_Mark 18 | N;Number 19 | Nd;Decimal_Number 20 | Nl;Letter_Number 21 | No;Other_Number 22 | P;Punctuation 23 | Pc;Connector_Punctuation 24 | Pd;Dash_Punctuation 25 | Pe;Close_Punctuation 26 | Pf;Final_Punctuation 27 | Pi;Initial_Punctuation 28 | Po;Other_Punctuation 29 | Ps;Open_Punctuation 30 | S;Symbol 31 | Sc;Currency_Symbol 32 | Sk;Modifier_Symbol 33 | Sm;Math_Symbol 34 | So;Other_Symbol 35 | Z;Separator 36 | Zl;Line_Separator 37 
| Zp;Paragraph_Separator 38 | Zs;Space_Separator 39 | -------------------------------------------------------------------------------- /cdata/general_category_ranges: -------------------------------------------------------------------------------- 1 | 003400004db5Lo004e00009fccLo00ac0000d7a3Lo00d80000db7fCs00db8000dbffCs00dc0000dfffCs00e00000f8ffCo02000002a6d6Lo02a70002b734Lo02b74002b81dLo0f00000ffffdCo10000010fffdCo -------------------------------------------------------------------------------- /cdata/jamo_short_names: -------------------------------------------------------------------------------- 1 | 001100G 2 | 001101GG 3 | 001102N 4 | 001103D 5 | 001104DD 6 | 001105R 7 | 001106M 8 | 001107B 9 | 001108BB 10 | 001109S 11 | 00110aSS 12 | 00110cJ 13 | 00110dJJ 14 | 00110eC 15 | 00110fK 16 | 001110T 17 | 001111P 18 | 001112H 19 | 001161A 20 | 001162AE 21 | 001163YA 22 | 001164YAE 23 | 001165EO 24 | 001166E 25 | 001167YEO 26 | 001168YE 27 | 001169O 28 | 00116aWA 29 | 00116bWAE 30 | 00116cOE 31 | 00116dYO 32 | 00116eU 33 | 00116fWEO 34 | 001170WE 35 | 001171WI 36 | 001172YU 37 | 001173EU 38 | 001174YI 39 | 001175I 40 | 0011a8G 41 | 0011a9GG 42 | 0011aaGS 43 | 0011abN 44 | 0011acNJ 45 | 0011adNH 46 | 0011aeD 47 | 0011afL 48 | 0011b0LG 49 | 0011b1LM 50 | 0011b2LB 51 | 0011b3LS 52 | 0011b4LT 53 | 0011b5LP 54 | 0011b6LH 55 | 0011b7M 56 | 0011b8B 57 | 0011b9BS 58 | 0011baS 59 | 0011bbSS 60 | 0011bcNG 61 | 0011bdJ 62 | 0011beC 63 | 0011bfK 64 | 0011c0T 65 | 0011c1P 66 | 0011c2H 67 | -------------------------------------------------------------------------------- /cdata/name_aliases: -------------------------------------------------------------------------------- 1 | 0000002204NULL503NUL0000012210START OF HEADING503SOH000002220dSTART OF TEXT503STX000003220bEND OF TEXT503ETX0000042213END OF TRANSMISSION503EOT0000052207ENQUIRY503ENQ000006220bACKNOWLEDGE503ACK0000072205ALERT503BEL0000082209BACKSPACE502BS0000094214CHARACTER TABULATION215HORIZONTAL 
TABULATION502HT503TAB00000a6209LINE FEED208NEW LINE20bEND OF LINE502LF502NL503EOL00000b320fLINE TABULATION213VERTICAL TABULATION502VT00000c2209FORM FEED502FF00000d220fCARRIAGE RETURN502CR00000e3209SHIFT OUT211LOCKING-SHIFT ONE502SO00000f3208SHIFT IN212LOCKING-SHIFT ZERO502SI0000102210DATA LINK ESCAPE503DLE0000112212DEVICE CONTROL ONE503DC10000122212DEVICE CONTROL TWO503DC20000132214DEVICE CONTROL THREE503DC30000142213DEVICE CONTROL FOUR503DC40000152214NEGATIVE ACKNOWLEDGE503NAK0000162210SYNCHRONOUS IDLE503SYN0000172219END OF TRANSMISSION BLOCK503ETB0000182206CANCEL503CAN000019220dEND OF MEDIUM503EOM00001a220aSUBSTITUTE503SUB00001b2206ESCAPE503ESC00001c321aINFORMATION SEPARATOR FOUR20eFILE SEPARATOR502FS00001d321bINFORMATION SEPARATOR THREE20fGROUP SEPARATOR502GS00001e3219INFORMATION SEPARATOR TWO210RECORD SEPARATOR502RS00001f3219INFORMATION SEPARATOR ONE20eUNIT SEPARATOR502US0000201502SP00007f2206DELETE503DEL0000802411PADDING CHARACTER503PAD0000812411HIGH OCTET PRESET503HOP0000822214BREAK PERMITTED HERE503BPH000083220dNO BREAK HERE503NBH0000842205INDEX503IND0000852209NEXT LINE503NEL0000862216START OF SELECTED AREA503SSA0000872214END OF SELECTED AREA503ESA0000883218CHARACTER TABULATION SET219HORIZONTAL TABULATION SET503HTS0000893227CHARACTER TABULATION WITH JUSTIFICATION228HORIZONTAL TABULATION WITH JUSTIFICATION503HTJ00008a3213LINE TABULATION SET217VERTICAL TABULATION SET503VTS00008b3214PARTIAL LINE FORWARD211PARTIAL LINE DOWN503PLD00008c3215PARTIAL LINE BACKWARD20fPARTIAL LINE UP503PLU00008d3211REVERSE LINE FEED20dREVERSE INDEX502RI00008e3210SINGLE SHIFT TWO20eSINGLE-SHIFT-2503SS200008f3212SINGLE SHIFT THREE20eSINGLE-SHIFT-3503SS30000902215DEVICE CONTROL STRING503DCS000091320fPRIVATE USE ONE20dPRIVATE USE-1503PU1000092320fPRIVATE USE TWO20dPRIVATE USE-2503PU20000932212SET TRANSMIT STATE503STS0000942210CANCEL CHARACTER503CCH000095220fMESSAGE WAITING502MW0000963215START OF GUARDED AREA217START OF PROTECTED AREA503SPA0000973213END OF GUARDED AREA215END OF PROTECTED 
AREA503EPA000098220fSTART OF STRING503SOS0000992423SINGLE GRAPHIC CHARACTER INTRODUCER503SGC00009a221bSINGLE CHARACTER INTRODUCER503SCI00009b221bCONTROL SEQUENCE INTRODUCER503CSI00009c2211STRING TERMINATOR502ST00009d2218OPERATING SYSTEM COMMAND503OSC00009e220fPRIVACY MESSAGE502PM00009f221bAPPLICATION PROGRAM COMMAND503APC0000a01504NBSP0000ad1503SHY0001a21118LATIN CAPITAL LETTER GHA0001a31116LATIN SMALL LETTER GHA00034f1503CGJ0007091122SYRIAC SUBLINEAR COLON SKEWED LEFT000cde1113KANNADA LETTER LLLA000e9d1111LAO LETTER FO FON000e9f1111LAO LETTER FO FAY000ea3110dLAO LETTER RO000ea5110dLAO LETTER LO000fd01123TIBETAN MARK BKA- SHOG GI MGO RGYAN00180b1504FVS100180c1504FVS200180d1504FVS300180e1503MVS00200b1504ZWSP00200c1504ZWNJ00200d1503ZWJ00200e1503LRM00200f1503RLM00202a1503LRE00202b1503RLE00202c1503PDF00202d1503LRO00202e1503RLO00202f1505NNBSP00205f1504MMSP0020601502WJ002118111dWEIERSTRASS ELLIPTIC FUNCTION0024481111MICR ON US SYMBOL0024491110MICR DASH SYMBOL00a015111aYI SYLLABLE ITERATION MARK00fe18113dPRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET00fe001503VS100fe011503VS200fe021503VS300fe031503VS400fe041503VS500fe051503VS600fe061503VS700fe071503VS800fe081503VS900fe091504VS1000fe0a1504VS1100fe0b1504VS1200fe0c1504VS1300fe0d1504VS1400fe0e1504VS1500fe0f1504VS1600feff330fBYTE ORDER MARK503BOM506ZWNBSP01d0c51134BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA 
VASIS0e01001504VS170e01011504VS180e01021504VS190e01031504VS200e01041504VS210e01051504VS220e01061504VS230e01071504VS240e01081504VS250e01091504VS260e010a1504VS270e010b1504VS280e010c1504VS290e010d1504VS300e010e1504VS310e010f1504VS320e01101504VS330e01111504VS340e01121504VS350e01131504VS360e01141504VS370e01151504VS380e01161504VS390e01171504VS400e01181504VS410e01191504VS420e011a1504VS430e011b1504VS440e011c1504VS450e011d1504VS460e011e1504VS470e011f1504VS480e01201504VS490e01211504VS500e01221504VS510e01231504VS520e01241504VS530e01251504VS540e01261504VS550e01271504VS560e01281504VS570e01291504VS580e012a1504VS590e012b1504VS600e012c1504VS610e012d1504VS620e012e1504VS630e012f1504VS640e01301504VS650e01311504VS660e01321504VS670e01331504VS680e01341504VS690e01351504VS700e01361504VS710e01371504VS720e01381504VS730e01391504VS740e013a1504VS750e013b1504VS760e013c1504VS770e013d1504VS780e013e1504VS790e013f1504VS800e01401504VS810e01411504VS820e01421504VS830e01431504VS840e01441504VS850e01451504VS860e01461504VS870e01471504VS880e01481504VS890e01491504VS900e014a1504VS910e014b1504VS920e014c1504VS930e014d1504VS940e014e1504VS950e014f1504VS960e01501504VS970e01511504VS980e01521504VS990e01531505VS1000e01541505VS1010e01551505VS1020e01561505VS1030e01571505VS1040e01581505VS1050e01591505VS1060e015a1505VS1070e015b1505VS1080e015c1505VS1090e015d1505VS1100e015e1505VS1110e015f1505VS1120e01601505VS1130e01611505VS1140e01621505VS1150e01631505VS1160e01641505VS1170e01651505VS1180e01661505VS1190e01671505VS1200e01681505VS1210e01691505VS1220e016a1505VS1230e016b1505VS1240e016c1505VS1250e016d1505VS1260e016e1505VS1270e016f1505VS1280e01701505VS1290e01711505VS1300e01721505VS1310e01731505VS1320e01741505VS1330e01751505VS1340e01761505VS1350e01771505VS1360e01781505VS1370e01791505VS1380e017a1505VS1390e017b1505VS1400e017c1505VS1410e017d1505VS1420e017e1505VS1430e017f1505VS1440e01801505VS1450e01811505VS1460e01821505VS1470e01831505VS1480e01841505VS1490e01851505VS1500e01861505VS1510e01871505VS1520e01881505VS1530e01891505VS1540e018a15
05VS1550e018b1505VS1560e018c1505VS1570e018d1505VS1580e018e1505VS1590e018f1505VS1600e01901505VS1610e01911505VS1620e01921505VS1630e01931505VS1640e01941505VS1650e01951505VS1660e01961505VS1670e01971505VS1680e01981505VS1690e01991505VS1700e019a1505VS1710e019b1505VS1720e019c1505VS1730e019d1505VS1740e019e1505VS1750e019f1505VS1760e01a01505VS1770e01a11505VS1780e01a21505VS1790e01a31505VS1800e01a41505VS1810e01a51505VS1820e01a61505VS1830e01a71505VS1840e01a81505VS1850e01a91505VS1860e01aa1505VS1870e01ab1505VS1880e01ac1505VS1890e01ad1505VS1900e01ae1505VS1910e01af1505VS1920e01b01505VS1930e01b11505VS1940e01b21505VS1950e01b31505VS1960e01b41505VS1970e01b51505VS1980e01b61505VS1990e01b71505VS2000e01b81505VS2010e01b91505VS2020e01ba1505VS2030e01bb1505VS2040e01bc1505VS2050e01bd1505VS2060e01be1505VS2070e01bf1505VS2080e01c01505VS2090e01c11505VS2100e01c21505VS2110e01c31505VS2120e01c41505VS2130e01c51505VS2140e01c61505VS2150e01c71505VS2160e01c81505VS2170e01c91505VS2180e01ca1505VS2190e01cb1505VS2200e01cc1505VS2210e01cd1505VS2220e01ce1505VS2230e01cf1505VS2240e01d01505VS2250e01d11505VS2260e01d21505VS2270e01d31505VS2280e01d41505VS2290e01d51505VS2300e01d61505VS2310e01d71505VS2320e01d81505VS2330e01d91505VS2340e01da1505VS2350e01db1505VS2360e01dc1505VS2370e01dd1505VS2380e01de1505VS2390e01df1505VS2400e01e01505VS2410e01e11505VS2420e01e21505VS2430e01e31505VS2440e01e41505VS2450e01e51505VS2460e01e61505VS2470e01e71505VS2480e01e81505VS2490e01e91505VS2500e01ea1505VS2510e01eb1505VS2520e01ec1505VS2530e01ed1505VS2540e01ee1505VS2550e01ef1505VS256 -------------------------------------------------------------------------------- /cdata/prop_set_lowercase: -------------------------------------------------------------------------------- 1 | 
00006100006200006300006400006500006600006700006800006900006a00006b00006c00006d00006e00006f00007000007100007200007300007400007500007600007700007800007900007a0000aa0000b50000ba0000df0000e00000e10000e20000e30000e40000e50000e60000e70000e80000e90000ea0000eb0000ec0000ed0000ee0000ef0000f00000f10000f20000f30000f40000f50000f60000f80000f90000fa0000fb0000fc0000fd0000fe0000ff00010100010300010500010700010900010b00010d00010f00011100011300011500011700011900011b00011d00011f00012100012300012500012700012900012b00012d00012f00013100013300013500013700013800013a00013c00013e00014000014200014400014600014800014900014b00014d00014f00015100015300015500015700015900015b00015d00015f00016100016300016500016700016900016b00016d00016f00017100017300017500017700017a00017c00017e00017f00018000018300018500018800018c00018d00019200019500019900019a00019b00019e0001a10001a30001a50001a80001aa0001ab0001ad0001b00001b40001b60001b90001ba0001bd0001be0001bf0001c60001c90001cc0001ce0001d00001d20001d40001d60001d80001da0001dc0001dd0001df0001e10001e30001e50001e70001e90001eb0001ed0001ef0001f00001f30001f50001f90001fb0001fd0001ff00020100020300020500020700020900020b00020d00020f00021100021300021500021700021900021b00021d00021f00022100022300022500022700022900022b00022d00022f00023100023300023400023500023600023700023800023900023c00023f00024000024200024700024900024b00024d00024f00025000025100025200025300025400025500025600025700025800025900025a00025b00025c00025d00025e00025f00026000026100026200026300026400026500026600026700026800026900026a00026b00026c00026d00026e00026f00027000027100027200027300027400027500027600027700027800027900027a00027b00027c00027d00027e00027f00028000028100028200028300028400028500028600028700028800028900028a00028b00028c00028d00028e00028f00029000029100029200029300029500029600029700029800029900029a00029b00029c00029d00029e00029f0002a00002a10002a20002a30002a40002a50002a60002a70002a80002a90002aa0002ab0002ac0002ad0002ae0002af0002b00002b10002b20002b30002b40002b50002b60002b70002b80002c00002c10002e00002e10002e20002e30002e400
034500037100037300037700037a00037b00037c00037d0003900003ac0003ad0003ae0003af0003b00003b10003b20003b30003b40003b50003b60003b70003b80003b90003ba0003bb0003bc0003bd0003be0003bf0003c00003c10003c20003c30003c40003c50003c60003c70003c80003c90003ca0003cb0003cc0003cd0003ce0003d00003d10003d50003d60003d70003d90003db0003dd0003df0003e10003e30003e50003e70003e90003eb0003ed0003ef0003f00003f10003f20003f30003f50003f80003fb0003fc00043000043100043200043300043400043500043600043700043800043900043a00043b00043c00043d00043e00043f00044000044100044200044300044400044500044600044700044800044900044a00044b00044c00044d00044e00044f00045000045100045200045300045400045500045600045700045800045900045a00045b00045c00045d00045e00045f00046100046300046500046700046900046b00046d00046f00047100047300047500047700047900047b00047d00047f00048100048b00048d00048f00049100049300049500049700049900049b00049d00049f0004a10004a30004a50004a70004a90004ab0004ad0004af0004b10004b30004b50004b70004b90004bb0004bd0004bf0004c20004c40004c60004c80004ca0004cc0004ce0004cf0004d10004d30004d50004d70004d90004db0004dd0004df0004e10004e30004e50004e70004e90004eb0004ed0004ef0004f10004f30004f50004f70004f90004fb0004fd0004ff00050100050300050500050700050900050b00050d00050f00051100051300051500051700051900051b00051d00051f00052100052300052500052700056100056200056300056400056500056600056700056800056900056a00056b00056c00056d00056e00056f00057000057100057200057300057400057500057600057700057800057900057a00057b00057c00057d00057e00057f000580000581000582000583000584000585000586000587001d00001d01001d02001d03001d04001d05001d06001d07001d08001d09001d0a001d0b001d0c001d0d001d0e001d0f001d10001d11001d12001d13001d14001d15001d16001d17001d18001d19001d1a001d1b001d1c001d1d001d1e001d1f001d20001d21001d22001d23001d24001d25001d26001d27001d28001d29001d2a001d2b001d2c001d2d001d2e001d2f001d30001d31001d32001d33001d34001d35001d36001d37001d38001d39001d3a001d3b001d3c001d3d001d3e001d3f001d40001d41001d42001d43001d44001d45001d46001d47001d48001d49001d4a001d4b001d4c001d4d001d4e001d4f001d50001d
51001d52001d53001d54001d55001d56001d57001d58001d59001d5a001d5b001d5c001d5d001d5e001d5f001d60001d61001d62001d63001d64001d65001d66001d67001d68001d69001d6a001d6b001d6c001d6d001d6e001d6f001d70001d71001d72001d73001d74001d75001d76001d77001d78001d79001d7a001d7b001d7c001d7d001d7e001d7f001d80001d81001d82001d83001d84001d85001d86001d87001d88001d89001d8a001d8b001d8c001d8d001d8e001d8f001d90001d91001d92001d93001d94001d95001d96001d97001d98001d99001d9a001d9b001d9c001d9d001d9e001d9f001da0001da1001da2001da3001da4001da5001da6001da7001da8001da9001daa001dab001dac001dad001dae001daf001db0001db1001db2001db3001db4001db5001db6001db7001db8001db9001dba001dbb001dbc001dbd001dbe001dbf001e01001e03001e05001e07001e09001e0b001e0d001e0f001e11001e13001e15001e17001e19001e1b001e1d001e1f001e21001e23001e25001e27001e29001e2b001e2d001e2f001e31001e33001e35001e37001e39001e3b001e3d001e3f001e41001e43001e45001e47001e49001e4b001e4d001e4f001e51001e53001e55001e57001e59001e5b001e5d001e5f001e61001e63001e65001e67001e69001e6b001e6d001e6f001e71001e73001e75001e77001e79001e7b001e7d001e7f001e81001e83001e85001e87001e89001e8b001e8d001e8f001e91001e93001e95001e96001e97001e98001e99001e9a001e9b001e9c001e9d001e9f001ea1001ea3001ea5001ea7001ea9001eab001ead001eaf001eb1001eb3001eb5001eb7001eb9001ebb001ebd001ebf001ec1001ec3001ec5001ec7001ec9001ecb001ecd001ecf001ed1001ed3001ed5001ed7001ed9001edb001edd001edf001ee1001ee3001ee5001ee7001ee9001eeb001eed001eef001ef1001ef3001ef5001ef7001ef9001efb001efd001eff001f00001f01001f02001f03001f04001f05001f06001f07001f10001f11001f12001f13001f14001f15001f20001f21001f22001f23001f24001f25001f26001f27001f30001f31001f32001f33001f34001f35001f36001f37001f40001f41001f42001f43001f44001f45001f50001f51001f52001f53001f54001f55001f56001f57001f60001f61001f62001f63001f64001f65001f66001f67001f70001f71001f72001f73001f74001f75001f76001f77001f78001f79001f7a001f7b001f7c001f7d001f80001f81001f82001f83001f84001f85001f86001f87001f90001f91001f92001f93001f94001f95001f96001f97001fa0001fa1001fa2001fa3001fa4001fa5001fa6001fa7001fb0
001fb1001fb2001fb3001fb4001fb6001fb7001fbe001fc2001fc3001fc4001fc6001fc7001fd0001fd1001fd2001fd3001fd6001fd7001fe0001fe1001fe2001fe3001fe4001fe5001fe6001fe7001ff2001ff3001ff4001ff6001ff700207100207f00209000209100209200209300209400209500209600209700209800209900209a00209b00209c00210a00210e00210f00211300212f00213400213900213c00213d00214600214700214800214900214e00217000217100217200217300217400217500217600217700217800217900217a00217b00217c00217d00217e00217f0021840024d00024d10024d20024d30024d40024d50024d60024d70024d80024d90024da0024db0024dc0024dd0024de0024df0024e00024e10024e20024e30024e40024e50024e60024e70024e80024e9002c30002c31002c32002c33002c34002c35002c36002c37002c38002c39002c3a002c3b002c3c002c3d002c3e002c3f002c40002c41002c42002c43002c44002c45002c46002c47002c48002c49002c4a002c4b002c4c002c4d002c4e002c4f002c50002c51002c52002c53002c54002c55002c56002c57002c58002c59002c5a002c5b002c5c002c5d002c5e002c61002c65002c66002c68002c6a002c6c002c71002c73002c74002c76002c77002c78002c79002c7a002c7b002c7c002c7d002c81002c83002c85002c87002c89002c8b002c8d002c8f002c91002c93002c95002c97002c99002c9b002c9d002c9f002ca1002ca3002ca5002ca7002ca9002cab002cad002caf002cb1002cb3002cb5002cb7002cb9002cbb002cbd002cbf002cc1002cc3002cc5002cc7002cc9002ccb002ccd002ccf002cd1002cd3002cd5002cd7002cd9002cdb002cdd002cdf002ce1002ce3002ce4002cec002cee002cf3002d00002d01002d02002d03002d04002d05002d06002d07002d08002d09002d0a002d0b002d0c002d0d002d0e002d0f002d10002d11002d12002d13002d14002d15002d16002d17002d18002d19002d1a002d1b002d1c002d1d002d1e002d1f002d20002d21002d22002d23002d24002d25002d27002d2d00a64100a64300a64500a64700a64900a64b00a64d00a64f00a65100a65300a65500a65700a65900a65b00a65d00a65f00a66100a66300a66500a66700a66900a66b00a66d00a68100a68300a68500a68700a68900a68b00a68d00a68f00a69100a69300a69500a69700a72300a72500a72700a72900a72b00a72d00a72f00a73000a73100a73300a73500a73700a73900a73b00a73d00a73f00a74100a74300a74500a74700a74900a74b00a74d00a74f00a75100a75300a75500a75700a75900a75b00a75d00a75f00a76100a76300a76500a76700a76900
a76b00a76d00a76f00a77000a77100a77200a77300a77400a77500a77600a77700a77800a77a00a77c00a77f00a78100a78300a78500a78700a78c00a78e00a79100a79300a7a100a7a300a7a500a7a700a7a900a7f800a7f900a7fa00fb0000fb0100fb0200fb0300fb0400fb0500fb0600fb1300fb1400fb1500fb1600fb1700ff4100ff4200ff4300ff4400ff4500ff4600ff4700ff4800ff4900ff4a00ff4b00ff4c00ff4d00ff4e00ff4f00ff5000ff5100ff5200ff5300ff5400ff5500ff5600ff5700ff5800ff5900ff5a01042801042901042a01042b01042c01042d01042e01042f01043001043101043201043301043401043501043601043701043801043901043a01043b01043c01043d01043e01043f01044001044101044201044301044401044501044601044701044801044901044a01044b01044c01044d01044e01044f01d41a01d41b01d41c01d41d01d41e01d41f01d42001d42101d42201d42301d42401d42501d42601d42701d42801d42901d42a01d42b01d42c01d42d01d42e01d42f01d43001d43101d43201d43301d44e01d44f01d45001d45101d45201d45301d45401d45601d45701d45801d45901d45a01d45b01d45c01d45d01d45e01d45f01d46001d46101d46201d46301d46401d46501d46601d46701d48201d48301d48401d48501d48601d48701d48801d48901d48a01d48b01d48c01d48d01d48e01d48f01d49001d49101d49201d49301d49401d49501d49601d49701d49801d49901d49a01d49b01d4b601d4b701d4b801d4b901d4bb01d4bd01d4be01d4bf01d4c001d4c101d4c201d4c301d4c501d4c601d4c701d4c801d4c901d4ca01d4cb01d4cc01d4cd01d4ce01d4cf01d4ea01d4eb01d4ec01d4ed01d4ee01d4ef01d4f001d4f101d4f201d4f301d4f401d4f501d4f601d4f701d4f801d4f901d4fa01d4fb01d4fc01d4fd01d4fe01d4ff01d50001d50101d50201d50301d51e01d51f01d52001d52101d52201d52301d52401d52501d52601d52701d52801d52901d52a01d52b01d52c01d52d01d52e01d52f01d53001d53101d53201d53301d53401d53501d53601d53701d55201d55301d55401d55501d55601d55701d55801d55901d55a01d55b01d55c01d55d01d55e01d55f01d56001d56101d56201d56301d56401d56501d56601d56701d56801d56901d56a01d56b01d58601d58701d58801d58901d58a01d58b01d58c01d58d01d58e01d58f01d59001d59101d59201d59301d59401d59501d59601d59701d59801d59901d59a01d59b01d59c01d59d01d59e01d59f01d5ba01d5bb01d5bc01d5bd01d5be01d5bf01d5c001d5c101d5c201d5c301d5c401d5c501d5c601d5c701d5c801d5c901d5ca01d5cb01d5cc01d5cd01d5
ce01d5cf01d5d001d5d101d5d201d5d301d5ee01d5ef01d5f001d5f101d5f201d5f301d5f401d5f501d5f601d5f701d5f801d5f901d5fa01d5fb01d5fc01d5fd01d5fe01d5ff01d60001d60101d60201d60301d60401d60501d60601d60701d62201d62301d62401d62501d62601d62701d62801d62901d62a01d62b01d62c01d62d01d62e01d62f01d63001d63101d63201d63301d63401d63501d63601d63701d63801d63901d63a01d63b01d65601d65701d65801d65901d65a01d65b01d65c01d65d01d65e01d65f01d66001d66101d66201d66301d66401d66501d66601d66701d66801d66901d66a01d66b01d66c01d66d01d66e01d66f01d68a01d68b01d68c01d68d01d68e01d68f01d69001d69101d69201d69301d69401d69501d69601d69701d69801d69901d69a01d69b01d69c01d69d01d69e01d69f01d6a001d6a101d6a201d6a301d6a401d6a501d6c201d6c301d6c401d6c501d6c601d6c701d6c801d6c901d6ca01d6cb01d6cc01d6cd01d6ce01d6cf01d6d001d6d101d6d201d6d301d6d401d6d501d6d601d6d701d6d801d6d901d6da01d6dc01d6dd01d6de01d6df01d6e001d6e101d6fc01d6fd01d6fe01d6ff01d70001d70101d70201d70301d70401d70501d70601d70701d70801d70901d70a01d70b01d70c01d70d01d70e01d70f01d71001d71101d71201d71301d71401d71601d71701d71801d71901d71a01d71b01d73601d73701d73801d73901d73a01d73b01d73c01d73d01d73e01d73f01d74001d74101d74201d74301d74401d74501d74601d74701d74801d74901d74a01d74b01d74c01d74d01d74e01d75001d75101d75201d75301d75401d75501d77001d77101d77201d77301d77401d77501d77601d77701d77801d77901d77a01d77b01d77c01d77d01d77e01d77f01d78001d78101d78201d78301d78401d78501d78601d78701d78801d78a01d78b01d78c01d78d01d78e01d78f01d7aa01d7ab01d7ac01d7ad01d7ae01d7af01d7b001d7b101d7b201d7b301d7b401d7b501d7b601d7b701d7b801d7b901d7ba01d7bb01d7bc01d7bd01d7be01d7bf01d7c001d7c101d7c201d7c401d7c501d7c601d7c701d7c801d7c901d7cb -------------------------------------------------------------------------------- /cdata/prop_set_uppercase: -------------------------------------------------------------------------------- 1 | 
00004100004200004300004400004500004600004700004800004900004a00004b00004c00004d00004e00004f00005000005100005200005300005400005500005600005700005800005900005a0000c00000c10000c20000c30000c40000c50000c60000c70000c80000c90000ca0000cb0000cc0000cd0000ce0000cf0000d00000d10000d20000d30000d40000d50000d60000d80000d90000da0000db0000dc0000dd0000de00010000010200010400010600010800010a00010c00010e00011000011200011400011600011800011a00011c00011e00012000012200012400012600012800012a00012c00012e00013000013200013400013600013900013b00013d00013f00014100014300014500014700014a00014c00014e00015000015200015400015600015800015a00015c00015e00016000016200016400016600016800016a00016c00016e00017000017200017400017600017800017900017b00017d00018100018200018400018600018700018900018a00018b00018e00018f00019000019100019300019400019600019700019800019c00019d00019f0001a00001a20001a40001a60001a70001a90001ac0001ae0001af0001b10001b20001b30001b50001b70001b80001bc0001c40001c70001ca0001cd0001cf0001d10001d30001d50001d70001d90001db0001de0001e00001e20001e40001e60001e80001ea0001ec0001ee0001f10001f40001f60001f70001f80001fa0001fc0001fe00020000020200020400020600020800020a00020c00020e00021000021200021400021600021800021a00021c00021e00022000022200022400022600022800022a00022c00022e00023000023200023a00023b00023d00023e00024100024300024400024500024600024800024a00024c00024e00037000037200037600038600038800038900038a00038c00038e00038f00039100039200039300039400039500039600039700039800039900039a00039b00039c00039d00039e00039f0003a00003a10003a30003a40003a50003a60003a70003a80003a90003aa0003ab0003cf0003d20003d30003d40003d80003da0003dc0003de0003e00003e20003e40003e60003e80003ea0003ec0003ee0003f40003f70003f90003fa0003fd0003fe0003ff00040000040100040200040300040400040500040600040700040800040900040a00040b00040c00040d00040e00040f00041000041100041200041300041400041500041600041700041800041900041a00041b00041c00041d00041e00041f00042000042100042200042300042400042500042600042700042800042900042a00042b00042c00042d00042e00042f00046000046200046400046600
046800046a00046c00046e00047000047200047400047600047800047a00047c00047e00048000048a00048c00048e00049000049200049400049600049800049a00049c00049e0004a00004a20004a40004a60004a80004aa0004ac0004ae0004b00004b20004b40004b60004b80004ba0004bc0004be0004c00004c10004c30004c50004c70004c90004cb0004cd0004d00004d20004d40004d60004d80004da0004dc0004de0004e00004e20004e40004e60004e80004ea0004ec0004ee0004f00004f20004f40004f60004f80004fa0004fc0004fe00050000050200050400050600050800050a00050c00050e00051000051200051400051600051800051a00051c00051e00052000052200052400052600053100053200053300053400053500053600053700053800053900053a00053b00053c00053d00053e00053f00054000054100054200054300054400054500054600054700054800054900054a00054b00054c00054d00054e00054f0005500005510005520005530005540005550005560010a00010a10010a20010a30010a40010a50010a60010a70010a80010a90010aa0010ab0010ac0010ad0010ae0010af0010b00010b10010b20010b30010b40010b50010b60010b70010b80010b90010ba0010bb0010bc0010bd0010be0010bf0010c00010c10010c20010c30010c40010c50010c70010cd001e00001e02001e04001e06001e08001e0a001e0c001e0e001e10001e12001e14001e16001e18001e1a001e1c001e1e001e20001e22001e24001e26001e28001e2a001e2c001e2e001e30001e32001e34001e36001e38001e3a001e3c001e3e001e40001e42001e44001e46001e48001e4a001e4c001e4e001e50001e52001e54001e56001e58001e5a001e5c001e5e001e60001e62001e64001e66001e68001e6a001e6c001e6e001e70001e72001e74001e76001e78001e7a001e7c001e7e001e80001e82001e84001e86001e88001e8a001e8c001e8e001e90001e92001e94001e9e001ea0001ea2001ea4001ea6001ea8001eaa001eac001eae001eb0001eb2001eb4001eb6001eb8001eba001ebc001ebe001ec0001ec2001ec4001ec6001ec8001eca001ecc001ece001ed0001ed2001ed4001ed6001ed8001eda001edc001ede001ee0001ee2001ee4001ee6001ee8001eea001eec001eee001ef0001ef2001ef4001ef6001ef8001efa001efc001efe001f08001f09001f0a001f0b001f0c001f0d001f0e001f0f001f18001f19001f1a001f1b001f1c001f1d001f28001f29001f2a001f2b001f2c001f2d001f2e001f2f001f38001f39001f3a001f3b001f3c001f3d001f3e001f3f001f48001f49001f4a001f4b001f4c001f4d001f59001f5b001f5d001f
5f001f68001f69001f6a001f6b001f6c001f6d001f6e001f6f001fb8001fb9001fba001fbb001fc8001fc9001fca001fcb001fd8001fd9001fda001fdb001fe8001fe9001fea001feb001fec001ff8001ff9001ffa001ffb00210200210700210b00210c00210d00211000211100211200211500211900211a00211b00211c00211d00212400212600212800212a00212b00212c00212d00213000213100213200213300213e00213f00214500216000216100216200216300216400216500216600216700216800216900216a00216b00216c00216d00216e00216f0021830024b60024b70024b80024b90024ba0024bb0024bc0024bd0024be0024bf0024c00024c10024c20024c30024c40024c50024c60024c70024c80024c90024ca0024cb0024cc0024cd0024ce0024cf002c00002c01002c02002c03002c04002c05002c06002c07002c08002c09002c0a002c0b002c0c002c0d002c0e002c0f002c10002c11002c12002c13002c14002c15002c16002c17002c18002c19002c1a002c1b002c1c002c1d002c1e002c1f002c20002c21002c22002c23002c24002c25002c26002c27002c28002c29002c2a002c2b002c2c002c2d002c2e002c60002c62002c63002c64002c67002c69002c6b002c6d002c6e002c6f002c70002c72002c75002c7e002c7f002c80002c82002c84002c86002c88002c8a002c8c002c8e002c90002c92002c94002c96002c98002c9a002c9c002c9e002ca0002ca2002ca4002ca6002ca8002caa002cac002cae002cb0002cb2002cb4002cb6002cb8002cba002cbc002cbe002cc0002cc2002cc4002cc6002cc8002cca002ccc002cce002cd0002cd2002cd4002cd6002cd8002cda002cdc002cde002ce0002ce2002ceb002ced002cf200a64000a64200a64400a64600a64800a64a00a64c00a64e00a65000a65200a65400a65600a65800a65a00a65c00a65e00a66000a66200a66400a66600a66800a66a00a66c00a68000a68200a68400a68600a68800a68a00a68c00a68e00a69000a69200a69400a69600a72200a72400a72600a72800a72a00a72c00a72e00a73200a73400a73600a73800a73a00a73c00a73e00a74000a74200a74400a74600a74800a74a00a74c00a74e00a75000a75200a75400a75600a75800a75a00a75c00a75e00a76000a76200a76400a76600a76800a76a00a76c00a76e00a77900a77b00a77d00a77e00a78000a78200a78400a78600a78b00a78d00a79000a79200a7a000a7a200a7a400a7a600a7a800a7aa00ff2100ff2200ff2300ff2400ff2500ff2600ff2700ff2800ff2900ff2a00ff2b00ff2c00ff2d00ff2e00ff2f00ff3000ff3100ff3200ff3300ff3400ff3500ff3600ff3700ff3800ff3900ff3a010400
01040101040201040301040401040501040601040701040801040901040a01040b01040c01040d01040e01040f01041001041101041201041301041401041501041601041701041801041901041a01041b01041c01041d01041e01041f01042001042101042201042301042401042501042601042701d40001d40101d40201d40301d40401d40501d40601d40701d40801d40901d40a01d40b01d40c01d40d01d40e01d40f01d41001d41101d41201d41301d41401d41501d41601d41701d41801d41901d43401d43501d43601d43701d43801d43901d43a01d43b01d43c01d43d01d43e01d43f01d44001d44101d44201d44301d44401d44501d44601d44701d44801d44901d44a01d44b01d44c01d44d01d46801d46901d46a01d46b01d46c01d46d01d46e01d46f01d47001d47101d47201d47301d47401d47501d47601d47701d47801d47901d47a01d47b01d47c01d47d01d47e01d47f01d48001d48101d49c01d49e01d49f01d4a201d4a501d4a601d4a901d4aa01d4ab01d4ac01d4ae01d4af01d4b001d4b101d4b201d4b301d4b401d4b501d4d001d4d101d4d201d4d301d4d401d4d501d4d601d4d701d4d801d4d901d4da01d4db01d4dc01d4dd01d4de01d4df01d4e001d4e101d4e201d4e301d4e401d4e501d4e601d4e701d4e801d4e901d50401d50501d50701d50801d50901d50a01d50d01d50e01d50f01d51001d51101d51201d51301d51401d51601d51701d51801d51901d51a01d51b01d51c01d53801d53901d53b01d53c01d53d01d53e01d54001d54101d54201d54301d54401d54601d54a01d54b01d54c01d54d01d54e01d54f01d55001d56c01d56d01d56e01d56f01d57001d57101d57201d57301d57401d57501d57601d57701d57801d57901d57a01d57b01d57c01d57d01d57e01d57f01d58001d58101d58201d58301d58401d58501d5a001d5a101d5a201d5a301d5a401d5a501d5a601d5a701d5a801d5a901d5aa01d5ab01d5ac01d5ad01d5ae01d5af01d5b001d5b101d5b201d5b301d5b401d5b501d5b601d5b701d5b801d5b901d5d401d5d501d5d601d5d701d5d801d5d901d5da01d5db01d5dc01d5dd01d5de01d5df01d5e001d5e101d5e201d5e301d5e401d5e501d5e601d5e701d5e801d5e901d5ea01d5eb01d5ec01d5ed01d60801d60901d60a01d60b01d60c01d60d01d60e01d60f01d61001d61101d61201d61301d61401d61501d61601d61701d61801d61901d61a01d61b01d61c01d61d01d61e01d61f01d62001d62101d63c01d63d01d63e01d63f01d64001d64101d64201d64301d64401d64501d64601d64701d64801d64901d64a01d64b01d64c01d64d01d64e01d64f01d65001d65101d65201d65301d65401d65501d67001d67101
d67201d67301d67401d67501d67601d67701d67801d67901d67a01d67b01d67c01d67d01d67e01d67f01d68001d68101d68201d68301d68401d68501d68601d68701d68801d68901d6a801d6a901d6aa01d6ab01d6ac01d6ad01d6ae01d6af01d6b001d6b101d6b201d6b301d6b401d6b501d6b601d6b701d6b801d6b901d6ba01d6bb01d6bc01d6bd01d6be01d6bf01d6c001d6e201d6e301d6e401d6e501d6e601d6e701d6e801d6e901d6ea01d6eb01d6ec01d6ed01d6ee01d6ef01d6f001d6f101d6f201d6f301d6f401d6f501d6f601d6f701d6f801d6f901d6fa01d71c01d71d01d71e01d71f01d72001d72101d72201d72301d72401d72501d72601d72701d72801d72901d72a01d72b01d72c01d72d01d72e01d72f01d73001d73101d73201d73301d73401d75601d75701d75801d75901d75a01d75b01d75c01d75d01d75e01d75f01d76001d76101d76201d76301d76401d76501d76601d76701d76801d76901d76a01d76b01d76c01d76d01d76e01d79001d79101d79201d79301d79401d79501d79601d79701d79801d79901d79a01d79b01d79c01d79d01d79e01d79f01d7a001d7a101d7a201d7a301d7a401d7a501d7a601d7a701d7a801d7ca -------------------------------------------------------------------------------- /cdata/simple_lc_map: -------------------------------------------------------------------------------- 1 | 
00004100006100004200006200004300006300004400006400004500006500004600006600004700006700004800006800004900006900004a00006a00004b00006b00004c00006c00004d00006d00004e00006e00004f00006f00005000007000005100007100005200007200005300007300005400007400005500007500005600007600005700007700005800007800005900007900005a00007a0000c00000e00000c10000e10000c20000e20000c30000e30000c40000e40000c50000e50000c60000e60000c70000e70000c80000e80000c90000e90000ca0000ea0000cb0000eb0000cc0000ec0000cd0000ed0000ce0000ee0000cf0000ef0000d00000f00000d10000f10000d20000f20000d30000f30000d40000f40000d50000f50000d60000f60000d80000f80000d90000f90000da0000fa0000db0000fb0000dc0000fc0000dd0000fd0000de0000fe00010000010100010200010300010400010500010600010700010800010900010a00010b00010c00010d00010e00010f00011000011100011200011300011400011500011600011700011800011900011a00011b00011c00011d00011e00011f00012000012100012200012300012400012500012600012700012800012900012a00012b00012c00012d00012e00012f00013000006900013200013300013400013500013600013700013900013a00013b00013c00013d00013e00013f00014000014100014200014300014400014500014600014700014800014a00014b00014c00014d00014e00014f00015000015100015200015300015400015500015600015700015800015900015a00015b00015c00015d00015e00015f00016000016100016200016300016400016500016600016700016800016900016a00016b00016c00016d00016e00016f0001700001710001720001730001740001750001760001770001780000ff00017900017a00017b00017c00017d00017e00018100025300018200018300018400018500018600025400018700018800018900025600018a00025700018b00018c00018e0001dd00018f00025900019000025b00019100019200019300026000019400026300019600026900019700026800019800019900019c00026f00019d00027200019f0002750001a00001a10001a20001a30001a40001a50001a60002800001a70001a80001a90002830001ac0001ad0001ae0002880001af0001b00001b100028a0001b200028b0001b30001b40001b50001b60001b70002920001b80001b90001bc0001bd0001c40001c60001c50001c60001c70001c90001c80001c90001ca0001cc0001cb0001cc0001cd0001ce0001cf0001d00001d10001d20001d30001d40001d50001d60001d700
01d80001d90001da0001db0001dc0001de0001df0001e00001e10001e20001e30001e40001e50001e60001e70001e80001e90001ea0001eb0001ec0001ed0001ee0001ef0001f10001f30001f20001f30001f40001f50001f60001950001f70001bf0001f80001f90001fa0001fb0001fc0001fd0001fe0001ff00020000020100020200020300020400020500020600020700020800020900020a00020b00020c00020d00020e00020f00021000021100021200021300021400021500021600021700021800021900021a00021b00021c00021d00021e00021f00022000019e00022200022300022400022500022600022700022800022900022a00022b00022c00022d00022e00022f00023000023100023200023300023a002c6500023b00023c00023d00019a00023e002c6600024100024200024300018000024400028900024500028c00024600024700024800024900024a00024b00024c00024d00024e00024f0003700003710003720003730003760003770003860003ac0003880003ad0003890003ae00038a0003af00038c0003cc00038e0003cd00038f0003ce0003910003b10003920003b20003930003b30003940003b40003950003b50003960003b60003970003b70003980003b80003990003b900039a0003ba00039b0003bb00039c0003bc00039d0003bd00039e0003be00039f0003bf0003a00003c00003a10003c10003a30003c30003a40003c40003a50003c50003a60003c60003a70003c70003a80003c80003a90003c90003aa0003ca0003ab0003cb0003cf0003d70003d80003d90003da0003db0003dc0003dd0003de0003df0003e00003e10003e20003e30003e40003e50003e60003e70003e80003e90003ea0003eb0003ec0003ed0003ee0003ef0003f40003b80003f70003f80003f90003f20003fa0003fb0003fd00037b0003fe00037c0003ff00037d00040000045000040100045100040200045200040300045300040400045400040500045500040600045600040700045700040800045800040900045900040a00045a00040b00045b00040c00045c00040d00045d00040e00045e00040f00045f00041000043000041100043100041200043200041300043300041400043400041500043500041600043600041700043700041800043800041900043900041a00043a00041b00043b00041c00043c00041d00043d00041e00043e00041f00043f00042000044000042100044100042200044200042300044300042400044400042500044500042600044600042700044700042800044800042900044900042a00044a00042b00044b00042c00044c00042d00044d00042e00044e00042f00044f0004600004610004620004630004640004650004
6600046700046800046900046a00046b00046c00046d00046e00046f00047000047100047200047300047400047500047600047700047800047900047a00047b00047c00047d00047e00047f00048000048100048a00048b00048c00048d00048e00048f00049000049100049200049300049400049500049600049700049800049900049a00049b00049c00049d00049e00049f0004a00004a10004a20004a30004a40004a50004a60004a70004a80004a90004aa0004ab0004ac0004ad0004ae0004af0004b00004b10004b20004b30004b40004b50004b60004b70004b80004b90004ba0004bb0004bc0004bd0004be0004bf0004c00004cf0004c10004c20004c30004c40004c50004c60004c70004c80004c90004ca0004cb0004cc0004cd0004ce0004d00004d10004d20004d30004d40004d50004d60004d70004d80004d90004da0004db0004dc0004dd0004de0004df0004e00004e10004e20004e30004e40004e50004e60004e70004e80004e90004ea0004eb0004ec0004ed0004ee0004ef0004f00004f10004f20004f30004f40004f50004f60004f70004f80004f90004fa0004fb0004fc0004fd0004fe0004ff00050000050100050200050300050400050500050600050700050800050900050a00050b00050c00050d00050e00050f00051000051100051200051300051400051500051600051700051800051900051a00051b00051c00051d00051e00051f00052000052100052200052300052400052500052600052700053100056100053200056200053300056300053400056400053500056500053600056600053700056700053800056800053900056900053a00056a00053b00056b00053c00056c00053d00056d00053e00056e00053f00056f00054000057000054100057100054200057200054300057300054400057400054500057500054600057600054700057700054800057800054900057900054a00057a00054b00057b00054c00057c00054d00057d00054e00057e00054f00057f0005500005800005510005810005520005820005530005830005540005840005550005850005560005860010a0002d000010a1002d010010a2002d020010a3002d030010a4002d040010a5002d050010a6002d060010a7002d070010a8002d080010a9002d090010aa002d0a0010ab002d0b0010ac002d0c0010ad002d0d0010ae002d0e0010af002d0f0010b0002d100010b1002d110010b2002d120010b3002d130010b4002d140010b5002d150010b6002d160010b7002d170010b8002d180010b9002d190010ba002d1a0010bb002d1b0010bc002d1c0010bd002d1d0010be002d1e0010bf002d1f0010c0002d200010c1002d210010c2002d220010c3002d23
0010c4002d240010c5002d250010c7002d270010cd002d2d001e00001e01001e02001e03001e04001e05001e06001e07001e08001e09001e0a001e0b001e0c001e0d001e0e001e0f001e10001e11001e12001e13001e14001e15001e16001e17001e18001e19001e1a001e1b001e1c001e1d001e1e001e1f001e20001e21001e22001e23001e24001e25001e26001e27001e28001e29001e2a001e2b001e2c001e2d001e2e001e2f001e30001e31001e32001e33001e34001e35001e36001e37001e38001e39001e3a001e3b001e3c001e3d001e3e001e3f001e40001e41001e42001e43001e44001e45001e46001e47001e48001e49001e4a001e4b001e4c001e4d001e4e001e4f001e50001e51001e52001e53001e54001e55001e56001e57001e58001e59001e5a001e5b001e5c001e5d001e5e001e5f001e60001e61001e62001e63001e64001e65001e66001e67001e68001e69001e6a001e6b001e6c001e6d001e6e001e6f001e70001e71001e72001e73001e74001e75001e76001e77001e78001e79001e7a001e7b001e7c001e7d001e7e001e7f001e80001e81001e82001e83001e84001e85001e86001e87001e88001e89001e8a001e8b001e8c001e8d001e8e001e8f001e90001e91001e92001e93001e94001e95001e9e0000df001ea0001ea1001ea2001ea3001ea4001ea5001ea6001ea7001ea8001ea9001eaa001eab001eac001ead001eae001eaf001eb0001eb1001eb2001eb3001eb4001eb5001eb6001eb7001eb8001eb9001eba001ebb001ebc001ebd001ebe001ebf001ec0001ec1001ec2001ec3001ec4001ec5001ec6001ec7001ec8001ec9001eca001ecb001ecc001ecd001ece001ecf001ed0001ed1001ed2001ed3001ed4001ed5001ed6001ed7001ed8001ed9001eda001edb001edc001edd001ede001edf001ee0001ee1001ee2001ee3001ee4001ee5001ee6001ee7001ee8001ee9001eea001eeb001eec001eed001eee001eef001ef0001ef1001ef2001ef3001ef4001ef5001ef6001ef7001ef8001ef9001efa001efb001efc001efd001efe001eff001f08001f00001f09001f01001f0a001f02001f0b001f03001f0c001f04001f0d001f05001f0e001f06001f0f001f07001f18001f10001f19001f11001f1a001f12001f1b001f13001f1c001f14001f1d001f15001f28001f20001f29001f21001f2a001f22001f2b001f23001f2c001f24001f2d001f25001f2e001f26001f2f001f27001f38001f30001f39001f31001f3a001f32001f3b001f33001f3c001f34001f3d001f35001f3e001f36001f3f001f37001f48001f40001f49001f41001f4a001f42001f4b001f43001f4c001f44001f4d001f45001f59001f51001f5b001f53001f5d00
1f55001f5f001f57001f68001f60001f69001f61001f6a001f62001f6b001f63001f6c001f64001f6d001f65001f6e001f66001f6f001f67001f88001f80001f89001f81001f8a001f82001f8b001f83001f8c001f84001f8d001f85001f8e001f86001f8f001f87001f98001f90001f99001f91001f9a001f92001f9b001f93001f9c001f94001f9d001f95001f9e001f96001f9f001f97001fa8001fa0001fa9001fa1001faa001fa2001fab001fa3001fac001fa4001fad001fa5001fae001fa6001faf001fa7001fb8001fb0001fb9001fb1001fba001f70001fbb001f71001fbc001fb3001fc8001f72001fc9001f73001fca001f74001fcb001f75001fcc001fc3001fd8001fd0001fd9001fd1001fda001f76001fdb001f77001fe8001fe0001fe9001fe1001fea001f7a001feb001f7b001fec001fe5001ff8001f78001ff9001f79001ffa001f7c001ffb001f7d001ffc001ff30021260003c900212a00006b00212b0000e500213200214e00216000217000216100217100216200217200216300217300216400217400216500217500216600217600216700217700216800217800216900217900216a00217a00216b00217b00216c00217c00216d00217d00216e00217e00216f00217f0021830021840024b60024d00024b70024d10024b80024d20024b90024d30024ba0024d40024bb0024d50024bc0024d60024bd0024d70024be0024d80024bf0024d90024c00024da0024c10024db0024c20024dc0024c30024dd0024c40024de0024c50024df0024c60024e00024c70024e10024c80024e20024c90024e30024ca0024e40024cb0024e50024cc0024e60024cd0024e70024ce0024e80024cf0024e9002c00002c30002c01002c31002c02002c32002c03002c33002c04002c34002c05002c35002c06002c36002c07002c37002c08002c38002c09002c39002c0a002c3a002c0b002c3b002c0c002c3c002c0d002c3d002c0e002c3e002c0f002c3f002c10002c40002c11002c41002c12002c42002c13002c43002c14002c44002c15002c45002c16002c46002c17002c47002c18002c48002c19002c49002c1a002c4a002c1b002c4b002c1c002c4c002c1d002c4d002c1e002c4e002c1f002c4f002c20002c50002c21002c51002c22002c52002c23002c53002c24002c54002c25002c55002c26002c56002c27002c57002c28002c58002c29002c59002c2a002c5a002c2b002c5b002c2c002c5c002c2d002c5d002c2e002c5e002c60002c61002c6200026b002c63001d7d002c6400027d002c67002c68002c69002c6a002c6b002c6c002c6d000251002c6e000271002c6f000250002c70000252002c72002c73002c75002c76002c7e00023f002c7f000240002c
80002c81002c82002c83002c84002c85002c86002c87002c88002c89002c8a002c8b002c8c002c8d002c8e002c8f002c90002c91002c92002c93002c94002c95002c96002c97002c98002c99002c9a002c9b002c9c002c9d002c9e002c9f002ca0002ca1002ca2002ca3002ca4002ca5002ca6002ca7002ca8002ca9002caa002cab002cac002cad002cae002caf002cb0002cb1002cb2002cb3002cb4002cb5002cb6002cb7002cb8002cb9002cba002cbb002cbc002cbd002cbe002cbf002cc0002cc1002cc2002cc3002cc4002cc5002cc6002cc7002cc8002cc9002cca002ccb002ccc002ccd002cce002ccf002cd0002cd1002cd2002cd3002cd4002cd5002cd6002cd7002cd8002cd9002cda002cdb002cdc002cdd002cde002cdf002ce0002ce1002ce2002ce3002ceb002cec002ced002cee002cf2002cf300a64000a64100a64200a64300a64400a64500a64600a64700a64800a64900a64a00a64b00a64c00a64d00a64e00a64f00a65000a65100a65200a65300a65400a65500a65600a65700a65800a65900a65a00a65b00a65c00a65d00a65e00a65f00a66000a66100a66200a66300a66400a66500a66600a66700a66800a66900a66a00a66b00a66c00a66d00a68000a68100a68200a68300a68400a68500a68600a68700a68800a68900a68a00a68b00a68c00a68d00a68e00a68f00a69000a69100a69200a69300a69400a69500a69600a69700a72200a72300a72400a72500a72600a72700a72800a72900a72a00a72b00a72c00a72d00a72e00a72f00a73200a73300a73400a73500a73600a73700a73800a73900a73a00a73b00a73c00a73d00a73e00a73f00a74000a74100a74200a74300a74400a74500a74600a74700a74800a74900a74a00a74b00a74c00a74d00a74e00a74f00a75000a75100a75200a75300a75400a75500a75600a75700a75800a75900a75a00a75b00a75c00a75d00a75e00a75f00a76000a76100a76200a76300a76400a76500a76600a76700a76800a76900a76a00a76b00a76c00a76d00a76e00a76f00a77900a77a00a77b00a77c00a77d001d7900a77e00a77f00a78000a78100a78200a78300a78400a78500a78600a78700a78b00a78c00a78d00026500a79000a79100a79200a79300a7a000a7a100a7a200a7a300a7a400a7a500a7a600a7a700a7a800a7a900a7aa00026600ff2100ff4100ff2200ff4200ff2300ff4300ff2400ff4400ff2500ff4500ff2600ff4600ff2700ff4700ff2800ff4800ff2900ff4900ff2a00ff4a00ff2b00ff4b00ff2c00ff4c00ff2d00ff4d00ff2e00ff4e00ff2f00ff4f00ff3000ff5000ff3100ff5100ff3200ff5200ff3300ff5300ff3400ff5400ff3500ff5500ff3600ff5600ff3700ff57
00ff3800ff5800ff3900ff5900ff3a00ff5a01040001042801040101042901040201042a01040301042b01040401042c01040501042d01040601042e01040701042f01040801043001040901043101040a01043201040b01043301040c01043401040d01043501040e01043601040f01043701041001043801041101043901041201043a01041301043b01041401043c01041501043d01041601043e01041701043f01041801044001041901044101041a01044201041b01044301041c01044401041d01044501041e01044601041f01044701042001044801042101044901042201044a01042301044b01042401044c01042501044d01042601044e01042701044f -------------------------------------------------------------------------------- /cdata/simple_uc_map: -------------------------------------------------------------------------------- 1 | 00006100004100006200004200006300004300006400004400006500004500006600004600006700004700006800004800006900004900006a00004a00006b00004b00006c00004c00006d00004d00006e00004e00006f00004f00007000005000007100005100007200005200007300005300007400005400007500005500007600005600007700005700007800005800007900005900007a00005a0000b500039c0000e00000c00000e10000c10000e20000c20000e30000c30000e40000c40000e50000c50000e60000c60000e70000c70000e80000c80000e90000c90000ea0000ca0000eb0000cb0000ec0000cc0000ed0000cd0000ee0000ce0000ef0000cf0000f00000d00000f10000d10000f20000d20000f30000d30000f40000d40000f50000d50000f60000d60000f80000d80000f90000d90000fa0000da0000fb0000db0000fc0000dc0000fd0000dd0000fe0000de0000ff00017800010100010000010300010200010500010400010700010600010900010800010b00010a00010d00010c00010f00010e00011100011000011300011200011500011400011700011600011900011800011b00011a00011d00011c00011f00011e00012100012000012300012200012500012400012700012600012900012800012b00012a00012d00012c00012f00012e00013100004900013300013200013500013400013700013600013a00013900013c00013b00013e00013d00014000013f00014200014100014400014300014600014500014800014700014b00014a00014d00014c00014f00014e00015100015000015300015200015500015400015700015600015900015800015b00015a00015d00015c00015f00015e00016100016000016300016200016500016
400016700016600016900016800016b00016a00016d00016c00016f00016e00017100017000017300017200017500017400017700017600017a00017900017c00017b00017e00017d00017f00005300018000024300018300018200018500018400018800018700018c00018b0001920001910001950001f600019900019800019a00023d00019e0002200001a10001a00001a30001a20001a50001a40001a80001a70001ad0001ac0001b00001af0001b40001b30001b60001b50001b90001b80001bd0001bc0001bf0001f70001c50001c40001c60001c40001c80001c70001c90001c70001cb0001ca0001cc0001ca0001ce0001cd0001d00001cf0001d20001d10001d40001d30001d60001d50001d80001d70001da0001d90001dc0001db0001dd00018e0001df0001de0001e10001e00001e30001e20001e50001e40001e70001e60001e90001e80001eb0001ea0001ed0001ec0001ef0001ee0001f20001f10001f30001f10001f50001f40001f90001f80001fb0001fa0001fd0001fc0001ff0001fe00020100020000020300020200020500020400020700020600020900020800020b00020a00020d00020c00020f00020e00021100021000021300021200021500021400021700021600021900021800021b00021a00021d00021c00021f00021e00022300022200022500022400022700022600022900022800022b00022a00022d00022c00022f00022e00023100023000023300023200023c00023b00023f002c7e000240002c7f00024200024100024700024600024900024800024b00024a00024d00024c00024f00024e000250002c6f000251002c6d000252002c7000025300018100025400018600025600018900025700018a00025900018f00025b00019000026000019300026300019400026500a78d00026600a7aa00026800019700026900019600026b002c6200026f00019c000271002c6e00027200019d00027500019f00027d002c640002800001a60002830001a90002880001ae00028900024400028a0001b100028b0001b200028c0002450002920001b700034500039900037100037000037300037200037700037600037b0003fd00037c0003fe00037d0003ff0003ac0003860003ad0003880003ae0003890003af00038a0003b10003910003b20003920003b30003930003b40003940003b50003950003b60003960003b70003970003b80003980003b90003990003ba00039a0003bb00039b0003bc00039c0003bd00039d0003be00039e0003bf00039f0003c00003a00003c10003a10003c20003a30003c30003a30003c40003a40003c50003a50003c60003a60003c70003a70003c80003a80003c90003a90003ca0003aa0003cb0003ab0003cc0
0038c0003cd00038e0003ce00038f0003d00003920003d10003980003d50003a60003d60003a00003d70003cf0003d90003d80003db0003da0003dd0003dc0003df0003de0003e10003e00003e30003e20003e50003e40003e70003e60003e90003e80003eb0003ea0003ed0003ec0003ef0003ee0003f000039a0003f10003a10003f20003f90003f50003950003f80003f70003fb0003fa00043000041000043100041100043200041200043300041300043400041400043500041500043600041600043700041700043800041800043900041900043a00041a00043b00041b00043c00041c00043d00041d00043e00041e00043f00041f00044000042000044100042100044200042200044300042300044400042400044500042500044600042600044700042700044800042800044900042900044a00042a00044b00042b00044c00042c00044d00042d00044e00042e00044f00042f00045000040000045100040100045200040200045300040300045400040400045500040500045600040600045700040700045800040800045900040900045a00040a00045b00040b00045c00040c00045d00040d00045e00040e00045f00040f00046100046000046300046200046500046400046700046600046900046800046b00046a00046d00046c00046f00046e00047100047000047300047200047500047400047700047600047900047800047b00047a00047d00047c00047f00047e00048100048000048b00048a00048d00048c00048f00048e00049100049000049300049200049500049400049700049600049900049800049b00049a00049d00049c00049f00049e0004a10004a00004a30004a20004a50004a40004a70004a60004a90004a80004ab0004aa0004ad0004ac0004af0004ae0004b10004b00004b30004b20004b50004b40004b70004b60004b90004b80004bb0004ba0004bd0004bc0004bf0004be0004c20004c10004c40004c30004c60004c50004c80004c70004ca0004c90004cc0004cb0004ce0004cd0004cf0004c00004d10004d00004d30004d20004d50004d40004d70004d60004d90004d80004db0004da0004dd0004dc0004df0004de0004e10004e00004e30004e20004e50004e40004e70004e60004e90004e80004eb0004ea0004ed0004ec0004ef0004ee0004f10004f00004f30004f20004f50004f40004f70004f60004f90004f80004fb0004fa0004fd0004fc0004ff0004fe00050100050000050300050200050500050400050700050600050900050800050b00050a00050d00050c00050f00050e00051100051000051300051200051500051400051700051600051900051800051b00051a00051d00051c00051f00051e000521000520000
52300052200052500052400052700052600056100053100056200053200056300053300056400053400056500053500056600053600056700053700056800053800056900053900056a00053a00056b00053b00056c00053c00056d00053d00056e00053e00056f00053f00057000054000057100054100057200054200057300054300057400054400057500054500057600054600057700054700057800054800057900054900057a00054a00057b00054b00057c00054c00057d00054d00057e00054e00057f00054f000580000550000581000551000582000552000583000553000584000554000585000555000586000556001d7900a77d001d7d002c63001e01001e00001e03001e02001e05001e04001e07001e06001e09001e08001e0b001e0a001e0d001e0c001e0f001e0e001e11001e10001e13001e12001e15001e14001e17001e16001e19001e18001e1b001e1a001e1d001e1c001e1f001e1e001e21001e20001e23001e22001e25001e24001e27001e26001e29001e28001e2b001e2a001e2d001e2c001e2f001e2e001e31001e30001e33001e32001e35001e34001e37001e36001e39001e38001e3b001e3a001e3d001e3c001e3f001e3e001e41001e40001e43001e42001e45001e44001e47001e46001e49001e48001e4b001e4a001e4d001e4c001e4f001e4e001e51001e50001e53001e52001e55001e54001e57001e56001e59001e58001e5b001e5a001e5d001e5c001e5f001e5e001e61001e60001e63001e62001e65001e64001e67001e66001e69001e68001e6b001e6a001e6d001e6c001e6f001e6e001e71001e70001e73001e72001e75001e74001e77001e76001e79001e78001e7b001e7a001e7d001e7c001e7f001e7e001e81001e80001e83001e82001e85001e84001e87001e86001e89001e88001e8b001e8a001e8d001e8c001e8f001e8e001e91001e90001e93001e92001e95001e94001e9b001e60001ea1001ea0001ea3001ea2001ea5001ea4001ea7001ea6001ea9001ea8001eab001eaa001ead001eac001eaf001eae001eb1001eb0001eb3001eb2001eb5001eb4001eb7001eb6001eb9001eb8001ebb001eba001ebd001ebc001ebf001ebe001ec1001ec0001ec3001ec2001ec5001ec4001ec7001ec6001ec9001ec8001ecb001eca001ecd001ecc001ecf001ece001ed1001ed0001ed3001ed2001ed5001ed4001ed7001ed6001ed9001ed8001edb001eda001edd001edc001edf001ede001ee1001ee0001ee3001ee2001ee5001ee4001ee7001ee6001ee9001ee8001eeb001eea001eed001eec001eef001eee001ef1001ef0001ef3001ef2001ef5001ef4001ef7001ef6001ef9001ef8001efb001efa001efd001efc001eff001ef
e001f00001f08001f01001f09001f02001f0a001f03001f0b001f04001f0c001f05001f0d001f06001f0e001f07001f0f001f10001f18001f11001f19001f12001f1a001f13001f1b001f14001f1c001f15001f1d001f20001f28001f21001f29001f22001f2a001f23001f2b001f24001f2c001f25001f2d001f26001f2e001f27001f2f001f30001f38001f31001f39001f32001f3a001f33001f3b001f34001f3c001f35001f3d001f36001f3e001f37001f3f001f40001f48001f41001f49001f42001f4a001f43001f4b001f44001f4c001f45001f4d001f51001f59001f53001f5b001f55001f5d001f57001f5f001f60001f68001f61001f69001f62001f6a001f63001f6b001f64001f6c001f65001f6d001f66001f6e001f67001f6f001f70001fba001f71001fbb001f72001fc8001f73001fc9001f74001fca001f75001fcb001f76001fda001f77001fdb001f78001ff8001f79001ff9001f7a001fea001f7b001feb001f7c001ffa001f7d001ffb001f80001f88001f81001f89001f82001f8a001f83001f8b001f84001f8c001f85001f8d001f86001f8e001f87001f8f001f90001f98001f91001f99001f92001f9a001f93001f9b001f94001f9c001f95001f9d001f96001f9e001f97001f9f001fa0001fa8001fa1001fa9001fa2001faa001fa3001fab001fa4001fac001fa5001fad001fa6001fae001fa7001faf001fb0001fb8001fb1001fb9001fb3001fbc001fbe000399001fc3001fcc001fd0001fd8001fd1001fd9001fe0001fe8001fe1001fe9001fe5001fec001ff3001ffc00214e00213200217000216000217100216100217200216200217300216300217400216400217500216500217600216600217700216700217800216800217900216900217a00216a00217b00216b00217c00216c00217d00216d00217e00216e00217f00216f0021840021830024d00024b60024d10024b70024d20024b80024d30024b90024d40024ba0024d50024bb0024d60024bc0024d70024bd0024d80024be0024d90024bf0024da0024c00024db0024c10024dc0024c20024dd0024c30024de0024c40024df0024c50024e00024c60024e10024c70024e20024c80024e30024c90024e40024ca0024e50024cb0024e60024cc0024e70024cd0024e80024ce0024e90024cf002c30002c00002c31002c01002c32002c02002c33002c03002c34002c04002c35002c05002c36002c06002c37002c07002c38002c08002c39002c09002c3a002c0a002c3b002c0b002c3c002c0c002c3d002c0d002c3e002c0e002c3f002c0f002c40002c10002c41002c11002c42002c12002c43002c13002c44002c14002c45002c15002c46002c16002c47002c17002c48002c18002c490
02c19002c4a002c1a002c4b002c1b002c4c002c1c002c4d002c1d002c4e002c1e002c4f002c1f002c50002c20002c51002c21002c52002c22002c53002c23002c54002c24002c55002c25002c56002c26002c57002c27002c58002c28002c59002c29002c5a002c2a002c5b002c2b002c5c002c2c002c5d002c2d002c5e002c2e002c61002c60002c6500023a002c6600023e002c68002c67002c6a002c69002c6c002c6b002c73002c72002c76002c75002c81002c80002c83002c82002c85002c84002c87002c86002c89002c88002c8b002c8a002c8d002c8c002c8f002c8e002c91002c90002c93002c92002c95002c94002c97002c96002c99002c98002c9b002c9a002c9d002c9c002c9f002c9e002ca1002ca0002ca3002ca2002ca5002ca4002ca7002ca6002ca9002ca8002cab002caa002cad002cac002caf002cae002cb1002cb0002cb3002cb2002cb5002cb4002cb7002cb6002cb9002cb8002cbb002cba002cbd002cbc002cbf002cbe002cc1002cc0002cc3002cc2002cc5002cc4002cc7002cc6002cc9002cc8002ccb002cca002ccd002ccc002ccf002cce002cd1002cd0002cd3002cd2002cd5002cd4002cd7002cd6002cd9002cd8002cdb002cda002cdd002cdc002cdf002cde002ce1002ce0002ce3002ce2002cec002ceb002cee002ced002cf3002cf2002d000010a0002d010010a1002d020010a2002d030010a3002d040010a4002d050010a5002d060010a6002d070010a7002d080010a8002d090010a9002d0a0010aa002d0b0010ab002d0c0010ac002d0d0010ad002d0e0010ae002d0f0010af002d100010b0002d110010b1002d120010b2002d130010b3002d140010b4002d150010b5002d160010b6002d170010b7002d180010b8002d190010b9002d1a0010ba002d1b0010bb002d1c0010bc002d1d0010bd002d1e0010be002d1f0010bf002d200010c0002d210010c1002d220010c2002d230010c3002d240010c4002d250010c5002d270010c7002d2d0010cd00a64100a64000a64300a64200a64500a64400a64700a64600a64900a64800a64b00a64a00a64d00a64c00a64f00a64e00a65100a65000a65300a65200a65500a65400a65700a65600a65900a65800a65b00a65a00a65d00a65c00a65f00a65e00a66100a66000a66300a66200a66500a66400a66700a66600a66900a66800a66b00a66a00a66d00a66c00a68100a68000a68300a68200a68500a68400a68700a68600a68900a68800a68b00a68a00a68d00a68c00a68f00a68e00a69100a69000a69300a69200a69500a69400a69700a69600a72300a72200a72500a72400a72700a72600a72900a72800a72b00a72a00a72d00a72c00a72f00a72e00a73300a73200a73500a73400a
73700a73600a73900a73800a73b00a73a00a73d00a73c00a73f00a73e00a74100a74000a74300a74200a74500a74400a74700a74600a74900a74800a74b00a74a00a74d00a74c00a74f00a74e00a75100a75000a75300a75200a75500a75400a75700a75600a75900a75800a75b00a75a00a75d00a75c00a75f00a75e00a76100a76000a76300a76200a76500a76400a76700a76600a76900a76800a76b00a76a00a76d00a76c00a76f00a76e00a77a00a77900a77c00a77b00a77f00a77e00a78100a78000a78300a78200a78500a78400a78700a78600a78c00a78b00a79100a79000a79300a79200a7a100a7a000a7a300a7a200a7a500a7a400a7a700a7a600a7a900a7a800ff4100ff2100ff4200ff2200ff4300ff2300ff4400ff2400ff4500ff2500ff4600ff2600ff4700ff2700ff4800ff2800ff4900ff2900ff4a00ff2a00ff4b00ff2b00ff4c00ff2c00ff4d00ff2d00ff4e00ff2e00ff4f00ff2f00ff5000ff3000ff5100ff3100ff5200ff3200ff5300ff3300ff5400ff3400ff5500ff3500ff5600ff3600ff5700ff3700ff5800ff3800ff5900ff3900ff5a00ff3a01042801040001042901040101042a01040201042b01040301042c01040401042d01040501042e01040601042f01040701043001040801043101040901043201040a01043301040b01043401040c01043501040d01043601040e01043701040f01043801041001043901041101043a01041201043b01041301043c01041401043d01041501043e01041601043f01041701044001041801044101041901044201041a01044301041b01044401041c01044501041d01044601041e01044701041f01044801042001044901042101044a01042201044b01042301044c01042401044d01042501044e01042601044f010427 -------------------------------------------------------------------------------- /cdata/soft_dotted_set: -------------------------------------------------------------------------------- 1 | 00006900006a00012f00024900026800029d0002b20003f3000456000458001d62001d96001da4001da8001e2d001ecb002071002148002149002c7c01d42201d42301d45601d45701d48a01d48b01d4be01d4bf01d4f201d4f301d52601d52701d55a01d55b01d58e01d58f01d5c201d5c301d5f601d5f701d62a01d62b01d65e01d65f01d69201d693 -------------------------------------------------------------------------------- /cdata/special_lc_map: -------------------------------------------------------------------------------- 1 | 
000130000069000307xxxxxx001f88001f80xxxxxx001f89001f81xxxxxx001f8a001f82xxxxxx001f8b001f83xxxxxx001f8c001f84xxxxxx001f8d001f85xxxxxx001f8e001f86xxxxxx001f8f001f87xxxxxx001f98001f90xxxxxx001f99001f91xxxxxx001f9a001f92xxxxxx001f9b001f93xxxxxx001f9c001f94xxxxxx001f9d001f95xxxxxx001f9e001f96xxxxxx001f9f001f97xxxxxx001fa8001fa0xxxxxx001fa9001fa1xxxxxx001faa001fa2xxxxxx001fab001fa3xxxxxx001fac001fa4xxxxxx001fad001fa5xxxxxx001fae001fa6xxxxxx001faf001fa7xxxxxx001fbc001fb3xxxxxx001fcc001fc3xxxxxx001ffc001ff3xxxxxx -------------------------------------------------------------------------------- /cdata/special_tc_map: -------------------------------------------------------------------------------- 1 | 0000df000053000073xxxxxx00fb00000046000066xxxxxx00fb01000046000069xxxxxx00fb0200004600006cxxxxxx00fb03000046000066000069xxxxxx00fb0400004600006600006cxxxxxx00fb05000053000074xxxxxx00fb06000053000074xxxxxx000587000535000582xxxxxx00fb13000544000576xxxxxx00fb14000544000565xxxxxx00fb1500054400056bxxxxxx00fb1600054e000576xxxxxx00fb1700054400056dxxxxxx0001490002bc00004exxxxxx000390000399000308000301xxxxxx0003b00003a5000308000301xxxxxx0001f000004a00030cxxxxxx001e96000048000331xxxxxx001e97000054000308xxxxxx001e9800005700030axxxxxx001e9900005900030axxxxxx001e9a0000410002bexxxxxx001f500003a5000313xxxxxx001f520003a5000313000300xxxxxx001f540003a5000313000301xxxxxx001f560003a5000313000342xxxxxx001fb6000391000342xxxxxx001fc6000397000342xxxxxx001fd2000399000308000300xxxxxx001fd3000399000308000301xxxxxx001fd6000399000342xxxxxx001fd7000399000308000342xxxxxx001fe20003a5000308000300xxxxxx001fe30003a5000308000301xxxxxx001fe40003a1000313xxxxxx001fe60003a5000342xxxxxx001fe70003a5000308000342xxxxxx001ff60003a9000342xxxxxx001f80001f88xxxxxx001f81001f89xxxxxx001f82001f8axxxxxx001f83001f8bxxxxxx001f84001f8cxxxxxx001f85001f8dxxxxxx001f86001f8exxxxxx001f87001f8fxxxxxx001f90001f98xxxxxx001f91001f99xxxxxx001f92001f9axxxxxx001f93001f9bxxxxxx001f94001f9cxxxxxx001f95001f9dxxxxxx001f96001f9exxxxxx001f97001f9fxxxx
xx001fa0001fa8xxxxxx001fa1001fa9xxxxxx001fa2001faaxxxxxx001fa3001fabxxxxxx001fa4001facxxxxxx001fa5001fadxxxxxx001fa6001faexxxxxx001fa7001fafxxxxxx001fb3001fbcxxxxxx001fc3001fccxxxxxx001ff3001ffcxxxxxx001fb2001fba000345xxxxxx001fb4000386000345xxxxxx001fc2001fca000345xxxxxx001fc4000389000345xxxxxx001ff2001ffa000345xxxxxx001ff400038f000345xxxxxx001fb7000391000342000345xxxxxx001fc7000397000342000345xxxxxx001ff70003a9000342000345xxxxxx -------------------------------------------------------------------------------- /cdata/special_uc_map: -------------------------------------------------------------------------------- 1 | 0000df000053000053xxxxxx00fb00000046000046xxxxxx00fb01000046000049xxxxxx00fb0200004600004cxxxxxx00fb03000046000046000049xxxxxx00fb0400004600004600004cxxxxxx00fb05000053000054xxxxxx00fb06000053000054xxxxxx000587000535000552xxxxxx00fb13000544000546xxxxxx00fb14000544000535xxxxxx00fb1500054400053bxxxxxx00fb1600054e000546xxxxxx00fb1700054400053dxxxxxx0001490002bc00004exxxxxx000390000399000308000301xxxxxx0003b00003a5000308000301xxxxxx0001f000004a00030cxxxxxx001e96000048000331xxxxxx001e97000054000308xxxxxx001e9800005700030axxxxxx001e9900005900030axxxxxx001e9a0000410002bexxxxxx001f500003a5000313xxxxxx001f520003a5000313000300xxxxxx001f540003a5000313000301xxxxxx001f560003a5000313000342xxxxxx001fb6000391000342xxxxxx001fc6000397000342xxxxxx001fd2000399000308000300xxxxxx001fd3000399000308000301xxxxxx001fd6000399000342xxxxxx001fd7000399000308000342xxxxxx001fe20003a5000308000300xxxxxx001fe30003a5000308000301xxxxxx001fe40003a1000313xxxxxx001fe60003a5000342xxxxxx001fe70003a5000308000342xxxxxx001ff60003a9000342xxxxxx001f80001f08000399xxxxxx001f81001f09000399xxxxxx001f82001f0a000399xxxxxx001f83001f0b000399xxxxxx001f84001f0c000399xxxxxx001f85001f0d000399xxxxxx001f86001f0e000399xxxxxx001f87001f0f000399xxxxxx001f88001f08000399xxxxxx001f89001f09000399xxxxxx001f8a001f0a000399xxxxxx001f8b001f0b000399xxxxxx001f8c001f0c000399xxxxxx001f8d001f0d000399xxxxxx001f8e001f0e000399xxxxxx00
1f8f001f0f000399xxxxxx001f90001f28000399xxxxxx001f91001f29000399xxxxxx001f92001f2a000399xxxxxx001f93001f2b000399xxxxxx001f94001f2c000399xxxxxx001f95001f2d000399xxxxxx001f96001f2e000399xxxxxx001f97001f2f000399xxxxxx001f98001f28000399xxxxxx001f99001f29000399xxxxxx001f9a001f2a000399xxxxxx001f9b001f2b000399xxxxxx001f9c001f2c000399xxxxxx001f9d001f2d000399xxxxxx001f9e001f2e000399xxxxxx001f9f001f2f000399xxxxxx001fa0001f68000399xxxxxx001fa1001f69000399xxxxxx001fa2001f6a000399xxxxxx001fa3001f6b000399xxxxxx001fa4001f6c000399xxxxxx001fa5001f6d000399xxxxxx001fa6001f6e000399xxxxxx001fa7001f6f000399xxxxxx001fa8001f68000399xxxxxx001fa9001f69000399xxxxxx001faa001f6a000399xxxxxx001fab001f6b000399xxxxxx001fac001f6c000399xxxxxx001fad001f6d000399xxxxxx001fae001f6e000399xxxxxx001faf001f6f000399xxxxxx001fb3000391000399xxxxxx001fbc000391000399xxxxxx001fc3000397000399xxxxxx001fcc000397000399xxxxxx001ff30003a9000399xxxxxx001ffc0003a9000399xxxxxx001fb2001fba000399xxxxxx001fb4000386000399xxxxxx001fc2001fca000399xxxxxx001fc4000389000399xxxxxx001ff2001ffa000399xxxxxx001ff400038f000399xxxxxx001fb7000391000342000399xxxxxx001fc7000397000342000399xxxxxx001ff70003a9000342000399xxxxxx -------------------------------------------------------------------------------- /cdata/white_space_set: -------------------------------------------------------------------------------- 1 | 00000900000a00000b00000c00000d0000200000850000a000168000180e00200000200100200200200300200400200500200600200700200800200900200a00202800202900202f00205f003000 -------------------------------------------------------------------------------- /data/Jamo.txt: -------------------------------------------------------------------------------- 1 | # Jamo-6.2.0.txt 2 | # Date: 2012-05-15, 22:23:00 GMT [KW, LI] 3 | # 4 | # Unicode Character Database 5 | # Copyright (c) 1991-2012 Unicode, Inc. 
6 | # For terms of use, see http://www.unicode.org/terms_of_use.html 7 | # For documentation, see http://www.unicode.org/reports/tr44/ 8 | # 9 | # This file defines the Jamo_Short_Name property. 10 | # 11 | # See Section 3.12 of The Unicode Standard, Version 6.2 12 | # for more information. 13 | # 14 | # Each line contains two fields, separated by a semicolon. 15 | # 16 | # The first field gives the code point, in 4-digit hexadecimal 17 | # form, of a conjoining jamo character that participates in the 18 | # algorithmic determination of Hangul syllable character names. 19 | # The second field gives the Jamo_Short_Name as a one-, two-, 20 | # or three-character ASCII string (or in one case, for U+110B, 21 | # the null string). 22 | # 23 | # ############################################################# 24 | 25 | 1100; G # HANGUL CHOSEONG KIYEOK 26 | 1101; GG # HANGUL CHOSEONG SSANGKIYEOK 27 | 1102; N # HANGUL CHOSEONG NIEUN 28 | 1103; D # HANGUL CHOSEONG TIKEUT 29 | 1104; DD # HANGUL CHOSEONG SSANGTIKEUT 30 | 1105; R # HANGUL CHOSEONG RIEUL 31 | 1106; M # HANGUL CHOSEONG MIEUM 32 | 1107; B # HANGUL CHOSEONG PIEUP 33 | 1108; BB # HANGUL CHOSEONG SSANGPIEUP 34 | 1109; S # HANGUL CHOSEONG SIOS 35 | 110A; SS # HANGUL CHOSEONG SSANGSIOS 36 | 110B; # HANGUL CHOSEONG IEUNG 37 | 110C; J # HANGUL CHOSEONG CIEUC 38 | 110D; JJ # HANGUL CHOSEONG SSANGCIEUC 39 | 110E; C # HANGUL CHOSEONG CHIEUCH 40 | 110F; K # HANGUL CHOSEONG KHIEUKH 41 | 1110; T # HANGUL CHOSEONG THIEUTH 42 | 1111; P # HANGUL CHOSEONG PHIEUPH 43 | 1112; H # HANGUL CHOSEONG HIEUH 44 | 1161; A # HANGUL JUNGSEONG A 45 | 1162; AE # HANGUL JUNGSEONG AE 46 | 1163; YA # HANGUL JUNGSEONG YA 47 | 1164; YAE # HANGUL JUNGSEONG YAE 48 | 1165; EO # HANGUL JUNGSEONG EO 49 | 1166; E # HANGUL JUNGSEONG E 50 | 1167; YEO # HANGUL JUNGSEONG YEO 51 | 1168; YE # HANGUL JUNGSEONG YE 52 | 1169; O # HANGUL JUNGSEONG O 53 | 116A; WA # HANGUL JUNGSEONG WA 54 | 116B; WAE # HANGUL JUNGSEONG WAE 55 | 116C; OE # HANGUL JUNGSEONG OE 56 | 
116D; YO # HANGUL JUNGSEONG YO 57 | 116E; U # HANGUL JUNGSEONG U 58 | 116F; WEO # HANGUL JUNGSEONG WEO 59 | 1170; WE # HANGUL JUNGSEONG WE 60 | 1171; WI # HANGUL JUNGSEONG WI 61 | 1172; YU # HANGUL JUNGSEONG YU 62 | 1173; EU # HANGUL JUNGSEONG EU 63 | 1174; YI # HANGUL JUNGSEONG YI 64 | 1175; I # HANGUL JUNGSEONG I 65 | 11A8; G # HANGUL JONGSEONG KIYEOK 66 | 11A9; GG # HANGUL JONGSEONG SSANGKIYEOK 67 | 11AA; GS # HANGUL JONGSEONG KIYEOK-SIOS 68 | 11AB; N # HANGUL JONGSEONG NIEUN 69 | 11AC; NJ # HANGUL JONGSEONG NIEUN-CIEUC 70 | 11AD; NH # HANGUL JONGSEONG NIEUN-HIEUH 71 | 11AE; D # HANGUL JONGSEONG TIKEUT 72 | 11AF; L # HANGUL JONGSEONG RIEUL 73 | 11B0; LG # HANGUL JONGSEONG RIEUL-KIYEOK 74 | 11B1; LM # HANGUL JONGSEONG RIEUL-MIEUM 75 | 11B2; LB # HANGUL JONGSEONG RIEUL-PIEUP 76 | 11B3; LS # HANGUL JONGSEONG RIEUL-SIOS 77 | 11B4; LT # HANGUL JONGSEONG RIEUL-THIEUTH 78 | 11B5; LP # HANGUL JONGSEONG RIEUL-PHIEUPH 79 | 11B6; LH # HANGUL JONGSEONG RIEUL-HIEUH 80 | 11B7; M # HANGUL JONGSEONG MIEUM 81 | 11B8; B # HANGUL JONGSEONG PIEUP 82 | 11B9; BS # HANGUL JONGSEONG PIEUP-SIOS 83 | 11BA; S # HANGUL JONGSEONG SIOS 84 | 11BB; SS # HANGUL JONGSEONG SSANGSIOS 85 | 11BC; NG # HANGUL JONGSEONG IEUNG 86 | 11BD; J # HANGUL JONGSEONG CIEUC 87 | 11BE; C # HANGUL JONGSEONG CHIEUCH 88 | 11BF; K # HANGUL JONGSEONG KHIEUKH 89 | 11C0; T # HANGUL JONGSEONG THIEUTH 90 | 11C1; P # HANGUL JONGSEONG PHIEUPH 91 | 11C2; H # HANGUL JONGSEONG HIEUH 92 | 93 | # EOF 94 | -------------------------------------------------------------------------------- /data/README.txt: -------------------------------------------------------------------------------- 1 | The data files in this directory were obtained from 2 | http://www.unicode.org/Public/6.2.0/ucd/ on 2012-09-30. 
3 | -------------------------------------------------------------------------------- /install.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "rbconfig" 4 | require "fileutils" 5 | 6 | sitelibdir = Config::CONFIG['sitelibdir'] 7 | installdir = ARGV[1] || sitelibdir 8 | 9 | HELP = <unicode_utils/u assigns the UnicodeUtils 54 | # module to the toplevel U constant and loads all methods: 55 | # 56 | # $ irb -r unicode_utils/u 57 | # irb(main):001:0> U.grep /angstrom/ 58 | # => [#] 59 | # 60 | # If a method takes a character as argument (usually named +char+), 61 | # that argument can be an integer or a string (in which case the 62 | # first code point counts) or any other object that responds to +ord+ 63 | # by returning an integer. 64 | # 65 | # All methods are non-destructive, string return values are in the 66 | # same encoding as strings passed as arguments, which must be in one 67 | # of the Unicode encodings. 
#
# High-level methods are:
#
# UnicodeUtils.upcase::        full conversion to uppercase
# UnicodeUtils.downcase::      full conversion to lowercase
# UnicodeUtils.titlecase::     full conversion to titlecase
# UnicodeUtils.casefold::      case folding (case insensitive string comparison)
# UnicodeUtils.nfd::           Normalization Form D
# UnicodeUtils.nfc::           Normalization Form C
# UnicodeUtils.nfkd::          Normalization Form KD
# UnicodeUtils.nfkc::          Normalization Form KC
# UnicodeUtils.each_grapheme:: grapheme boundaries
# UnicodeUtils.each_word::     word boundaries
# UnicodeUtils.char_name::     character names
# UnicodeUtils.grep::          find code points by character name
module UnicodeUtils
end
# --------------------------------------------------------------------------------
# /lib/unicode_utils/canonical_decomposition.rb:
# --------------------------------------------------------------------------------
# -*- encoding: utf-8 -*-

require "unicode_utils/read_cdata"
require "unicode_utils/hangul_syllable_decomposition"
require "unicode_utils/combining_class"

module UnicodeUtils

  CANONICAL_DECOMPOSITION_MAP =
    Impl.read_multivalued_map("canonical_decomposition_map") # :nodoc:

  # Get the canonical decomposition of the given string, also called
  # Normalization Form D or short NFD.
  #
  # The Unicode standard has multiple representations for some
  # characters. One representation as a single code point and other
  # representation(s) as a combination of multiple code points. This
  # function "decomposes" these characters in +str+ into the latter
  # representation.
  #
  # Example:
  #
  #   require "unicode_utils/canonical_decomposition"
  #   # LATIN SMALL LETTER A WITH ACUTE => LATIN SMALL LETTER A, COMBINING ACUTE ACCENT
  #   UnicodeUtils.canonical_decomposition("\u{E1}") => "\u{61}\u{301}"
  #
  # +str+ is not modified; the result is a new string in the encoding
  # of +str+.
  #
  # See also: UnicodeUtils.nfd
  def canonical_decomposition(str)
    res = String.new.force_encoding(str.encoding)
    str.each_codepoint { |cp|
      if cp >= 0xAC00 && cp <= 0xD7A3 # hangul syllable
        Impl.append_hangul_syllable_decomposition(res, cp)
      else
        mapping = CANONICAL_DECOMPOSITION_MAP[cp]
        if mapping
          Impl.append_recursive_canonical_decomposition_mapping(res, mapping)
        else
          res << cp
        end
      end
    }
    Impl.put_into_canonical_order(res)
  end
  module_function :canonical_decomposition

  module Impl # :nodoc:

    # Append the full decomposition of every code point in +mapping+
    # (an enumerable of code points) to +str+, following decomposition
    # mappings until only unmapped code points remain.
    def self.append_recursive_canonical_decomposition_mapping(str, mapping)
      mapping.each { |cp|
        mapping_ = CANONICAL_DECOMPOSITION_MAP[cp]
        if mapping_
          append_recursive_canonical_decomposition_mapping(str, mapping_)
        else
          str << cp
        end
      }
    end

    # Reorder combining marks in +str+ until no adjacent pair is out of
    # order (previous combining class greater than a following
    # non-zero combining class).
    #
    # Returns +str+ itself when it is already ordered, otherwise a new
    # string in the same encoding.
    #
    # Implemented as a loop: each pass only swaps disjoint adjacent
    # pairs, so the number of passes grows with the length of a
    # mis-ordered run of combining marks. A recursive formulation
    # would use one stack level per pass.
    def self.put_into_canonical_order(str)
      str = swap_misordered_pairs(str) while canonical_reorder_needed?(str)
      str
    end

    # True if +str+ contains an adjacent pair of code points that
    # violates canonical ordering.
    def self.canonical_reorder_needed?(str)
      last_cp = nil
      last_cc = nil
      str.each_codepoint { |cp|
        cc = COMBINING_CLASS_MAP[cp]
        return true if last_cp && cc != 0 && last_cc > cc
        last_cp = cp
        last_cc = cc
      }
      false
    end

    # Perform one reordering pass: every mis-ordered adjacent pair that
    # is encountered is emitted swapped. Returns a new string.
    def self.swap_misordered_pairs(str)
      res = String.new.force_encoding(str.encoding)
      last_cp = nil
      last_cc = nil
      str.each_codepoint { |cp|
        cc = COMBINING_CLASS_MAP[cp]
        if last_cp
          if cc != 0 && last_cc > cc
            # emit the current code point first, i.e. swap the pair,
            # and start over with an empty "previous" slot
            res << cp
            cp = nil
            cc = nil
          end
          res << last_cp
        end
        last_cp = cp
        last_cc = cc
      }
      res << last_cp if last_cp
      res
    end

  end

end
# --------------------------------------------------------------------------------
/lib/unicode_utils/canonical_equivalents_q.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/canonical_decomposition" 4 | 5 | module UnicodeUtils 6 | 7 | # The strings +a+ and +b+ are canonical equivalents if their 8 | # canonical decompositions are equal. 9 | # 10 | # Example: 11 | # 12 | # require "unicode_utils/canonical_equivalents_q" 13 | # UnicodeUtils.canonical_equivalents?("Äste", "A\u{308}ste") => true 14 | # UnicodeUtils.canonical_equivalents?("Äste", "Aste") => false 15 | def canonical_equivalents?(a, b) 16 | UnicodeUtils.canonical_decomposition(a) == 17 | UnicodeUtils.canonical_decomposition(b) 18 | end 19 | module_function :canonical_equivalents? 20 | 21 | end 22 | -------------------------------------------------------------------------------- /lib/unicode_utils/case_ignorable_char_q.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | 5 | module UnicodeUtils 6 | 7 | CASE_IGNORABLE_SET = Impl.read_code_point_set("case_ignorable_set") # :nodoc: 8 | 9 | # Returns true if the given character is case-ignorable as defined 10 | # by Unicode 5.0, section 3.13. 11 | def case_ignorable_char?(char) 12 | CASE_IGNORABLE_SET.include?(char.ord) 13 | end 14 | module_function :case_ignorable_char? 15 | 16 | end 17 | -------------------------------------------------------------------------------- /lib/unicode_utils/cased_char_q.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/lowercase_char_q" 4 | require "unicode_utils/uppercase_char_q" 5 | require "unicode_utils/titlecase_char_q" 6 | 7 | module UnicodeUtils 8 | 9 | # A cased char is a character that has the Unicode property 10 | # Lowercase or Uppercase or the general category Titlecase_Letter. 
11 | # 12 | # See also: lowercase_char?, uppercase_char?, titlecase_char? 13 | def cased_char?(char) 14 | lowercase_char?(char) || uppercase_char?(char) || titlecase_char?(char) 15 | end 16 | module_function :cased_char? 17 | 18 | end 19 | -------------------------------------------------------------------------------- /lib/unicode_utils/casefold.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | require "unicode_utils/simple_casefold" 5 | 6 | module UnicodeUtils 7 | 8 | CASEFOLD_F_MAP = Impl.read_multivalued_map("casefold_f_map") # :nodoc: 9 | 10 | # Perform full case folding. The returned string may be longer than 11 | # +str+. The purpose of case folding is case insensitive string 12 | # comparison. 13 | # 14 | # Examples: 15 | # 16 | # require "unicode_utils/casefold" 17 | # UnicodeUtils.casefold("Ümit") == UnicodeUtils.casefold("ümit") => true 18 | # UnicodeUtils.casefold("WEISS") == UnicodeUtils.casefold("weiß") => true 19 | def casefold(str) 20 | String.new.force_encoding(str.encoding).tap do |res| 21 | str.each_codepoint { |cp| 22 | if mapping = CASEFOLD_C_MAP[cp] 23 | res << mapping 24 | elsif mapping = CASEFOLD_F_MAP[cp] 25 | mapping.each { |m| res << m } 26 | else 27 | res << cp 28 | end 29 | } 30 | end 31 | end 32 | module_function :casefold 33 | 34 | end 35 | -------------------------------------------------------------------------------- /lib/unicode_utils/char_display_width.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/display_width" 4 | 5 | module UnicodeUtils 6 | 7 | # Get the width of +char+ when displayed with a fixed pitch font. 8 | # 9 | # Some code points (especially from east asian scripts) take the 10 | # width of two characters, while others have no width. 
11 | # 12 | # Examples: 13 | # 14 | # require "unicode_utils/char_display_width" 15 | # UnicodeUtils.char_display_width("別") # => 2 16 | # UnicodeUtils.char_display_width(0x308) # => 0 17 | # UnicodeUtils.char_display_width("a") # => 1 18 | # 19 | # Performs the same logic as UnicodeUtils.display_width, but for a 20 | # single code point. 21 | def char_display_width(char) 22 | cp = char.ord 23 | # copied from display_width, keep in sync! 24 | case UnicodeUtils.east_asian_width(cp) 25 | when :Wide, :Fullwidth then 2 26 | else GENERAL_CATEGORY_BASIC_WIDTH_MAP[UnicodeUtils.gc(cp)] 27 | end 28 | end 29 | module_function :char_display_width 30 | 31 | end 32 | -------------------------------------------------------------------------------- /lib/unicode_utils/char_name.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | require "unicode_utils/hangul_syllable_decomposition" 5 | require "unicode_utils/jamo_short_name" 6 | 7 | module UnicodeUtils 8 | 9 | NAME_MAP = Impl.read_names("names") # :nodoc: 10 | 11 | # Get the normative Unicode name of the given character. 12 | # 13 | # Private Use code points have no name, this function returns nil for 14 | # such code points. 15 | # 16 | # All control characters have the special name "". All 17 | # other characters have a unique name. 18 | # 19 | # Example: 20 | # 21 | # require "unicode_utils/char_name" 22 | # UnicodeUtils.char_name "ᾀ" => "GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI" 23 | # UnicodeUtils.char_name "\t" => "" 24 | # 25 | # Note that this method deviates from the Unicode Name property in two 26 | # points: 27 | # 28 | # 1. It returns "" for control codes, the Unicode Name property for 29 | # these code points is an empty string 30 | # 2. 
It returns nil for other non-graphic, non-format code points, the 31 | # Unicode Name property for these code points is an empty string 32 | # 33 | # See also: UnicodeUtils.sid 34 | def char_name(char) 35 | # TODO: improve with code point labels, see section 4.8 in Unicode 6.0.0 36 | if char.kind_of?(Integer) 37 | cp = char 38 | str = nil 39 | else 40 | cp = char.ord 41 | str = char 42 | end 43 | NAME_MAP[cp] || 44 | case cp 45 | when 0x3400..0x4DB5, 0x4E00..0x9FCC, 0x20000..0x2A6D6, 0x2A700..0x2B734, 0x2B740..0x2B81D 46 | "CJK UNIFIED IDEOGRAPH-#{sprintf('%04X', cp)}" 47 | when 0xAC00..0xD7A3 48 | str ||= cp.chr(Encoding::UTF_8) 49 | "HANGUL SYLLABLE ".tap do |n| 50 | hangul_syllable_decomposition(str).each_char { |c| 51 | n << (jamo_short_name(c) || '') 52 | } 53 | end 54 | end 55 | end 56 | module_function :char_name 57 | 58 | end 59 | -------------------------------------------------------------------------------- /lib/unicode_utils/char_type.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/gc" 4 | require "unicode_utils/general_category" 5 | 6 | module UnicodeUtils 7 | 8 | GENERAL_CATEGORY_TYPE_MAP = Hash.new.tap { |map| 9 | GENERAL_CATEGORY_ALIAS_MAP.each_pair { |short, long| 10 | if short.length == 2 11 | map[short] = GENERAL_CATEGORY_ALIAS_MAP[short[0].to_sym] 12 | end 13 | } 14 | } # :nodoc: 15 | 16 | # Get the long major general category alias of char. 17 | # 18 | # Example: 19 | # 20 | # require "unicode_utils/char_type" 21 | # UnicodeUtils.char_type("1") # => :Number 22 | # 23 | # Always returns a symbol when char is in the Unicode code point 24 | # range. 
25 | # 26 | # See also: UnicodeUtils.general_category 27 | def char_type(char) 28 | GENERAL_CATEGORY_TYPE_MAP[UnicodeUtils.gc(char)] 29 | end 30 | module_function :char_type 31 | 32 | end 33 | -------------------------------------------------------------------------------- /lib/unicode_utils/code_point_type.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/gc" 4 | 5 | module UnicodeUtils 6 | 7 | GENERAL_CATEGORY_CODE_POINT_TYPE = { 8 | Lu: :Graphic, Ll: :Graphic, Lt: :Graphic, Lm: :Graphic, Lo: :Graphic, 9 | Mn: :Graphic, Mc: :Graphic, Me: :Graphic, 10 | Nd: :Graphic, Nl: :Graphic, No: :Graphic, 11 | Pc: :Graphic, Pd: :Graphic, Ps: :Graphic, 12 | Pe: :Graphic, Pi: :Graphic, Pf: :Graphic, Po: :Graphic, 13 | Sm: :Graphic, Sc: :Graphic, Sk: :Graphic, So: :Graphic, 14 | Zs: :Graphic, Zl: :Format, Zp: :Format, 15 | Cc: :Control, Cf: :Format, Cs: :Surrogate, Co: :Private_Use, 16 | # Cn is splitted into two types (Reserved and Noncharacter)! 17 | Cn: false 18 | } # :nodoc: 19 | 20 | CN_CODE_POINT_TYPE = Hash.new.tap { |h| 21 | h.default = :Reserved 22 | # Sixty-six code points are noncharacters 23 | ary = (0xFDD0..0xFDEF).to_a 24 | 0.upto(16) { |d| 25 | ary << "#{d.to_s(16)}FFFE".to_i(16) 26 | ary << "#{d.to_s(16)}FFFF".to_i(16) 27 | } 28 | ary.each { |cp| h[cp] = :Noncharacter } 29 | raise "assertion error #{h.size}" unless h.size == 66 30 | } # :nodoc: 31 | 32 | # Get the code point type of the given +integer+ (must be instance 33 | # of Integer) as defined by the Unicode standard. 
34 | # 35 | # If +integer+ is a code point (anything in 36 | # UnicodeUtils::Codepoint::RANGE), returns one of the following 37 | # symbols: 38 | # 39 | # :Graphic 40 | # :Format 41 | # :Control 42 | # :Private_Use 43 | # :Surrogate 44 | # :Noncharacter 45 | # :Reserved 46 | # 47 | # For an exact meaning of these values, read the sections 48 | # "Conformance/Characters and Encoding" and "General 49 | # Structure/Types of Codepoints" in the Unicode standard. 50 | # 51 | # Following is a paraphrased excerpt: 52 | # 53 | # +Surrogate+, +Noncharacter+ and +Reserved+ code points are not 54 | # assigned to an _abstract character_. All other code points are 55 | # assigned to an abstract character. 56 | # 57 | # +Reserved+ code points are also called _Undesignated_ code points, 58 | # all others are _Designated_ code points. 59 | # 60 | # Returns nil if +integer+ is not a code point. 61 | def code_point_type(integer) 62 | cpt = GENERAL_CATEGORY_CODE_POINT_TYPE[UnicodeUtils.gc(integer)] 63 | if false == cpt 64 | cpt = CN_CODE_POINT_TYPE[integer] 65 | end 66 | cpt 67 | end 68 | module_function :code_point_type 69 | 70 | end 71 | -------------------------------------------------------------------------------- /lib/unicode_utils/codepoint.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/char_name" 4 | 5 | module UnicodeUtils 6 | 7 | # A Codepoint instance represents a single Unicode code point. 8 | # 9 | # UnicodeUtils::Codepoint.new(0x20ac) => # 10 | class Codepoint 11 | 12 | # The Unicode codespace. Any integer in this range is a Unicode 13 | # code point. 14 | RANGE = 0..0x10FFFF 15 | 16 | # Create a Codepoint instance that wraps the given Integer. +int+ 17 | # must be in Codepoint::RANGE. 18 | def initialize(int) 19 | unless RANGE.include?(int) 20 | raise ArgumentError, "#{int} not in codespace" 21 | end 22 | @int = int 23 | end 24 | 25 | # Convert to Integer. 
26 | def ord 27 | @int 28 | end 29 | 30 | # Format in U+ notation. 31 | # 32 | # Codepoint.new(0xc5).uplus => "U+00C5" 33 | def uplus 34 | sprintf('U+%04X', @int) 35 | end 36 | 37 | # Get the normative Unicode name of this code point. 38 | # 39 | # See also: UnicodeUtils.char_name 40 | def name 41 | UnicodeUtils.char_name(@int) 42 | end 43 | 44 | # Convert this code point to an UTF-8 encoded string. Returns a new 45 | # string on each call and thus it is allowed to mutate the return 46 | # value. 47 | def to_s 48 | @int.chr(Encoding::UTF_8) 49 | end 50 | 51 | # Get the bytes used to encode this code point in UTF-8, 52 | # hex-formatted. 53 | # 54 | # Codepoint.new(0xe4).hexbytes => "c3,a4" 55 | def hexbytes 56 | to_s.bytes.map { |b| sprintf("%02x", b) }.join(",") 57 | end 58 | 59 | # # 60 | def inspect 61 | "#<#{uplus} #{to_s.inspect} #{name || "nil"} utf8:#{hexbytes}>" 62 | end 63 | 64 | end 65 | 66 | end 67 | -------------------------------------------------------------------------------- /lib/unicode_utils/combining_class.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | 5 | module UnicodeUtils 6 | 7 | COMBINING_CLASS_MAP = Impl.read_combining_class_map() # :nodoc: 8 | COMBINING_CLASS_MAP.default = 0 9 | 10 | # Get the combining class of the given character as an integer in 11 | # the range 0..255. 
12 | def combining_class(char) 13 | COMBINING_CLASS_MAP[char.ord] 14 | end 15 | module_function :combining_class 16 | 17 | end 18 | -------------------------------------------------------------------------------- /lib/unicode_utils/compatibility_decomposition.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | require "unicode_utils/canonical_decomposition" 5 | require "unicode_utils/hangul_syllable_decomposition" 6 | 7 | module UnicodeUtils 8 | 9 | COMPATIBILITY_DECOMPOSITION_MAP = 10 | Impl.read_multivalued_map("compatibility_decomposition_map") # :nodoc: 11 | 12 | # Get the compatibility decomposition of the given string, also 13 | # called Normalization Form KD or short NFKD. 14 | # 15 | # Compatibility decomposition decomposes more code points than 16 | # canonical decomposition and contrary to Normalization Form D and 17 | # C, this normalization can alter how a string is displayed. 
18 | # 19 | # Example: 20 | # 21 | # require "unicode_utils/compatibility_decomposition" 22 | # # LATIN SMALL LIGATURE FI => LATIN SMALL LETTER F, LATIN SMALL LETTER I 23 | # UnicodeUtils.compatibility_decomposition("fi") => "fi" 24 | # 25 | # See also: UnicodeUtils.nfkd 26 | def compatibility_decomposition(str) 27 | res = String.new.force_encoding(str.encoding) 28 | str.each_codepoint { |cp| 29 | if cp >= 0xAC00 && cp <= 0xD7A3 # hangul syllable 30 | Impl.append_hangul_syllable_decomposition(res, cp) 31 | else 32 | Impl.append_recursive_compatibility_decomposition_mapping(res, cp) 33 | end 34 | } 35 | Impl.put_into_canonical_order(res) 36 | end 37 | module_function :compatibility_decomposition 38 | 39 | module Impl # :nodoc: 40 | 41 | def self.append_recursive_compatibility_decomposition_mapping(str, cp) 42 | mapping = COMPATIBILITY_DECOMPOSITION_MAP[cp] 43 | mapping ||= CANONICAL_DECOMPOSITION_MAP[cp] 44 | if mapping 45 | mapping.each { |c| 46 | append_recursive_compatibility_decomposition_mapping(str, c) 47 | } 48 | else 49 | str << cp 50 | end 51 | end 52 | 53 | end 54 | 55 | end 56 | -------------------------------------------------------------------------------- /lib/unicode_utils/conditional_casing.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/cased_char_q" 4 | require "unicode_utils/case_ignorable_char_q" 5 | require "unicode_utils/soft_dotted_char_q" 6 | require "unicode_utils/combining_class" 7 | require "unicode_utils/read_cdata" 8 | 9 | module UnicodeUtils 10 | 11 | module Impl # :nodoc:all 12 | 13 | LANGS_WITH_RULES = {:tr => true, :lt => true, :az => true} 14 | 15 | class ConditionalCasing 16 | 17 | attr_reader :mapping 18 | 19 | def initialize(mapping) 20 | @mapping = mapping 21 | end 22 | 23 | def context_match?(str, pos) 24 | true 25 | end 26 | 27 | end 28 | 29 | class BeforeDotConditionalCasing < ConditionalCasing 30 | 31 | def context_match?(str, 
pos) 32 | (pos + 1).upto(str.length - 1) { |i| 33 | c = str[i] 34 | return true if c.ord == 0x0307 35 | cc = UnicodeUtils.combining_class(c) 36 | return false if cc == 0 || cc == 230 37 | } 38 | false # "combining dot above" not found 39 | end 40 | 41 | end 42 | 43 | class NotBeforeDotConditionalCasing < BeforeDotConditionalCasing 44 | 45 | def context_match?(str, pos) 46 | !super 47 | end 48 | 49 | end 50 | 51 | class MoreAboveConditionalCasing < ConditionalCasing 52 | 53 | def context_match?(str, pos) 54 | (pos + 1).upto(str.length - 1) { |i| 55 | c = str[i] 56 | cc = UnicodeUtils.combining_class(c) 57 | return true if cc == 230 58 | return false if cc == 0 59 | } 60 | false 61 | end 62 | 63 | end 64 | 65 | class AfterIConditionalCasing < ConditionalCasing 66 | 67 | def context_match?(str, pos) 68 | (pos - 1).downto(0) { |i| 69 | c = str[i] 70 | return true if c.ord == 0x49 # uppercase I 71 | cc = UnicodeUtils.combining_class(c) 72 | return false if cc == 0 || cc == 230 73 | } 74 | false # uppercase I not found 75 | end 76 | 77 | end 78 | 79 | class AfterSoftDottedConditionalCasing < ConditionalCasing 80 | 81 | def context_match?(str, pos) 82 | (pos - 1).downto(0) { |i| 83 | c = str[i] 84 | return true if UnicodeUtils.soft_dotted_char?(c) 85 | cc = UnicodeUtils.combining_class(c) 86 | return false if cc == 0 || cc == 230 87 | } 88 | false 89 | end 90 | 91 | end 92 | 93 | class FinalSigmaConditionalCasing < ConditionalCasing 94 | 95 | def context_match?(str, pos) 96 | before_match?(str, pos) && !after_match?(str, pos) 97 | end 98 | 99 | private 100 | 101 | def before_match?(str, pos) 102 | (pos - 1).downto(0) { |i| 103 | c = str[i] 104 | return true if UnicodeUtils.cased_char?(c) 105 | return false unless UnicodeUtils.case_ignorable_char?(c) 106 | } 107 | false # no cased char 108 | end 109 | 110 | def after_match?(str, pos) 111 | (pos + 1).upto(str.length - 1) { |i| 112 | c = str[i] 113 | return true if UnicodeUtils.cased_char?(c) 114 | return false unless 
UnicodeUtils.case_ignorable_char?(c) 115 | } 116 | false 117 | end 118 | 119 | end 120 | 121 | CONDITIONAL_UPCASE_MAP = 122 | read_conditional_casings("cond_uc_map") 123 | 124 | CONDITIONAL_DOWNCASE_MAP = 125 | read_conditional_casings("cond_lc_map") 126 | 127 | CONDITIONAL_TITLECASE_MAP = 128 | read_conditional_casings("cond_tc_map") 129 | 130 | def self.conditional_upcase_mapping(cp, str, pos, language_id) 131 | lang_map = CONDITIONAL_UPCASE_MAP[cp] 132 | if lang_map 133 | casing = lang_map[language_id] || lang_map[nil] 134 | if casing && casing.context_match?(str, pos) 135 | casing.mapping 136 | end 137 | end 138 | end 139 | 140 | def self.conditional_downcase_mapping(cp, str, pos, language_id) 141 | lang_map = CONDITIONAL_DOWNCASE_MAP[cp] 142 | if lang_map 143 | casing = lang_map[language_id] || lang_map[nil] 144 | if casing && casing.context_match?(str, pos) 145 | casing.mapping 146 | end 147 | end 148 | end 149 | 150 | def self.conditional_titlecase_mapping(cp, str, pos, language_id) 151 | lang_map = CONDITIONAL_TITLECASE_MAP[cp] 152 | if lang_map 153 | casing = lang_map[language_id] || lang_map[nil] 154 | if casing && casing.context_match?(str, pos) 155 | casing.mapping 156 | end 157 | end 158 | end 159 | 160 | end 161 | 162 | end 163 | -------------------------------------------------------------------------------- /lib/unicode_utils/debug.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/display_width" 4 | require "unicode_utils/graphic_char_q" 5 | require "unicode_utils/char_display_width" 6 | require "unicode_utils/sid" 7 | require "unicode_utils/general_category" 8 | 9 | module UnicodeUtils 10 | 11 | # Print a table with detailed information about each code point in 12 | # +str+. +opts+ can have the following keys: 13 | # 14 | # +:io+:: An IO compatible object. Receives the output. 15 | # Defaults to $stdout. 
16 | # 17 | # +str+ may also be an Integer, in which case it is interpreted as a 18 | # single code point that must be in UnicodeUtils::Codepoint::RANGE. 19 | # 20 | # Examples: 21 | # 22 | # $ ruby -r unicode_utils/u -e 'U.debug "良い一日"' 23 | # Char | Ordinal | Sid | General Category | UTF-8 24 | # ------+---------+----------------------------+------------------+---------- 25 | # "良" | 826F | CJK UNIFIED IDEOGRAPH-826F | Other_Letter | E8 89 AF 26 | # "い" | 3044 | HIRAGANA LETTER I | Other_Letter | E3 81 84 27 | # "一" | 4E00 | CJK UNIFIED IDEOGRAPH-4E00 | Other_Letter | E4 B8 80 28 | # "日" | 65E5 | CJK UNIFIED IDEOGRAPH-65E5 | Other_Letter | E6 97 A5 29 | # 30 | # $ ruby -r unicode_utils/u -e 'U.debug 0xd800' 31 | # Char | Ordinal | Sid | General Category | UTF-8 32 | # ------+---------+------------------+------------------+------- 33 | # N/A | D800 | | Surrogate | N/A 34 | # 35 | # The output is purely informal and may change even in minor 36 | # releases. 37 | def debug(str, opts = {}) 38 | io = opts[:io] || $stdout 39 | table = [Impl::DEBUG_COLUMNS.keys] 40 | if str.kind_of?(Integer) 41 | table << Impl::DEBUG_COLUMNS.values.map { |f| f.call(str) } 42 | else 43 | str.each_codepoint { |cp| 44 | table << Impl::DEBUG_COLUMNS.values.map { |f| f.call(cp) } 45 | } 46 | end 47 | Impl.print_table(table, io) 48 | nil 49 | end 50 | module_function :debug 51 | 52 | module Impl # :nodoc:all 53 | 54 | DEBUG_COLUMNS = { 55 | "Char" => -> cp { 56 | case cp 57 | when 0x07 then '"\a"' 58 | when 0x08 then '"\b"' 59 | when 0x09 then '"\t"' 60 | when 0x0A then '"\n"' 61 | when 0x0D then '"\r"' 62 | else 63 | if UnicodeUtils.graphic_char?(cp) && 64 | UnicodeUtils.char_display_width(cp) > 0 65 | '"' + cp.chr(Encoding::UTF_8) + '"' 66 | else 67 | "N/A" 68 | end 69 | end 70 | }, 71 | "Ordinal" => -> cp { 72 | cp.to_s(16).upcase.rjust(7) 73 | }, 74 | "Sid" => -> cp { 75 | UnicodeUtils.sid(cp) 76 | }, 77 | "General Category" => -> cp { 78 | UnicodeUtils.general_category(cp).to_s 79 | }, 
80 | "UTF-8" => -> cp { 81 | begin 82 | cp.chr(Encoding::UTF_8).bytes.map { |b| sprintf("%02X", b) }.join(" ") 83 | rescue RangeError # surrogate code points are not valid in utf-8 84 | "N/A" 85 | end 86 | } 87 | } 88 | 89 | def self.column_widths(table) 90 | Array.new.tap { |column_widths| 91 | table.each_with_index { |row| 92 | row.each_with_index { |txt, col_i| 93 | dw = UnicodeUtils.display_width(txt) 94 | cw = column_widths[col_i] 95 | column_widths[col_i] = dw if cw.nil? || cw < dw 96 | } 97 | } 98 | } 99 | end 100 | 101 | def self.print_row(row, column_widths, io) 102 | row.each_with_index { |txt, col_i| 103 | io.print(" ") 104 | io.print(txt) 105 | if col_i != row.length - 1 106 | dw = UnicodeUtils.display_width(txt) 107 | d = column_widths[col_i] - dw 108 | io.print(" " * (d + 1)) 109 | io.print("|") 110 | end 111 | } 112 | io.puts 113 | end 114 | 115 | def self.print_separator_row(column_widths, io) 116 | column_widths.each_with_index { |cw, col_i| 117 | io.print("-" * (cw + 2)) 118 | if col_i != column_widths.length - 1 119 | io.print("+") 120 | end 121 | } 122 | io.puts 123 | end 124 | 125 | def self.print_table(table, io) 126 | cws = column_widths(table) 127 | print_row(table[0], cws, io) 128 | print_separator_row(cws, io) 129 | table[1..-1].each { |row| 130 | print_row(row, cws, io) 131 | } 132 | io.flush 133 | end 134 | 135 | end 136 | 137 | end 138 | -------------------------------------------------------------------------------- /lib/unicode_utils/default_ignorable_char_q.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | 5 | module UnicodeUtils 6 | 7 | PROP_DEFAULT_IGNORABLE_SET = 8 | Impl.read_code_point_set("prop_set_default_ignorable") # :nodoc: 9 | 10 | # True if the given character has the Unicode property 11 | # Default_Ingorable_Code_Point (see section 5.3 in Unicode 6.0.0). 12 | # 13 | # When a system (e.g. 
font) can't display a default ignorable 14 | # code point, it is allowed to simply ignore, i.e. skip it (as 15 | # opposed to other characters, which must at least be displayed with 16 | # a replacement character). 17 | def default_ignorable_char?(char) 18 | PROP_DEFAULT_IGNORABLE_SET.include?(char.ord) 19 | end 20 | module_function :default_ignorable_char? 21 | 22 | end 23 | -------------------------------------------------------------------------------- /lib/unicode_utils/display_width.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/east_asian_width" 4 | require "unicode_utils/gc" 5 | require "unicode_utils/graphic_char_q" 6 | 7 | module UnicodeUtils 8 | 9 | GENERAL_CATEGORY_BASIC_WIDTH_MAP = Hash.new.tap do |h| 10 | GENERAL_CATEGORY_IS_GRAPHIC_MAP.each_pair { |key, value| 11 | if value && key != :Mn && key != :Me 12 | h[key] = 1 13 | else 14 | h[key] = 0 15 | end 16 | } 17 | end # :nodoc: 18 | 19 | # Get the width of +str+ when displayed with a fixed pitch font. 20 | # 21 | # Counts code points, where code points with an east asian width of 22 | # +Wide+ or +Fullwidth+ count for two, non-graphic code points (e.g. 23 | # control characters, including newline!) and non-spacing marks 24 | # count for zero and all others count for one. 25 | # 26 | # Examples: 27 | # 28 | # require "unicode_utils/display_width" 29 | # "別れ".length => 2 30 | # UnicodeUtils.display_width("別れ") => 4 31 | # "12".length => 2 32 | # UnicodeUtils.display_width("12") => 2 33 | # "a\u{308}".length => 2 34 | # UnicodeUtils.display_width("a\u{308}") => 1 35 | # 36 | # Unicode assigns some reserved code points an east asian width of 37 | # +Wide+. Some systems correctly display a double width replacement 38 | # character, others not. 
39 | # 40 | # See also: UnicodeUtils.graphic_char?, UnicodeUtils.east_asian_width 41 | def display_width(str) 42 | str.each_codepoint.reduce(0) { |sum, cp| 43 | sum + 44 | case UnicodeUtils.east_asian_width(cp) 45 | when :Wide, :Fullwidth then 2 46 | else GENERAL_CATEGORY_BASIC_WIDTH_MAP[UnicodeUtils.gc(cp)] 47 | end 48 | } 49 | end 50 | module_function :display_width 51 | 52 | end 53 | -------------------------------------------------------------------------------- /lib/unicode_utils/downcase.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | require "unicode_utils/simple_downcase" 5 | require "unicode_utils/conditional_casing" 6 | 7 | module UnicodeUtils 8 | 9 | SPECIAL_DOWNCASE_MAP = Impl.read_multivalued_map("special_lc_map") # :nodoc: 10 | 11 | # Perform a full case-conversion of +str+ to lowercase according to 12 | # the Unicode standard. 13 | # 14 | # Some conversion rules are language dependent, these are in effect 15 | # when a non-nil +language_id+ is given. If non-nil, the 16 | # +language_id+ must be a two letter language code as defined in BCP 17 | # 47 (http://tools.ietf.org/rfc/bcp/bcp47.txt) as a symbol. If a 18 | # language doesn't have a two letter code, the three letter code is 19 | # to be used. If locale independent behaviour is required, +nil+ 20 | # should be passed explicitely, because a later version of 21 | # UnicodeUtils may default to something else. 
22 | # 23 | # Examples: 24 | # 25 | # require "unicode_utils/downcase" 26 | # UnicodeUtils.downcase("ᾈ") => "ᾀ" 27 | # UnicodeUtils.downcase("aBI\u{307}", :tr) => "abi" 28 | def downcase(str, language_id = nil) 29 | String.new.force_encoding(str.encoding).tap { |res| 30 | if Impl::LANGS_WITH_RULES.include?(language_id) 31 | # ensure O(1) lookup by index 32 | str = str.encode(Encoding::UTF_32LE) 33 | end 34 | pos = 0 35 | str.each_codepoint { |cp| 36 | special_mapping = 37 | Impl.conditional_downcase_mapping(cp, str, pos, language_id) || 38 | SPECIAL_DOWNCASE_MAP[cp] 39 | if special_mapping 40 | special_mapping.each { |m| res << m } 41 | else 42 | res << (SIMPLE_DOWNCASE_MAP[cp] || cp) 43 | end 44 | pos += 1 45 | } 46 | } 47 | end 48 | module_function :downcase 49 | 50 | end 51 | -------------------------------------------------------------------------------- /lib/unicode_utils/each_grapheme.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | 5 | module UnicodeUtils 6 | 7 | # Maps code points to integer codes. For the integer code to property 8 | # mapping, see #compile_grapheme_break_property in data/compile.rb. 9 | GRAPHEME_CLUSTER_BREAK_MAP = 10 | Impl.read_hexdigit_map("grapheme_break_property") # :nodoc: 11 | 12 | # Iterate over the grapheme clusters that make up +str+. A grapheme 13 | # cluster is a user perceived character (the basic unit of a writing 14 | # system for a language) and consists of one or more code points. 15 | # 16 | # This method uses the default Unicode algorithm for extended 17 | # grapheme clusters. 18 | # 19 | # Returns an enumerator if no block is given. 
# Iterate over the grapheme clusters that make up +str+. A grapheme
# cluster is a user perceived character (the basic unit of a writing
# system for a language) and consists of one or more code points.
#
# This method uses the default Unicode algorithm for extended
# grapheme clusters. Each cluster is yielded as a new string in the
# encoding of +str+.
#
# Returns an enumerator if no block is given. With a block, the
# return value is not meaningful.
#
# Examples:
#
#   require "unicode_utils/each_grapheme"
#   UnicodeUtils.each_grapheme("a\r\nb") { |g| p g }
#
# prints:
#
#   "a"
#   "\r\n"
#   "b"
#
# and
#
#   UnicodeUtils.each_grapheme("a\r\nb").count => 3
def each_grapheme(str)
  return enum_for(__method__, str) unless block_given?
  # Break property of the previous code point (nil before the first
  # one). Only the property is needed; the code point itself is not.
  c0_prop = nil
  grapheme = String.new.force_encoding(str.encoding)
  str.each_codepoint { |c|
    gbreak = false
    # nil for code points without an entry in the map
    c_prop = GRAPHEME_CLUSTER_BREAK_MAP[c]

    ### rules (order matters, the first match wins) ###
    if c0_prop == 0x0 && c_prop == 0x1
      # don't break CR LF
    elsif c0_prop == 0x0 || c0_prop == 0x1 || c0_prop == 0x2
      # break after controls
      gbreak = true
    elsif c_prop == 0x0 || c_prop == 0x1 || c_prop == 0x2
      # break before controls
      gbreak = true
    elsif c0_prop == 0x6 && (c_prop == 0x6 || c_prop == 0x7 ||
                             c_prop == 0x9 || c_prop == 0xA)
      # don't break hangul syllable
    elsif (c0_prop == 0x9 || c0_prop == 0x7) &&
          (c_prop == 0x7 || c_prop == 0x8)
      # don't break hangul syllable
    elsif (c0_prop == 0xA || c0_prop == 0x8) && c_prop == 0x8
      # don't break hangul syllable
    elsif c0_prop == 0xB && c_prop == 0xB
      # don't break between regional indicator symbols
    elsif c_prop == 0x3
      # don't break before extending characters
    elsif c_prop == 0x5
      # don't break before SpacingMarks
    elsif c0_prop == 0x4
      # don't break after Prepend characters
    else
      # break everywhere
      gbreak = true
    end
    #############

    # The very first code point never closes a cluster because the
    # accumulator is still empty.
    if gbreak && !grapheme.empty?
      yield grapheme
      grapheme = String.new.force_encoding(str.encoding)
    end
    grapheme << c
    c0_prop = c_prop
  }
  yield grapheme unless grapheme.empty?
end
# Decide whether there is a word boundary between position +i+ and
# position <tt>i + 1</tt>.
#
# +cs+ is an array of word break property codes (small integers, nil
# for code points without an entry), one per code point of the string.
# Indices that run off either end must read as nil: the caller
# (each_word) appends two nils so that the negative indices reached
# while skipping to the left also yield nil.
#
# The "wbN" comments name the rule of Unicode's default word boundary
# specification that each test implements. The rules are checked in
# order and the first one that applies decides the result.
#
# NOTE(review): the meaning of the numeric codes is defined in
# data/compile.rb, which is not visible here. From the last line, 0x3
# and 0x4 are Extend/Format; the rule names suggest 0x0 = CR and
# 0x1 = LF -- confirm against compile.rb.
#
# Returns true if a break is allowed after position +i+, false
# otherwise.
def self.word_break?(cs, i)
  # wb3
  cs_i = cs[i]
  i1 = i + 1
  cs_i1 = cs[i1]
  if cs_i == 0x0 && cs_i1 == 0x1
    return false
  end
  # wb3a
  if cs_i == 0x2 || cs_i == 0x0 || cs_i == 0x1
    return true
  end
  # wb3b
  if cs_i1 == 0x2 || cs_i1 == 0x0 || cs_i1 == 0x1
    return true
  end
  # wb5
  i0 = i
  # inline skip_l
  # Walk left over codes 0x3/0x4; afterwards c (and ci0) is the code
  # of the nearest code point at or before i that is neither.
  c = nil
  loop { c = cs[i0]; break unless c == 0x3 || c == 0x4; i0 -= 1 }
  ci0 = c
  if ci0 == 0x6 && cs_i1 == 0x6
    return false
  end
  # wb6
  i2 = i1 + 1
  # inline skip_r
  # Walk right over codes 0x3/0x4, starting two positions after i.
  loop { c = cs[i2]; break unless c == 0x3 || c == 0x4; i2 += 1 }
  if ci0 == 0x6 && (cs_i1 == 0x7 || cs_i1 == 0x9) && cs[i2] == 0x6
    return false
  end
  # wb7
  i_1 = i0 - 1
  # inline skip_l
  # Same leftward skip, now starting one position before i0.
  loop { c = cs[i_1]; break unless c == 0x3 || c == 0x4; i_1 -= 1 }
  if cs[i_1] == 0x6 && (ci0 == 0x7 || ci0 == 0x9) && cs_i1 == 0x6
    return false
  end
  # wb8
  if ci0 == 0xA && cs_i1 == 0xA
    return false
  end
  # wb9
  if ci0 == 0x6 && cs_i1 == 0xA
    return false
  end
  # wb10
  if ci0 == 0xA && cs_i1 == 0x6
    return false
  end
  # wb11
  if cs[i_1] == 0xA && (ci0 == 0x8 || ci0 == 0x9) && cs_i1 == 0xA
    return false
  end
  # wb12
  if ci0 == 0xA && (cs_i1 == 0x8 || cs_i1 == 0x9) && cs[i2] == 0xA
    return false
  end
  # wb13
  if ci0 == 0x5 && cs_i1 == 0x5
    return false
  end
  # wb13a
  if (ci0 == 0x6 || ci0 == 0xA || ci0 == 0x5 || ci0 == 0xB) && cs_i1 == 0xB
    return false
  end
  # wb13b
  if ci0 == 0xB && (cs_i1 == 0x6 || cs_i1 == 0xA || cs_i1 == 0x5)
    return false
  end
  # wb13c
  if ci0 == 0xC && cs_i1 == 0xC
    return false
  end
  # break unless next char is Extend/Format
  cs_i1 != 0x3 && cs_i1 != 0x4
end
# Returns the default width of the given code point as described in
# "UAX #11: East Asian Width" (http://unicode.org/reports/tr11/).
#
# +char+ may be anything that responds to +ord+ (a one character
# string or an Integer code point).
#
# Each code point is mapped to one of the following six symbols:
# :Neutral, :Ambiguous, :Halfwidth, :Wide, :Fullwidth, :Narrow.
def east_asian_width(char)
  code_point = char.ord
  # Range entries take precedence over the per code point table.
  in_range = EAST_ASIAN_WIDTH_RANGES.find { |range, _width|
    range.cover?(code_point)
  }
  if in_range
    in_range[1]
  else
    EAST_ASIAN_WIDTH_MAP_PER_CP[code_point]
  end
end
# Get the two letter general category alias of the given char. The
# first letter denotes a major class, the second letter a subclass
# of the major class.
#
# See section 4.5 in Unicode 6.0.0.
#
# Example:
#
#   require "unicode_utils/gc"
#   UnicodeUtils.gc("A") # => :Lu (Letter, uppercase)
#
# Returns nil for ordinals outside the Unicode code point range, a
# two letter symbol otherwise.
#
# See also: UnicodeUtils.general_category, UnicodeUtils.char_type
def gc(char)
  code_point = char.ord

  # 1. individually listed code points
  listed = GENERAL_CATEGORY_PER_CP_MAP[code_point]
  return listed if listed

  # 2. code points that are covered by a range entry
  ranged = GENERAL_CATEGORY_RANGES.find { |range, _category|
    range.cover?(code_point)
  }
  return ranged[1] if ranged

  # 3. everything else inside the codespace is "Other, not assigned";
  #    outside of it the result is nil
  :Cn if code_point.between?(0x0, 0x10FFFF)
end
# Maps every two letter general category alias to true if characters
# of that category are graphic characters, to false otherwise.
GENERAL_CATEGORY_IS_GRAPHIC_MAP = {
  Lu: true, Ll: true, Lt: true, Lm: true, Lo: true,
  Mn: true, Mc: true, Me: true,
  Nd: true, Nl: true, No: true,
  Pc: true, Pd: true, Ps: true, Pe: true, Pi: true, Pf: true, Po: true,
  Sm: true, Sc: true, Sk: true, So: true,
  Zs: true, Zl: false, Zp: false,
  Cc: false, Cf: false, Cs: false, Co: false, Cn: false
}.freeze # :nodoc:

# Returns true if the given char is a graphic char, false otherwise.
# See table 2-3 in section 2.4 of Unicode 6.0.0.
#
# +char+ is passed to UnicodeUtils.gc. If that yields no category
# (ordinal outside the Unicode code point range) the result is false.
#
# Examples:
#
#   require "unicode_utils/graphic_char_q"
#   UnicodeUtils.graphic_char?("a") # => true
#   UnicodeUtils.graphic_char?("\n") # => false
#   UnicodeUtils.graphic_char?(0x0) # => false
def graphic_char?(char)
  # fetch with a default keeps the documented true/false contract even
  # when gc returns nil or a category that is not in the table
  GENERAL_CATEGORY_IS_GRAPHIC_MAP.fetch(UnicodeUtils.gc(char), false)
end
# Get an array of all Codepoint instances in Codepoint::RANGE whose
# name matches regexp. Matching is case insensitive; all other
# options of +regexp+ (extended, multiline) are honoured.
#
#   require "unicode_utils/grep"
#   UnicodeUtils.grep(/angstrom/)
#
# returns one Codepoint per matching name, e.g. the one for U+212B
# ANGSTROM SIGN.
def grep(regexp)
  # TODO: enhance behaviour by searching aliases in NameAliases.txt
  unless regexp.casefold?
    # Add IGNORECASE to the caller's options instead of replacing
    # them, otherwise /x and /m patterns would change their meaning.
    regexp = Regexp.new(regexp.source, regexp.options | Regexp::IGNORECASE)
  end
  Codepoint::RANGE.select { |cp|
    # char_name may return nil; regexp =~ nil is nil (no match)
    regexp =~ UnicodeUtils.char_name(cp)
  }.map { |cp| Codepoint.new(cp) }
end
# A formal name alias of a code point: an alias string together with
# the type of the alias.
#
# See: UnicodeUtils.name_aliases
class NameAlias

  # The alias as string.
  attr_reader :name

  # The type of alias as symbol. Currently one of :correction,
  # :control, :alternate, :figment, :abbreviation.
  attr_reader :type

  # Do not construct directly. Use UnicodeUtils.name_aliases.
  def initialize(name, type)
    @name = name
    @type = type
  end

  # Returns a descriptive string that shows class, name and type. The
  # format may change even in minor releases.
  def inspect
    "#<#{self.class.name} #{name.inspect} (#{type})>"
  end

  # Returns name.
  def to_s
    name
  end

  # Two aliases are equal if both name and type are equal.
  def ==(other)
    other.kind_of?(NameAlias) && other.type == type && other.name == name
  end

  # Same as ==, so that aliases can be used as hash keys.
  def eql?(other)
    self == other
  end

  # Computed from the same attributes that == compares, so equal
  # aliases always have equal hash values.
  def hash
    [name, type].hash
  end

end
# does b block c?
#
# +b+ and +c+ are code points. Returns true if the canonical
# combining class of +b+ is greater than or equal to that of +c+.
# The composition routine calls this with +b+ being the most recent
# non-starter that could not be composed and +c+ the non-starter
# currently being examined.
def self.blocked?(b, c)
  # From the standard:
  # "If a combining character sequence is in canonical order,
  # then testing whether a character is blocked requires looking
  # at only the immediately preceding character."
  # The input is expected to be in canonical order; nfc passes the
  # output of canonical_decomposition.
  # NOTE(review): assumes COMBINING_CLASS_MAP returns an Integer for
  # every code point (e.g. a default of 0) -- confirm where the map
  # is defined.
  COMBINING_CLASS_MAP[b] >= COMBINING_CLASS_MAP[c]
end
# Canonical composition step shared by the normalization forms C and
# KC: combine the code points of the already decomposed +str+ into
# primary composites wherever possible.
#
# +str+ must be fully decomposed and in canonical order. Returns a new
# string in the encoding of +str+.
#
# Hangul syllables are composed arithmetically as described in the
# Unicode standard (section 3.12, "Conjoining Jamo Behavior"), all
# other pairs are looked up in Impl::CANONICAL_COMPOSITION_MAP.
def self.composition(str)
  ### constants for hangul composition ###
  sbase = 0xAC00
  lbase = 0x1100
  vbase = 0x1161
  tbase = 0x11A7 # one less than the first trailing consonant
  lcount = 19
  vcount = 21
  tcount = 28
  ncount = vcount * tcount
  scount = lcount * ncount
  ########################################

  String.new.force_encoding(str.encoding).tap do |res|
    # The starter that following code points may still combine with;
    # it is only appended to res once it can't change any more.
    last_starter = nil
    # Non-starters after last_starter that did not compose with it.
    uncomposable_non_starters = []
    str.each_codepoint { |cp|
      if COMBINING_CLASS_MAP[cp] == 0 # starter?
        combined = false
        # Two starters can only compose if nothing is between them.
        if last_starter && uncomposable_non_starters.empty?
          ### hangul ###
          # leading consonant + vowel => LV syllable
          lindex = last_starter - lbase
          if 0 <= lindex && lindex < lcount
            vindex = cp - vbase
            # vcount is an exclusive bound; vindex == vcount would
            # compose a code point that is not a vowel in this range
            # into a wrong syllable
            if 0 <= vindex && vindex < vcount
              last_starter =
                sbase + (lindex * vcount + vindex) * tcount
              combined = true
            end
          end
          # LV syllable + trailing consonant => LVT syllable
          unless combined
            sindex = last_starter - sbase
            if 0 <= sindex && sindex < scount && (sindex % tcount) == 0
              tindex = cp - tbase
              # tindex 0 is tbase itself, which is not a trailing
              # consonant; accepting it would drop cp from the output
              if 0 < tindex && tindex < tcount
                last_starter += tindex
                combined = true
              end
            end
          end
          ##############
          unless combined
            map = Impl::CANONICAL_COMPOSITION_MAP[last_starter]
            composition = map && map[cp]
            if composition && Impl::NFC.primary_composite?(composition)
              last_starter = composition
              combined = true
            end
          end
        end
        unless combined
          # cp starts a new sequence, flush the previous one
          res << last_starter if last_starter
          uncomposable_non_starters.each { |nc| res << nc }
          uncomposable_non_starters.clear
          last_starter = cp
        end
      else
        last_non_starter = uncomposable_non_starters.last
        if last_non_starter && Impl::NFC.blocked?(last_non_starter, cp)
          uncomposable_non_starters << cp
        else
          # last_starter is nil if str begins with a non-starter; the
          # lookup then simply finds nothing
          map = Impl::CANONICAL_COMPOSITION_MAP[last_starter]
          composition = map && map[cp]
          if composition && Impl::NFC.primary_composite?(composition)
            last_starter = composition
          else
            uncomposable_non_starters << cp
          end
        end
      end
    }
    # flush the final sequence
    res << last_starter if last_starter
    uncomposable_non_starters.each { |nc| res << nc }
  end
end
# Get +str+ in Normalization Form KC.
#
# Normalization Form KC is compatibility decomposition (NFKD)
# followed by composition. Like NFKD, this normalization can alter
# how a string is displayed.
#
# Example:
#
#   require "unicode_utils/nfkc"
#   # LATIN SMALL LIGATURE FI => LATIN SMALL LETTER F, LATIN SMALL LETTER I
#   UnicodeUtils.nfkc("\u{fb01}") => "fi"
#
# See also: UnicodeUtils.compatibility_decomposition
def nfkc(str)
  Impl.composition(UnicodeUtils.compatibility_decomposition(str))
end
# Read a map whose keys are code points and whose values are arrays
# of code points.
#
# The file is consumed in chunks of six characters. The first chunk
# of a record is the key (six hexdigits), the following chunks are
# the values. A record ends with a chunk whose first byte is "x".
#
# NOTE(review): a record that is not terminated (file ends inside a
# record) makes the inner read return nil and raises NoMethodError;
# presumably the compiled data files are always well formed.
def self.read_multivalued_map(filename)
  Hash.new.tap { |map|
    open_cdata_file(filename) do |input|
      # one buffer is reused for every read to avoid allocations
      buffer = "x" * 6
      buffer.force_encoding(Encoding::US_ASCII)
      while input.read(6, buffer)
        cp = buffer.to_i(16)
        mapping = []
        # 120 is the byte value of "x", the end-of-record marker
        while input.read(6, buffer).getbyte(0) != 120
          mapping << buffer.to_i(16)
        end
        map[cp] = mapping
      end
    end
  }
end
# Read conditional casing rules, one record per line. The fields of a
# record are separated by ";":
#
# 1. the code point as hexdigits,
# 2. the mapping, a "," separated list of code points as hexdigits,
# 3. the language id, may be empty (stored under the key nil),
# 4. optionally the context, e.g. "After_I".
#
# The context, with underscores removed, selects the class
# Impl::<Context>ConditionalCasing (plain Impl::ConditionalCasing if
# there is no context), which is instantiated with the mapping.
#
# Returns a hash of the form
# <tt>{ code_point => { language_id => casing } }</tt>.
def self.read_conditional_casings(filename)
  casings_by_cp = {}
  open_cdata_file(filename) do |input|
    input.each_line do |line|
      cp_field, mapping_field, language_field, context_field =
        line.chomp.split(";")
      code_point = cp_field.to_i(16)
      mapping = mapping_field.split(",").map { |hex| hex.to_i(16) }
      language_id = language_field.empty? ? nil : language_field.to_sym
      context = context_field && context_field.gsub('_', '')
      casing_class = Impl.const_get("#{context}ConditionalCasing")
      casing = casing_class.new(mapping)
      casings_by_cp[code_point] ||= {}
      casings_by_cp[code_point][language_id] = casing
    end
  end
  casings_by_cp
end
# Returns a list (array) of pairs (two element Arrays) of Range
# (code points) and associated integer value.
#
# Every record in the file consists of thirteen characters: six
# hexdigits for the first code point of the range, six hexdigits for
# the last code point (inclusive) and one hexdigit for the value.
def self.read_range_to_hexdigit_list(filename)
  pairs = []
  open_cdata_file(filename) do |input|
    # both buffers are reused for every read
    cp_chunk = "x" * 6
    cp_chunk.force_encoding(Encoding::US_ASCII)
    value_chunk = "x"
    value_chunk.force_encoding(Encoding::US_ASCII)
    while input.read(6, cp_chunk)
      # the reads have to happen in exactly this order
      first = cp_chunk.to_i(16)
      last = input.read(6, cp_chunk).to_i(16)
      value = input.read(1, value_chunk).to_i(16)
      pairs << [Range.new(first, last), value]
    end
  end
  pairs
end
# Read a map from symbols to symbols. Each line of the file holds one
# entry: key and value separated by ";". Whitespace around key and
# value is ignored.
def self.read_symbol_map(filename)
  symbols = {}
  open_cdata_file(filename) do |input|
    input.each_line do |line|
      key, value = line.split(";")
      symbols[key.strip.to_sym] = value.strip.to_sym
    end
  end
  symbols
end
# Returns a unique string identifier for every code point. Returns
# nil if +code_point+ is not in the Unicode codespace. +code_point+
# must be an Integer.
#
# The returned string identifier is either the non-empty Name
# property value of +code_point+, a non-empty Name_Alias string
# property value of +code_point+, or the code point label as
# described by section "Code Point Labels" in chapter 4.8 "Name" of
# the Unicode standard.
#
# If the returned identifier starts with "<", it is a code point
# label and it ends with ">". Otherwise it is the normative name or
# a formal alias string.
#
# The exact name/alias/label selection algorithm may change even in
# minor UnicodeUtils releases, but overall behaviour will stay the
# same in spirit.
#
# The selection process in this version of UnicodeUtils is:
# 1. Use an alias of type :correction, :control, :figment or
#    :alternate (with listed precedence) if available
# 2. Use the Unicode Name property value if it is not empty
# 3. Construct a code point label in angle brackets: the code point
#    type in lowercase with "-" instead of "_", followed by "-" and
#    the code point as at least four uppercase hexdigits.
#
# Examples:
#
#   require "unicode_utils/sid"
#
#   U.sid 0xa    # => "LINE FEED"
#   U.sid 0x0    # => "NULL"
#   U.sid 0xfeff # => "BYTE ORDER MARK"
#   U.sid 0xe000 # => a label such as "<private-use-E000>"
#   U.sid 0x61   # => "LATIN SMALL LETTER A"
#   U.sid -1     # => nil
def sid(code_point)
  preferred_alias = CP_PREFERRED_ALIAS_STRING_MAP[code_point]
  return preferred_alias if preferred_alias

  # An empty name or one that starts with "<" is not a real name.
  name = UnicodeUtils.char_name(code_point)
  return name if name && name !~ /\A(\<|\z)/

  type = UnicodeUtils.code_point_type(code_point)
  return nil unless type

  label = type.to_s.downcase.gsub('_', '-')
  hex = code_point.to_s(16).upcase.rjust(4, '0')
  "<#{label}-#{hex}>"
end
# Perform simple case folding. Contrary to full case folding, this
# uses only one to one mappings, so that the length of the returned
# string is equal to the length of +str+.
#
# For each code point the mapping in CASEFOLD_C_MAP is preferred,
# then the one in CASEFOLD_S_MAP; code points without a mapping are
# copied unchanged. The result is a new string in the encoding of
# +str+.
#
# The purpose of case folding is case insensitive string comparison.
#
# Examples:
#
#   require "unicode_utils/simple_casefold"
#   UnicodeUtils.simple_casefold("Ümit") == UnicodeUtils.simple_casefold("ümit") => true
#   UnicodeUtils.simple_casefold("WEISS") == UnicodeUtils.simple_casefold("weiß") => false
#
# See also: UnicodeUtils.casefold
def simple_casefold(str)
  folded = String.new.force_encoding(str.encoding)
  str.each_codepoint do |code_point|
    folded << (CASEFOLD_C_MAP[code_point] ||
               CASEFOLD_S_MAP[code_point] ||
               code_point)
  end
  folded
end
# Map each code point in +str+ that has a single code point
# uppercase-mapping to that uppercase mapping. The returned string
# has the same length as the original string and the same encoding.
#
# This function is locale independent.
#
# Examples:
#
#   require "unicode_utils/simple_upcase"
#   UnicodeUtils.simple_upcase("ümit: 123") => "ÜMIT: 123"
#   UnicodeUtils.simple_upcase("weiß") => "WEIß"
def simple_upcase(str)
  upcased = String.new.force_encoding(str.encoding)
  str.each_codepoint do |code_point|
    # code points without a mapping are copied as they are
    upcased << (SIMPLE_UPCASE_MAP[code_point] || code_point)
  end
  upcased
end
11 | def soft_dotted_char?(char) 12 | SOFT_DOTTED_SET.include?(char.ord) 13 | end 14 | module_function :soft_dotted_char? 15 | 16 | end 17 | -------------------------------------------------------------------------------- /lib/unicode_utils/titlecase.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | require "unicode_utils/conditional_casing" 5 | require "unicode_utils/each_word" 6 | require "unicode_utils/cased_char_q" 7 | require "unicode_utils/downcase" 8 | 9 | module UnicodeUtils 10 | 11 | SIMPLE_TITLECASE_MAP = Impl.read_code_point_map("simple_tc_map") # :nodoc: 12 | SPECIAL_TITLECASE_MAP = Impl.read_multivalued_map("special_tc_map") # :nodoc: 13 | 14 | # Convert the first cased character after each word boundary to 15 | # titlecase and all other cased characters to lowercase. For many, 16 | # but not all characters, the titlecase mapping is the same as the 17 | # uppercase mapping. 18 | # 19 | # Some conversion rules are language dependent, these are in effect 20 | # when a non-nil +language_id+ is given. If non-nil, the 21 | # +language_id+ must be a two letter language code as defined in BCP 22 | # 47 (http://tools.ietf.org/rfc/bcp/bcp47.txt) as a symbol. If a 23 | # language doesn't have a two letter code, the three letter code is 24 | # to be used. If locale independent behaviour is required, +nil+ 25 | # should be passed explicitely, because a later version of 26 | # UnicodeUtils may default to something else. 27 | # 28 | # Example: 29 | # 30 | # require "unicode_utils/titlecase" 31 | # UnicodeUtils.titlecase("hello, world!") => "Hello, World!" 
32 | def titlecase(str, language_id = nil) 33 | String.new.force_encoding(str.encoding).tap do |res| 34 | # ensure O(1) lookup by index 35 | str = str.encode(Encoding::UTF_32LE) 36 | i = 0 37 | each_word(str) { |word| 38 | cased_char_found = false 39 | word.each_codepoint { |cp| 40 | cased = cased_char?(cp) 41 | if !cased_char_found && cased 42 | cased_char_found = true 43 | special_mapping = 44 | Impl.conditional_titlecase_mapping(cp, str, i, language_id) || 45 | SPECIAL_TITLECASE_MAP[cp] 46 | if special_mapping 47 | special_mapping.each { |m| res << m } 48 | else 49 | res << (SIMPLE_TITLECASE_MAP[cp] || cp) 50 | end 51 | elsif cased 52 | special_mapping = 53 | Impl.conditional_downcase_mapping(cp, str, i, language_id) || 54 | SPECIAL_DOWNCASE_MAP[cp] 55 | if special_mapping 56 | special_mapping.each { |m| res << m } 57 | else 58 | res << (SIMPLE_DOWNCASE_MAP[cp] || cp) 59 | end 60 | else 61 | res << cp 62 | end 63 | i += 1 64 | } 65 | } 66 | end 67 | end 68 | module_function :titlecase 69 | 70 | end 71 | -------------------------------------------------------------------------------- /lib/unicode_utils/titlecase_char_q.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | 5 | module UnicodeUtils 6 | 7 | TITLECASE_LETTER_SET = Impl.read_code_point_set("cat_set_titlecase") # :nodoc: 8 | 9 | # True if the given character has the General_Category 10 | # Titlecase_Letter (Lt). 11 | def titlecase_char?(char) 12 | TITLECASE_LETTER_SET.include?(char.ord) 13 | end 14 | module_function :titlecase_char? 15 | 16 | end 17 | -------------------------------------------------------------------------------- /lib/unicode_utils/u.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils" 4 | 5 | # Shortcut for usage in irb. 
This shortcut is only defined when 6 | # unicode_utils/u is explicitly required. It is intended for 7 | # interactive use only! 8 | # 9 | # $ irb -r unicode_utils/u 10 | # irb(main):001:0> U.grep(/angstrom/) 11 | # => [#] 12 | U = UnicodeUtils 13 | -------------------------------------------------------------------------------- /lib/unicode_utils/upcase.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | require "unicode_utils/simple_upcase" 5 | require "unicode_utils/conditional_casing" 6 | 7 | module UnicodeUtils 8 | 9 | SPECIAL_UPCASE_MAP = Impl.read_multivalued_map("special_uc_map") # :nodoc: 10 | 11 | # Perform a full case-conversion of +str+ to uppercase according to 12 | # the Unicode standard. 13 | # 14 | # Some conversion rules are language dependent, these are in effect 15 | # when a non-nil +language_id+ is given. If non-nil, the 16 | # +language_id+ must be a two letter language code as defined in BCP 17 | # 47 (http://tools.ietf.org/rfc/bcp/bcp47.txt) as a symbol. If a 18 | # language doesn't have a two letter code, the three letter code is 19 | # to be used. If locale independent behaviour is required, +nil+ 20 | # should be passed explicitly, because a later version of 21 | # UnicodeUtils may default to something else.
22 | # 23 | # Examples: 24 | # 25 | # require "unicode_utils/upcase" 26 | # UnicodeUtils.upcase("weiß") => "WEISS" 27 | # UnicodeUtils.upcase("i", :en) => "I" 28 | # UnicodeUtils.upcase("i", :tr) => "İ" 29 | def upcase(str, language_id = nil) 30 | String.new.force_encoding(str.encoding).tap { |res| 31 | if Impl::LANGS_WITH_RULES.include?(language_id) 32 | # ensure O(1) lookup by index 33 | str = str.encode(Encoding::UTF_32LE) 34 | end 35 | pos = 0 36 | str.each_codepoint { |cp| 37 | special_mapping = 38 | Impl.conditional_upcase_mapping(cp, str, pos, language_id) || 39 | SPECIAL_UPCASE_MAP[cp] 40 | if special_mapping 41 | special_mapping.each { |m| res << m } 42 | else 43 | res << (SIMPLE_UPCASE_MAP[cp] || cp) 44 | end 45 | pos += 1 46 | } 47 | } 48 | end 49 | module_function :upcase 50 | 51 | end 52 | -------------------------------------------------------------------------------- /lib/unicode_utils/uppercase_char_q.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | 5 | module UnicodeUtils 6 | 7 | PROP_UPPERCASE_SET = Impl.read_code_point_set("prop_set_uppercase") # :nodoc: 8 | 9 | # True if the given character has the Unicode property Uppercase. 10 | def uppercase_char?(char) 11 | PROP_UPPERCASE_SET.include?(char.ord) 12 | end 13 | module_function :uppercase_char? 14 | 15 | end 16 | -------------------------------------------------------------------------------- /lib/unicode_utils/version.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | module UnicodeUtils 4 | 5 | # Corresponds to the unicode_utils gem version. 6 | # 7 | # Conforms to Semantic Versioning as documented at semver.org. 
8 | # 9 | # Summary: 10 | # MAJOR.MINOR.PATCHLEVEL 11 | # - A backwards incompatible change causes a change in MAJOR 12 | # - New features or non-bugfix improvals cause a change in MINOR 13 | # - Bugfixes increase only PATCHLEVEL. 14 | # - Pre-release versions append more info after a dash. 15 | VERSION = "1.4.0" 16 | 17 | # The version of Unicode implemented by this version of UnicodeUtils. 18 | # 19 | # require "unicode_utils/version" 20 | # puts "Unicode #{UnicodeUtils::UNICODE_VERSION}" 21 | UNICODE_VERSION = "6.2.0" 22 | 23 | end 24 | -------------------------------------------------------------------------------- /lib/unicode_utils/white_space_char_q.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require "unicode_utils/read_cdata" 4 | 5 | module UnicodeUtils 6 | 7 | WHITE_SPACE_SET = Impl.read_code_point_set("white_space_set") # :nodoc: 8 | 9 | # True if the given character has the Unicode property White_Space. 10 | # 11 | # Example: 12 | # 13 | # require "unicode_utils/general_category" 14 | # require "unicode_utils/white_space_char_q" 15 | # 16 | # UnicodeUtils.general_category("\n") => :Control 17 | # UnicodeUtils.white_space_char?("\n") => true 18 | def white_space_char?(char) 19 | WHITE_SPACE_SET.include?(char.ord) 20 | end 21 | module_function :white_space_char? 
# Compares the full case conversions of the sample text
# dreilaendereck.txt with the expectation files stored next to this
# test file.
class TestCaseMappings < Test::Unit::TestCase

  TXT_DIR = File.dirname(__FILE__)

  # Reads +filename+ from TXT_DIR with the open mode "r:UTF-8:-".
  def read_txt(filename)
    path = File.join(TXT_DIR, filename)
    File.read(path, mode: "r:UTF-8:-")
  end

  def test_upcase_german_text
    expected = read_txt("dreilaendereck_uc.txt")
    actual = UnicodeUtils.upcase(read_txt("dreilaendereck.txt"))
    assert_equal expected, actual
  end

  def test_upcase_german_text_language_de
    expected = read_txt("dreilaendereck_uc.txt")
    actual = UnicodeUtils.upcase(read_txt("dreilaendereck.txt"), :de)
    assert_equal expected, actual
  end

  # With :tr the result must differ from the language independent
  # expectation file.
  def test_upcase_german_text_language_tr
    unexpected = read_txt("dreilaendereck_uc.txt")
    actual = UnicodeUtils.upcase(read_txt("dreilaendereck.txt"), :tr)
    assert_not_equal unexpected, actual
  end

  def test_downcase_german_text
    expected = read_txt("dreilaendereck_lc.txt")
    actual = UnicodeUtils.downcase(read_txt("dreilaendereck.txt"))
    assert_equal expected, actual
  end

  def test_downcase_german_text_language_de
    expected = read_txt("dreilaendereck_lc.txt")
    actual = UnicodeUtils.downcase(read_txt("dreilaendereck.txt"), :de)
    assert_equal expected, actual
  end

  # With :tr the result must differ from the language independent
  # expectation file.
  def test_downcase_german_text_language_tr
    unexpected = read_txt("dreilaendereck_lc.txt")
    actual = UnicodeUtils.downcase(read_txt("dreilaendereck.txt"), :tr)
    assert_not_equal unexpected, actual
  end

  def test_casefold_german_text
    expected = read_txt("dreilaendereck_cf.txt")
    actual = UnicodeUtils.casefold(read_txt("dreilaendereck.txt"))
    assert_equal expected, actual
  end

end

# Exercises the accessors of UnicodeUtils::Codepoint, mostly with
# U+20AC (EURO SIGN).
class TestCodepoint < Test::Unit::TestCase

  def test_ord
    euro = UnicodeUtils::Codepoint.new(0x20ac)
    assert_equal 0x20ac, euro.ord
  end

  def test_uplus
    euro = UnicodeUtils::Codepoint.new(0x20ac)
    assert_equal "U+20AC", euro.uplus
  end

  def test_uplus_more_than_four_digits
    highest = UnicodeUtils::Codepoint.new(0x10FFFF)
    assert_equal "U+10FFFF", highest.uplus
  end

  def test_uplus_less_than_four_digits
    small_a = UnicodeUtils::Codepoint.new(0x61)
    assert_equal "U+0061", small_a.uplus
  end

  def test_name
    euro = UnicodeUtils::Codepoint.new(0x20ac)
    assert_equal "EURO SIGN", euro.name
  end

  def test_to_s
    expected = 0x20ac.chr(Encoding::UTF_8)
    assert_equal expected, UnicodeUtils::Codepoint.new(0x20ac).to_s
  end

  def test_hexbytes
    euro = UnicodeUtils::Codepoint.new(0x20ac)
    assert_equal "e2,82,ac", euro.hexbytes
  end

  def test_hexbytes_one_byte
    small_a = UnicodeUtils::Codepoint.new(0x61)
    assert_equal "61", small_a.hexbytes
  end

  # The inspect output must contain each of the listed fragments.
  def test_inspect
    text = UnicodeUtils::Codepoint.new(0x20ac).inspect
    ["U+", "€", "EURO SIGN", "utf8:e2,82,ac"].each do |fragment|
      assert text.include?(fragment)
    end
  end

end
# Runs UnicodeUtils.each_grapheme against the cases listed in
# data/GraphemeBreakTest.txt.
class TestEachGrapheme < Test::Unit::TestCase

  # Code point list of a grapheme that consists only of U+D800.
  UNPAIRED_D800 = [0xd800]

  # Parses data/GraphemeBreakTest.txt and yields, per test line, an
  # array of grapheme strings built in +encoding+.
  #
  # Everything from the first "#" on a line is dropped, "÷" separates
  # graphemes and "×" separates the hexadecimal code points inside a
  # grapheme. Lines containing a grapheme equal to UNPAIRED_D800 are
  # not yielded.
  def each_grapheme_list(encoding = 'utf-8')
    # count / skip_count only feed the commented-out print at the end.
    count = 0
    skip_count = 0
    fn = File.join(File.dirname(__FILE__),
                   "..", "data", "GraphemeBreakTest.txt")
    File.open(fn, "r:utf-8:-") do |input|
      input.each_line { |line|
        has_unpaired_surrogate = false
        if line =~ /^([^#]*)#/
          line = $1
        end
        line.strip!
        next if line.empty?
        count += 1
        graphemes = line.split("÷").map(&:strip).delete_if(&:empty?)
        graphemes.map! { |g|
          cps = g.split("×").map(&:strip).delete_if(&:empty?).map { |c| c.to_i(16) }
          if cps == UNPAIRED_D800
            has_unpaired_surrogate = true
            skip_count += 1
            # Leaves map! early; the line is skipped via the flag below.
            break
          end
          cps.inject(String.new.force_encoding(encoding), &:<<)
        }
        # Unpaired surrogates are not allowed in UTF-8
        # GraphemeBreakTest has test cases with unpaired surrogates
        yield graphemes unless has_unpaired_surrogate
      }
    end
    #print "\nSkipped #{skip_count} out of #{count} grapheme tests due to surrogates\n"
  end

  def test_each_grapheme_utf8
    c = 0
    each_grapheme_list { |grapheme_list|
      c += 1
      graphemes = []
      UnicodeUtils.each_grapheme(grapheme_list.join) { |g| graphemes << g }
      assert_equal grapheme_list, graphemes
    }
    # Number of test lines that are expected to be yielded.
    assert_equal 348, c
  end

  def test_each_grapheme_utf16
    c = 0
    each_grapheme_list('utf-16le') { |grapheme_list|
      c += 1
      graphemes = []
      UnicodeUtils.each_grapheme(grapheme_list.join) { |g| graphemes << g }
      assert_equal grapheme_list, graphemes
    }
    # TODO: currently we skip the unpaired surrogates for UTF-16 also,
    # because current Ruby implementations raise an exception in
    # each_codepoint. Review this point with future implementations.
    assert_equal 348, c
  end

end

# Runs UnicodeUtils.each_word against the cases listed in
# data/WordBreakTest.txt.
class TestEachWord < Test::Unit::TestCase

  # Parses data/WordBreakTest.txt and yields, per test line, an array
  # of UTF-8 word strings. Everything from the first "#" on a line is
  # dropped, "÷" separates words and "×" separates the hexadecimal code
  # points inside a word.
  def each_word_list
    fn = File.join(File.dirname(__FILE__),
                   "..", "data", "WordBreakTest.txt")
    File.open(fn, "r:utf-8:-") do |input|
      input.each_line { |line|
        if line =~ /^([^#]*)#/
          line = $1
        end
        line.strip!
        next if line.empty?
        words = line.split("÷").map(&:strip).delete_if(&:empty?)
        words.map! { |w|
          cps = w.split("×").map(&:strip).delete_if(&:empty?).map { |c| c.to_i(16) }
          cps.inject(String.new.force_encoding('utf-8'), &:<<)
        }
        yield words
      }
    end
  end

  # Joining the expected words and splitting them again with each_word
  # must reproduce the expected word list.
  def test_each_word
    each_word_list { |word_list|
      words = UnicodeUtils.each_word(word_list.join).to_a
      assert_equal word_list, words
    }
  end

end

# Checks that grepping for /angstrom/ yields exactly U+212B.
class TestGrep < Test::Unit::TestCase

  def test_angstrom
    assert_equal [0x212b], UnicodeUtils.grep(/angstrom/).map(&:ord)
  end

end
# Runs the four normalization functions against the records of
# data/NormalizationTest.txt.
class TestNormalization < Test::Unit::TestCase

  # test_nfkd and test_nfkc call UnicodeUtils.nfkd / UnicodeUtils.nfkc.
  # Requiring them here keeps this file from depending on some other
  # file having loaded them first; require is a no-op if they already
  # are loaded.
  require "unicode_utils/nfkd"
  require "unicode_utils/nfkc"

  # One parsed data line; c1..c5 return the strings at index 0..4.
  class Record
    def initialize(ary)
      @ary = ary
    end
    def c1
      @ary[0]
    end
    def c2
      @ary[1]
    end
    def c3
      @ary[2]
    end
    def c4
      @ary[3]
    end
    def c5
      @ary[4]
    end
  end

  # Parses data/NormalizationTest.txt and yields one Record per data
  # line. Everything from the first "#" on a line is dropped; empty
  # lines and lines starting with "@Part" are skipped. Columns are
  # separated by ";" and each column is a space separated list of
  # hexadecimal code points, which are appended to a UTF-8 string.
  def each_testdata_record
    fn = File.join(File.dirname(__FILE__),
                   "..", "data", "NormalizationTest.txt")
    File.open(fn, "r:utf-8:-") do |input|
      input.each_line { |line|
        if line =~ /^([^#]*)#/
          line = $1
        end
        line.strip!
        next if line.empty? || line =~ /^@Part/
        columns = line.split(";")
        ary = columns.map { |column|
          String.new.force_encoding(Encoding::UTF_8).tap do |str|
            column.split(" ").each { |c|
              str << c.strip.to_i(16)
            }
          end
        }
        yield Record.new(ary)
      }
    end
  end

  def test_nfd
    each_testdata_record { |r|
      assert_equal r.c3, UnicodeUtils.nfd(r.c1)
      assert_equal r.c3, UnicodeUtils.nfd(r.c2)
      assert_equal r.c3, UnicodeUtils.nfd(r.c3)
      assert_equal r.c5, UnicodeUtils.nfd(r.c4)
      assert_equal r.c5, UnicodeUtils.nfd(r.c5)
    }
  end

  def test_nfc
    each_testdata_record { |r|
      assert_equal r.c2, UnicodeUtils.nfc(r.c1)
      assert_equal r.c2, UnicodeUtils.nfc(r.c2)
      assert_equal r.c2, UnicodeUtils.nfc(r.c3)
      assert_equal r.c4, UnicodeUtils.nfc(r.c4)
      assert_equal r.c4, UnicodeUtils.nfc(r.c5)
    }
  end

  def test_nfkd
    each_testdata_record { |r|
      assert_equal r.c5, UnicodeUtils.nfkd(r.c1)
      assert_equal r.c5, UnicodeUtils.nfkd(r.c2)
      assert_equal r.c5, UnicodeUtils.nfkd(r.c3)
      assert_equal r.c5, UnicodeUtils.nfkd(r.c4)
      assert_equal r.c5, UnicodeUtils.nfkd(r.c5)
    }
  end

  def test_nfkc
    each_testdata_record { |r|
      assert_equal r.c4, UnicodeUtils.nfkc(r.c1)
      assert_equal r.c4, UnicodeUtils.nfkc(r.c2)
      assert_equal r.c4, UnicodeUtils.nfkc(r.c3)
      assert_equal r.c4, UnicodeUtils.nfkc(r.c4)
      assert_equal r.c4, UnicodeUtils.nfkc(r.c5)
    }
  end

end

# Tests behaviour in Unicode 6.0.0 that wasn't in the previously
# supported standard. That means each one of these assertions fails
# with UnicodeUtils 1.0.0.
class TestUnicode_6_0_0 < Test::Unit::TestCase

  def test_char_name
    assert_equal "CYRILLIC CAPITAL LETTER PE WITH DESCENDER",
      UnicodeUtils.char_name("\u{524}")
    assert_equal "SAMARITAN LETTER QUF",
      UnicodeUtils.char_name("\u{812}")
    assert_equal "TIBETAN SUBJOINED SIGN INVERTED MCHU CAN",
      UnicodeUtils.char_name("\u{F8F}")
    assert_equal "CANADIAN SYLLABICS TLHWE",
      UnicodeUtils.char_name("\u{18E8}")
    assert_equal "EGYPTIAN HIEROGLYPH F040",
      UnicodeUtils.char_name("\u{1312B}")
    assert_equal "STEAMING BOWL",
      UnicodeUtils.char_name("\u{1F35C}")
    assert_equal "HANGUL JUNGSEONG ARAEA-A",
      UnicodeUtils.char_name("\u{d7c5}")
    assert_equal "CJK UNIFIED IDEOGRAPH-2A700",
      UnicodeUtils.char_name("\u{2a700}")
    assert_equal "CJK UNIFIED IDEOGRAPH-2B81D",
      UnicodeUtils.char_name("\u{2b81d}")
  end

  def test_grep
    assert_equal [0x1F35C], UnicodeUtils.grep(/Steaming Bowl/).map(&:ord)
  end

  def test_simple_upcase
    assert_equal "\u{2c7e}", UnicodeUtils.simple_upcase("\u{23f}")
  end

  def test_simple_downcase
    assert_equal "\u{23f}", UnicodeUtils.simple_downcase("\u{2c7e}")
  end

end
# Tests behaviour in Unicode 6.1.0 that wasn't in the previously
# supported standard. The original note states that each one of these
# assertions fails with UnicodeUtils 1.0.2.
# NOTE(review): the class name says 6_0_1 although this file is
# test_unicode_6_1_0.rb; the name is kept unchanged here. Please also
# confirm the version number in the note above.
class TestUnicode_6_0_1 < Test::Unit::TestCase

  def test_gc
    [0xa7, 0xb6, 0xf14, 0x1360, 0x10102].each do |cp|
      assert_equal :Po, UnicodeUtils.gc(cp)
    end
    (0x3248..0x324F).each do |cp|
      assert_equal :No, UnicodeUtils.gc(cp)
    end
  end

  def test_char_name
    [
      ["CJK UNIFIED IDEOGRAPH-9FCC", 0x9fcc],
      ["ARABIC LETTER BEH WITH SMALL V BELOW", 0x8a0],
      ["SLEEPING FACE", 0x1f634]
    ].each do |expected_name, cp|
      assert_equal expected_name, UnicodeUtils.char_name(cp)
    end
  end

  def test_canonical_decomposition
    [
      ["\u{11131}\u{11127}", "\u{1112e}"],
      ["\u{11132}\u{11127}", "\u{1112f}"]
    ].each do |decomposed, composed|
      assert_equal decomposed, UnicodeUtils.canonical_decomposition(composed)
    end
  end

  def test_nfd
    [
      ["\u{11131}\u{11127}", "\u{1112e}"],
      ["\u{11132}\u{11127}", "\u{1112f}"]
    ].each do |decomposed, composed|
      assert_equal decomposed, UnicodeUtils.nfd(composed)
    end
  end

  def test_nfc
    [
      ["\u{1112e}", "\u{11131}\u{11127}"],
      ["\u{1112f}", "\u{11132}\u{11127}"]
    ].each do |composed, decomposed|
      assert_equal composed, UnicodeUtils.nfc(decomposed)
    end
  end

  def test_casefold
    [
      ["\u{2d2d}", "\u{10cd}"],
      ["\u{a793}", "\u{a792}"]
    ].each do |folded, input|
      assert_equal folded, UnicodeUtils.casefold(input)
    end
  end

  def test_combining_class
    ccc = UnicodeUtils.combining_class(0x116b7)
    assert_equal 7, ccc
  end

  def test_lowercase_char?
    is_lowercase = UnicodeUtils.lowercase_char?(0x2071)
    assert_equal true, is_lowercase
  end

end

# Tests behaviour in Unicode 6.2.0 that wasn't in the previously
# supported standard. That means each one of these tests fails
# with UnicodeUtils 1.3.0.
class TestUnicode_6_2_0 < Test::Unit::TestCase

  def test_east_asian_width
    width_property = UnicodeUtils.east_asian_width(0x11a3)
    assert_equal :Neutral, width_property
  end

  def test_display_width
    width = UnicodeUtils.display_width("\u{11a3}")
    assert_equal 1, width
  end

  def test_char_display_width
    width = UnicodeUtils.char_display_width(0x11a3)
    assert_equal 1, width
  end

  def test_each_grapheme
    # don't break between regional indicator symbols
    pair = "\u{1F1E6}\u{1F1E7}"
    assert_equal [pair], UnicodeUtils.each_grapheme(pair).to_a
  end

  def test_sid
    # name alias of type correction introduced
    identifier = UnicodeUtils.sid(0x709)
    assert_equal "SYRIAC SUBLINEAR COLON SKEWED LEFT", identifier
  end

  def test_char_name
    name = UnicodeUtils.char_name(0x20ba)
    assert_equal "TURKISH LIRA SIGN", name
  end

  def test_general_category
    category = UnicodeUtils.general_category(0x20ba)
    assert_equal :Currency_Symbol, category
  end

  def test_each_word
    # don't break between regional indicator symbols
    expected = ["foo", "\u{1F1E6}\u{1F1E7}", "bar"]
    actual = UnicodeUtils.each_word("foo\u{1F1E6}\u{1F1E7}bar").to_a
    assert_equal expected, actual
  end

end
# Files packaged into the gem: all Ruby files below lib, everything in
# cdata, the listed test file and the top-level text documents. Names
# ending in "~" are removed from the list.
test_files = ["test/test_unicode_utils.rb"]
files =
  Dir["lib/**/*.rb"] + Dir["cdata/*"] + test_files +
  ["README.rdoc", "INSTALL.txt", "LICENSE.txt", "CHANGES.txt"]
files.reject! { |fn| fn.end_with?("~") }

# The specification object is the value of the last expression.
Gem::Specification.new do |g|
  g.name = "unicode_utils"
  g.version = UnicodeUtils::VERSION
  g.platform = Gem::Platform::RUBY
  g.summary = "additional Unicode aware functions for Ruby 1.9"
  g.require_paths = ["lib"]
  g.files = files
  g.test_files = test_files
  g.required_ruby_version = ">= 1.9.1"
  g.author = "Stefan Lang"
  g.email = "langstefan@gmx.at"
  # has_rdoc= is a deprecated RubyGems setter; only call it where the
  # running RubyGems still provides it.
  g.has_rdoc = true if g.respond_to?(:has_rdoc=)
  g.extra_rdoc_files = ["README.rdoc", "INSTALL.txt", "CHANGES.txt"]
  g.rdoc_options = ["--main=README.rdoc", "--charset=UTF-8"]
  g.homepage = "http://github.com/lang/unicode_utils"
  # rubyforge_project= is deprecated as well; guarded for the same reason.
  g.rubyforge_project = "unicode-utils" if g.respond_to?(:rubyforge_project=)
end