├── .gitignore
├── CHANGES.txt
├── INSTALL.txt
├── ISSUES.txt
├── LICENSE.txt
├── README.rdoc
├── Rakefile
├── bench
├── case_mappings.rb
├── char_name.rb
├── each_grapheme.rb
├── each_word.rb
├── grep.rb
├── normalization.rb
└── titlecase.rb
├── cdata
├── canonical_decomposition_map
├── case_ignorable_set
├── casefold_c_map
├── casefold_f_map
├── casefold_s_map
├── cat_set_titlecase
├── combining_class_map
├── compatibility_decomposition_map
├── composition_exclusion_set
├── cond_lc_map
├── cond_tc_map
├── cond_uc_map
├── east_asian_width_property_per_cp
├── east_asian_width_property_ranges
├── general_category_aliases
├── general_category_per_cp
├── general_category_ranges
├── grapheme_break_property
├── jamo_short_names
├── name_aliases
├── names
├── prop_set_default_ignorable
├── prop_set_lowercase
├── prop_set_uppercase
├── simple_lc_map
├── simple_tc_map
├── simple_uc_map
├── soft_dotted_set
├── special_lc_map
├── special_tc_map
├── special_uc_map
├── white_space_set
└── word_break_property
├── data
├── CaseFolding.txt
├── DerivedCombiningClass.txt
├── DerivedCoreProperties.txt
├── DerivedNormalizationProps.txt
├── EastAsianWidth.txt
├── GraphemeBreakProperty.txt
├── GraphemeBreakTest.html
├── GraphemeBreakTest.txt
├── Jamo.txt
├── NameAliases.txt
├── NormalizationTest.txt
├── PropList.txt
├── PropertyValueAliases.txt
├── README.txt
├── SpecialCasing.txt
├── UnicodeData.txt
├── WordBreakProperty.txt
├── WordBreakTest.html
├── WordBreakTest.txt
├── compile.rb
└── tr44-10.html
├── install.rb
├── lib
├── unicode_utils.rb
└── unicode_utils
│ ├── canonical_decomposition.rb
│ ├── canonical_equivalents_q.rb
│ ├── case_ignorable_char_q.rb
│ ├── cased_char_q.rb
│ ├── casefold.rb
│ ├── char_display_width.rb
│ ├── char_name.rb
│ ├── char_type.rb
│ ├── code_point_type.rb
│ ├── codepoint.rb
│ ├── combining_class.rb
│ ├── compatibility_decomposition.rb
│ ├── conditional_casing.rb
│ ├── debug.rb
│ ├── default_ignorable_char_q.rb
│ ├── display_width.rb
│ ├── downcase.rb
│ ├── each_grapheme.rb
│ ├── each_word.rb
│ ├── east_asian_width.rb
│ ├── gc.rb
│ ├── general_category.rb
│ ├── graphic_char_q.rb
│ ├── grep.rb
│ ├── hangul_syllable_decomposition.rb
│ ├── jamo_short_name.rb
│ ├── lowercase_char_q.rb
│ ├── name_alias.rb
│ ├── name_aliases.rb
│ ├── nfc.rb
│ ├── nfd.rb
│ ├── nfkc.rb
│ ├── nfkd.rb
│ ├── read_cdata.rb
│ ├── sid.rb
│ ├── simple_casefold.rb
│ ├── simple_downcase.rb
│ ├── simple_upcase.rb
│ ├── soft_dotted_char_q.rb
│ ├── titlecase.rb
│ ├── titlecase_char_q.rb
│ ├── u.rb
│ ├── upcase.rb
│ ├── uppercase_char_q.rb
│ ├── version.rb
│ └── white_space_char_q.rb
├── test
├── coverage.rb
├── dreilaendereck.txt
├── dreilaendereck_cf.txt
├── dreilaendereck_lc.txt
├── dreilaendereck_uc.txt
├── suite.rb
├── test_case_mappings.rb
├── test_codepoint.rb
├── test_each_grapheme.rb
├── test_each_word.rb
├── test_grep.rb
├── test_normalization.rb
├── test_unicode_6_0_0.rb
├── test_unicode_6_1_0.rb
├── test_unicode_6_2_0.rb
└── test_unicode_utils.rb
└── unicode_utils.gemspec
/.gitignore:
--------------------------------------------------------------------------------
1 | tmp
2 | pkg
3 | doc
4 | Session.vim
5 | .*.swp
6 | *~
7 | .test-result
8 | /coverage
9 | /aux
10 |
--------------------------------------------------------------------------------
/CHANGES.txt:
--------------------------------------------------------------------------------
1 | == 1.4.0, 2012-09-30
2 |
3 | Updated to Unicode 6.2.0.
4 |
5 | * UnicodeUtils.debug accepts single Integer code point
6 |
7 | New methods in UnicodeUtils:
8 |
9 | * white_space_char?
10 |
11 | All tests pass with jruby-1.7.0.RC1. Not all tests pass with
12 | MRI 1.9.3p194 due to unexpected behaviour of String#<< with
13 | UTF-16 strings. As long as you use only UTF-8, there's no problem.
14 |
15 | == 1.3.0, 2012-03-07
16 |
17 | Updated to Unicode 6.1.0.
18 |
19 | New methods in UnicodeUtils:
20 |
21 | * code_point_type
22 | * name_aliases
23 | * sid (string identifier)
24 |
25 | New constants in UnicodeUtils:
26 |
27 | * UNICODE_VERSION
28 |
29 | == 1.2.2, 2011-11-27
30 |
31 | New methods in UnicodeUtils:
32 |
33 | * east_asian_width
34 | * display_width
35 | * default_ignorable_char?
36 | * gc
37 | * graphic_char?
38 | * general_category
39 | * char_type
40 | * char_display_width
41 | * debug
42 |
43 | == 1.1.2, 2011-11-18
44 |
45 | Updated to Unicode 6.0.0. No additions to API.
46 |
47 | == 1.0.0, 2009-01-30
48 |
49 | First release, conforms to Unicode 5.1.0.
50 |
--------------------------------------------------------------------------------
/INSTALL.txt:
--------------------------------------------------------------------------------
1 | == Installing UnicodeUtils
2 |
3 | The easiest way to install UnicodeUtils is with RubyGems:
4 |
5 | $ gem install unicode_utils
6 |
7 | === Manual installation
8 |
9 | Two kinds of files must be installed:
10 |
11 | 1. The library code. All files under lib/ and
12 | lib/unicode_utils/ with suffix .rb.
13 | The whole tree under lib/ must be on the load path.
14 |
15 | 2. The compiled Unicode data files under cdata/. UnicodeUtils
16 | loads them from the UnicodeUtils::CDATA_DIR directory,
17 | which is defined in read_cdata.rb.
18 |
19 | The best strategy is to copy the library files to Ruby's +sitelibdir+.
20 | You can get that by running:
21 |
22 | $ ruby -r rbconfig -e "puts RbConfig::CONFIG['sitelibdir']"
23 |
24 | Then copy all files under cdata/ to
25 | <sitelibdir>/unicode_utils. And last but not least, change
26 | the definition of +CDATA_DIR+ in
27 | <sitelibdir>/unicode_utils/read_cdata.rb to
28 | File.absolute_path(File.dirname(\_\_FILE\_\_)).
29 |
30 | In fact, UnicodeUtils comes with an install.rb script that does
31 | all that:
32 |
33 | $ ruby install.rb install
34 |
35 | or:
36 |
37 | $ ruby install.rb install /some/other/dir
38 |
--------------------------------------------------------------------------------
/ISSUES.txt:
--------------------------------------------------------------------------------
1 | = Issues
2 |
3 | == "code point" vs. "codepoint"
4 |
5 | The Unicode standard consistently uses "code point". On the other
6 | hand, Ruby's String class has an "each_codepoint" method.
7 |
8 | Beginning with version 1.3.0, UnicodeUtils will use "code point"
9 | and the related Ruby symbol names "code_point", "CODE_POINT" and
10 | "CodePoint" throughout.
11 |
12 | The only exception is the "UnicodeUtils::Codepoint" class, which
13 | predates UnicodeUtils 1.3.0.
14 |
15 | == char_name
16 |
17 | Unfortunately deviates from the Unicode Name property.
18 |
19 | Possible course of action:
20 | * Add consistent way to access all Unicode properties
21 | E.g. UnicodeUtils::General_Category[code_point],
22 | UnicodeUtils::Name[code_point], ...
23 | * Deprecate char_name
24 |
25 | == Encoding of string property values
26 |
27 | The encoding of string property values is currently undocumented.
28 |
29 | Possible course of action:
30 | * Use the same encoding for all string property values, preferably UTF-8
31 | * Document it
32 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2008-2012, Stefan Lang
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions
6 | are met:
7 |
8 | * Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 | * Redistributions in binary form must reproduce the above
11 | copyright notice, this list of conditions and the following
12 | disclaimer in the documentation and/or other materials
13 | provided with the distribution.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
25 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 | POSSIBILITY OF SUCH DAMAGE.
27 |
--------------------------------------------------------------------------------
/README.rdoc:
--------------------------------------------------------------------------------
1 | = Unicode Utils - Unicode algorithms for Ruby 1.9
2 |
3 | UnicodeUtils implements Unicode algorithms for case conversion,
4 | normalization, text segmentation and more in pure Ruby code.
5 |
6 | == Installation
7 |
8 | Install with RubyGems:
9 |
10 | gem install unicode_utils
11 |
12 | Or get the source from Github: http://github.com/lang/unicode_utils
13 | and follow the instructions in INSTALL.txt.
14 |
15 | UnicodeUtils works with Ruby 1.9.1 or later.
16 |
17 | == Synopsis
18 |
19 | require "unicode_utils/upcase"
20 |
21 | UnicodeUtils.upcase("weiß") => "WEISS"
22 |
23 | UnicodeUtils.upcase("i", :tr) => "İ"
24 |
25 | Start with the UnicodeUtils module in the API documentation for
26 | complete documentation.
27 |
28 | == License
29 |
30 | unicode_utils is licensed under the BSD license. Read the file
31 | LICENSE.txt in the unicode_utils package for details.
32 |
33 | == Links
34 |
35 | Online documentation:: http://unicode-utils.rubyforge.org
36 | Source code:: http://github.com/lang/unicode_utils
37 | Rubyforge project:: http://rubyforge.org/projects/unicode-utils
38 | Home of the Unicode Consortium:: http://unicode.org
39 |
40 | == Who?
41 |
42 | UnicodeUtils is written by Stefan Lang. You can contact me at
43 | langstefan AT gmx.at. Contributions welcome!
44 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | require "#{File.dirname(__FILE__)}/lib/unicode_utils/version"
4 |
5 | suffix = ENV["SUFFIX"]
6 |
7 | gem_filename = "unicode_utils-#{UnicodeUtils::VERSION}.gem"
8 |
9 | task "default" => "quick-test"
10 |
11 | desc "Run unit tests."
12 | task "test" do
13 | ruby "-I lib test/suite.rb"
14 | end
15 |
16 | desc "Quick test run."
17 | task "quick-test" do
18 | ruby "-I lib -I . test/test_unicode_utils.rb"
19 | end
20 |
21 | desc "Run tests and generate coverage report."
22 | task "coverage" do
23 | ruby "-I lib test/coverage.rb"
24 | end
25 |
26 | desc "Build unicode_utils gem."
27 | task "gem" do
28 | sh "gem#{suffix} build unicode_utils.gemspec"
29 | mkdir "pkg" unless File.directory? "pkg"
30 | mv gem_filename, "pkg"
31 | end
32 |
33 | desc "Run rdoc to generate html documentation."
34 | task "doc" do
35 | sh "rdoc#{suffix} -o doc --charset=UTF-8 --title=UnicodeUtils --main=README.rdoc lib README.rdoc INSTALL.txt CHANGES.txt LICENSE.txt"
36 | end
37 |
38 | desc "Publish doc/ on unicode-utils.rubyfore.org. " +
39 | "Note: scp will prompt for rubyforge password."
40 | task "publish-doc" => "doc" do
41 | sh "scp -i ~/.ssh/id_rsa_s0 -r doc/* langi@rubyforge.org:/var/www/gforge-projects/unicode-utils/"
42 | end
43 |
44 | desc "Compile Unicode data files from data/ to cdata/."
45 | task "compile-data" do
46 | ruby "data/compile.rb"
47 | end
48 |
49 | desc "Remove generated packages and documentation."
50 | task "clean" do
51 | rm_r "pkg" if File.exist? "pkg"
52 | rm_r "doc" if File.exist? "doc"
53 | end
54 |
--------------------------------------------------------------------------------
/bench/case_mappings.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "benchmark"
4 |
5 | require "unicode_utils/upcase"
6 | require "unicode_utils/downcase"
7 | require "unicode_utils/casefold"
8 |
9 | TXT_DIR = File.join(File.dirname(__FILE__), "..", "test")
10 |
11 | def read_txt(filename)
12 | File.read(File.join(TXT_DIR, filename), mode: "r:UTF-8:-")
13 | end
14 |
15 | german_text = read_txt("dreilaendereck.txt")
16 | long_german_text = german_text * 100
17 |
18 | Benchmark.bm(35) do |x|
19 | x.report "String#upcase" do
20 | 100.times { german_text.upcase }
21 | end
22 | x.report "upcase, no language" do
23 | 100.times { UnicodeUtils.upcase(german_text) }
24 | end
25 | x.report "upcase, :de" do
26 | 100.times { UnicodeUtils.upcase(german_text, :de) }
27 | end
28 | x.report "upcase, :tr" do
29 | 100.times { UnicodeUtils.upcase(german_text, :tr) }
30 | end
31 | x.report "long text: String#upcase" do
32 | 1.times { long_german_text.upcase }
33 | end
34 | x.report "long text: upcase, no language" do
35 | 1.times { UnicodeUtils.upcase(long_german_text) }
36 | end
37 | x.report "long text: upcase, :de" do
38 | 1.times { UnicodeUtils.upcase(long_german_text, :de) }
39 | end
40 | x.report "long text: upcase, :tr" do
41 | 1.times { UnicodeUtils.upcase(long_german_text, :tr) }
42 | end
43 |
44 | x.report "String#downcase" do
45 | 100.times { german_text.downcase }
46 | end
47 | x.report "downcase, no language" do
48 | 100.times { UnicodeUtils.downcase(german_text) }
49 | end
50 | x.report "downcase, :de" do
51 | 100.times { UnicodeUtils.downcase(german_text, :de) }
52 | end
53 | x.report "downcase, :tr" do
54 | 100.times { UnicodeUtils.downcase(german_text, :tr) }
55 | end
56 | x.report "long text: String#downcase" do
57 | 1.times { long_german_text.downcase }
58 | end
59 | x.report "long text: downcase, no language" do
60 | 1.times { UnicodeUtils.downcase(long_german_text) }
61 | end
62 | x.report "long text: downcase, :de" do
63 | 1.times { UnicodeUtils.downcase(long_german_text, :de) }
64 | end
65 | x.report "long text: downcase, :tr" do
66 | 1.times { UnicodeUtils.downcase(long_german_text, :tr) }
67 | end
68 |
69 | x.report "casefold" do
70 | 100.times { UnicodeUtils.casefold(german_text) }
71 | end
72 | x.report "long text: casefold" do
73 | 1.times { UnicodeUtils.casefold(long_german_text) }
74 | end
75 | end
76 |
--------------------------------------------------------------------------------
/bench/char_name.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "benchmark"
4 |
5 | require "unicode_utils/char_name"
6 | require "unicode_utils/codepoint"
7 |
8 | def all_char_names
9 | UnicodeUtils::Codepoint::RANGE.each { |code_point|
10 | UnicodeUtils.char_name(code_point)
11 | }
12 | end
13 |
14 | def cjk_char_names
15 | [0x3400..0x4DB5, 0x4E00..0x9FC3, 0x20000..0x2A6D6].each { |range|
16 | range.each { |code_point|
17 | UnicodeUtils.char_name(code_point)
18 | }
19 | }
20 | end
21 |
22 | def hangul_syllable_char_names
23 | (0xAC00..0xD7A3).each { |code_point|
24 | UnicodeUtils.char_name(code_point)
25 | }
26 | end
27 |
28 | def name_map_lookup(code_point) # bare UnicodeUtils::NAME_MAP lookup, timed by the "baseline" report
29 | UnicodeUtils::NAME_MAP[code_point]
30 | end
31 |
32 | puts "UnicodeUtils.char_name benchmarks"
33 |
34 | Benchmark.bm { |x|
35 | x.report("baseline") {
36 | UnicodeUtils::Codepoint::RANGE.each { |code_point|
37 | name_map_lookup(code_point)
38 | }
39 | }
40 | x.report("all code points") {
41 | all_char_names
42 | }
43 | x.report("CJK UNIFIED IDEOGRAPH") {
44 | cjk_char_names
45 | }
46 | x.report("HANGUL SYLLABLE") {
47 | hangul_syllable_char_names
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/bench/each_grapheme.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "benchmark"
4 |
5 | require "unicode_utils/each_grapheme"
6 |
7 | TXT_DIR = File.join(File.dirname(__FILE__), "..", "test")
8 |
9 | def read_txt(filename)
10 | File.read(File.join(TXT_DIR, filename), mode: "r:UTF-8:-")
11 | end
12 |
13 | german_text = read_txt("dreilaendereck.txt")
14 | long_german_text = german_text * 50
15 |
16 | Benchmark.bmbm do |x|
17 | x.report "each_grapheme" do
18 | 50.times { UnicodeUtils.each_grapheme(german_text) { |g| g } }
19 | end
20 | x.report "each_grapheme, long text" do
21 | 1.times { UnicodeUtils.each_grapheme(long_german_text) { |g| g } }
22 | end
23 | end
24 |
--------------------------------------------------------------------------------
/bench/each_word.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "benchmark"
4 |
5 | require "unicode_utils/each_word"
6 |
7 | TXT_DIR = File.join(File.dirname(__FILE__), "..", "test")
8 |
9 | def read_txt(filename)
10 | File.read(File.join(TXT_DIR, filename), mode: "r:UTF-8:-")
11 | end
12 |
13 | german_text = read_txt("dreilaendereck.txt")
14 | long_german_text = german_text * 30
15 |
16 | Benchmark.bm(35) do |x|
17 | x.report "each_word" do
18 | 30.times { UnicodeUtils.each_word(german_text) { |w| w } }
19 | end
20 | x.report "each_word, long text" do
21 | 1.times { UnicodeUtils.each_word(long_german_text) { |w| w } }
22 | end
23 | end
24 |
--------------------------------------------------------------------------------
/bench/grep.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "benchmark"
4 |
5 | require "unicode_utils/grep"
6 |
7 | Benchmark.bm { |x|
8 | x.report("angstrom") {
9 | UnicodeUtils.grep(/angstrom/)
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/bench/normalization.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "benchmark"
4 |
5 | require "unicode_utils/nfd"
6 | require "unicode_utils/nfkd"
7 | require "unicode_utils/nfc"
8 | require "unicode_utils/nfkc"
9 |
10 | TXT_DIR = File.join(File.dirname(__FILE__), "..", "test")
11 |
12 | def read_txt(filename)
13 | File.read(File.join(TXT_DIR, filename), mode: "r:UTF-8:-")
14 | end
15 |
16 | german_text = read_txt("dreilaendereck.txt")
17 | long_german_text = german_text * 100
18 |
19 | Benchmark.bmbm do |x|
20 | x.report "nfd" do
21 | 100.times { UnicodeUtils.nfd(german_text) }
22 | end
23 | x.report "nfd, long text" do
24 | 1.times { UnicodeUtils.nfd(long_german_text) }
25 | end
26 | x.report "nfkd" do
27 | 100.times { UnicodeUtils.nfkd(german_text) }
28 | end
29 | x.report "nfkd, long text" do
30 | 1.times { UnicodeUtils.nfkd(long_german_text) }
31 | end
32 | x.report "nfc" do
33 | 100.times { UnicodeUtils.nfc(german_text) }
34 | end
35 | x.report "nfc, long text" do
36 | 1.times { UnicodeUtils.nfc(long_german_text) }
37 | end
38 | x.report "nfkc" do
39 | 100.times { UnicodeUtils.nfkc(german_text) }
40 | end
41 | x.report "nfkc, long text" do
42 | 1.times { UnicodeUtils.nfkc(long_german_text) }
43 | end
44 | end
45 |
--------------------------------------------------------------------------------
/bench/titlecase.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "benchmark"
4 |
5 | require "unicode_utils/titlecase"
6 |
7 | TXT_DIR = File.join(File.dirname(__FILE__), "..", "test")
8 |
9 | def read_txt(filename)
10 | File.read(File.join(TXT_DIR, filename), mode: "r:UTF-8:-")
11 | end
12 |
13 | german_text = read_txt("dreilaendereck.txt")
14 | long_german_text = german_text * 30
15 |
16 | Benchmark.bm(35) do |x|
17 | x.report "titlecase" do
18 | 30.times { UnicodeUtils.titlecase(german_text) }
19 | end
20 | x.report "titlecase, long text" do
21 | 1.times { UnicodeUtils.titlecase(long_german_text) }
22 | end
23 | end
24 |
--------------------------------------------------------------------------------
/cdata/case_ignorable_set:
--------------------------------------------------------------------------------
1 | 00003a0000b70003870005f400202700fe1300fe5500ff1a00005e0000600000a80000ad0000af0000b40000b80002b00002b10002b20002b30002b40002b50002b60002b70002b80002b90002ba0002bb0002bc0002bd0002be0002bf0002c00002c10002c20002c30002c40002c50002c60002c70002c80002c90002ca0002cb0002cc0002cd0002ce0002cf0002d00002d10002d20002d30002d40002d50002d60002d70002d80002d90002da0002db0002dc0002dd0002de0002df0002e00002e10002e20002e30002e40002e50002e60002e70002e80002e90002ea0002eb0002ec0002ed0002ee0002ef0002f00002f10002f20002f30002f40002f50002f60002f70002f80002f90002fa0002fb0002fc0002fd0002fe0002ff00030000030100030200030300030400030500030600030700030800030900030a00030b00030c00030d00030e00030f00031000031100031200031300031400031500031600031700031800031900031a00031b00031c00031d00031e00031f00032000032100032200032300032400032500032600032700032800032900032a00032b00032c00032d00032e00032f00033000033100033200033300033400033500033600033700033800033900033a00033b00033c00033d00033e00033f00034000034100034200034300034400034500034600034700034800034900034a00034b00034c00034d00034e00034f00035000035100035200035300035400035500035600035700035800035900035a00035b00035c00035d00035e00035f00036000036100036200036300036400036500036600036700036800036900036a00036b00036c00036d00036e00036f00037400037500037a00038400038500048300048400048500048600048700048800048900055900059100059200059300059400059500059600059700059800059900059a00059b00059c00059d00059e00059f0005a00005a10005a20005a30005a40005a50005a60005a70005a80005a90005aa0005ab0005ac0005ad0005ae0005af0005b00005b10005b20005b30005b40005b50005b60005b70005b80005b90005ba0005bb0005bc0005bd0005bf0005c10005c20005c40005c50005c700060000060100060200060300060400061000061100061200061300061400061500061600061700061800061900061a00064000064b00064c00064d00064e00064f00065000065100065200065300065400065500065600065700065800065900065a00065b00065c00065d00065e00065f0006700006d60006d70006d80006d90006da0006db0006dc0006dd0006df0006e00006e10006e20006e30006e40006e50006e60006e70006e80006ea0006eb0006ec0006ed0007
0f00071100073000073100073200073300073400073500073600073700073800073900073a00073b00073c00073d00073e00073f00074000074100074200074300074400074500074600074700074800074900074a0007a60007a70007a80007a90007aa0007ab0007ac0007ad0007ae0007af0007b00007eb0007ec0007ed0007ee0007ef0007f00007f10007f20007f30007f40007f50007fa00081600081700081800081900081a00081b00081c00081d00081e00081f00082000082100082200082300082400082500082600082700082800082900082a00082b00082c00082d00085900085a00085b0008e40008e50008e60008e70008e80008e90008ea0008eb0008ec0008ed0008ee0008ef0008f00008f10008f20008f30008f40008f50008f60008f70008f80008f90008fa0008fb0008fc0008fd0008fe00090000090100090200093a00093c00094100094200094300094400094500094600094700094800094d0009510009520009530009540009550009560009570009620009630009710009810009bc0009c10009c20009c30009c40009cd0009e20009e3000a01000a02000a3c000a41000a42000a47000a48000a4b000a4c000a4d000a51000a70000a71000a75000a81000a82000abc000ac1000ac2000ac3000ac4000ac5000ac7000ac8000acd000ae2000ae3000b01000b3c000b3f000b41000b42000b43000b44000b4d000b56000b62000b63000b82000bc0000bcd000c3e000c3f000c40000c46000c47000c48000c4a000c4b000c4c000c4d000c55000c56000c62000c63000cbc000cbf000cc6000ccc000ccd000ce2000ce3000d41000d42000d43000d44000d4d000d62000d63000dca000dd2000dd3000dd4000dd6000e31000e34000e35000e36000e37000e38000e39000e3a000e46000e47000e48000e49000e4a000e4b000e4c000e4d000e4e000eb1000eb4000eb5000eb6000eb7000eb8000eb9000ebb000ebc000ec6000ec8000ec9000eca000ecb000ecc000ecd000f18000f19000f35000f37000f39000f71000f72000f73000f74000f75000f76000f77000f78000f79000f7a000f7b000f7c000f7d000f7e000f80000f81000f82000f83000f84000f86000f87000f8d000f8e000f8f000f90000f91000f92000f93000f94000f95000f96000f97000f99000f9a000f9b000f9c000f9d000f9e000f9f000fa0000fa1000fa2000fa3000fa4000fa5000fa6000fa7000fa8000fa9000faa000fab000fac000fad000fae000faf000fb0000fb1000fb2000fb3000fb4000fb5000fb6000fb7000fb8000fb9000fba000fbb000fbc000fc600102d00102e00102f00103000103200103300103400103500103600103700103900103a00103d00103e
00105800105900105e00105f00106000107100107200107300107400108200108500108600108d00109d0010fc00135d00135e00135f0017120017130017140017320017330017340017520017530017720017730017b40017b50017b70017b80017b90017ba0017bb0017bc0017bd0017c60017c90017ca0017cb0017cc0017cd0017ce0017cf0017d00017d10017d20017d30017d70017dd00180b00180c00180d0018430018a900192000192100192200192700192800193200193900193a00193b001a17001a18001a56001a58001a59001a5a001a5b001a5c001a5d001a5e001a60001a62001a65001a66001a67001a68001a69001a6a001a6b001a6c001a73001a74001a75001a76001a77001a78001a79001a7a001a7b001a7c001a7f001aa7001b00001b01001b02001b03001b34001b36001b37001b38001b39001b3a001b3c001b42001b6b001b6c001b6d001b6e001b6f001b70001b71001b72001b73001b80001b81001ba2001ba3001ba4001ba5001ba8001ba9001bab001be6001be8001be9001bed001bef001bf0001bf1001c2c001c2d001c2e001c2f001c30001c31001c32001c33001c36001c37001c78001c79001c7a001c7b001c7c001c7d001cd0001cd1001cd2001cd4001cd5001cd6001cd7001cd8001cd9001cda001cdb001cdc001cdd001cde001cdf001ce0001ce2001ce3001ce4001ce5001ce6001ce7001ce8001ced001cf4001d2c001d2d001d2e001d2f001d30001d31001d32001d33001d34001d35001d36001d37001d38001d39001d3a001d3b001d3c001d3d001d3e001d3f001d40001d41001d42001d43001d44001d45001d46001d47001d48001d49001d4a001d4b001d4c001d4d001d4e001d4f001d50001d51001d52001d53001d54001d55001d56001d57001d58001d59001d5a001d5b001d5c001d5d001d5e001d5f001d60001d61001d62001d63001d64001d65001d66001d67001d68001d69001d6a001d78001d9b001d9c001d9d001d9e001d9f001da0001da1001da2001da3001da4001da5001da6001da7001da8001da9001daa001dab001dac001dad001dae001daf001db0001db1001db2001db3001db4001db5001db6001db7001db8001db9001dba001dbb001dbc001dbd001dbe001dbf001dc0001dc1001dc2001dc3001dc4001dc5001dc6001dc7001dc8001dc9001dca001dcb001dcc001dcd001dce001dcf001dd0001dd1001dd2001dd3001dd4001dd5001dd6001dd7001dd8001dd9001dda001ddb001ddc001ddd001dde001ddf001de0001de1001de2001de3001de4001de5001de6001dfc001dfd001dfe001dff001fbd001fbf001fc0001fc1001fcd001fce001fcf001fdd001fde001fdf001fed001fee001fef001ffd00
1ffe00200b00200c00200d00200e00200f00202a00202b00202c00202d00202e00206000206100206200206300206400206a00206b00206c00206d00206e00206f00207100207f00209000209100209200209300209400209500209600209700209800209900209a00209b00209c0020d00020d10020d20020d30020d40020d50020d60020d70020d80020d90020da0020db0020dc0020dd0020de0020df0020e00020e10020e20020e30020e40020e50020e60020e70020e80020e90020ea0020eb0020ec0020ed0020ee0020ef0020f0002c7c002c7d002cef002cf0002cf1002d6f002d7f002de0002de1002de2002de3002de4002de5002de6002de7002de8002de9002dea002deb002dec002ded002dee002def002df0002df1002df2002df3002df4002df5002df6002df7002df8002df9002dfa002dfb002dfc002dfd002dfe002dff002e2f00300500302a00302b00302c00302d00303100303200303300303400303500303b00309900309a00309b00309c00309d00309e0030fc0030fd0030fe00a01500a4f800a4f900a4fa00a4fb00a4fc00a4fd00a60c00a66f00a67000a67100a67200a67400a67500a67600a67700a67800a67900a67a00a67b00a67c00a67d00a67f00a69f00a6f000a6f100a70000a70100a70200a70300a70400a70500a70600a70700a70800a70900a70a00a70b00a70c00a70d00a70e00a70f00a71000a71100a71200a71300a71400a71500a71600a71700a71800a71900a71a00a71b00a71c00a71d00a71e00a71f00a72000a72100a77000a78800a78900a78a00a7f800a7f900a80200a80600a80b00a82500a82600a8c400a8e000a8e100a8e200a8e300a8e400a8e500a8e600a8e700a8e800a8e900a8ea00a8eb00a8ec00a8ed00a8ee00a8ef00a8f000a8f100a92600a92700a92800a92900a92a00a92b00a92c00a92d00a94700a94800a94900a94a00a94b00a94c00a94d00a94e00a94f00a95000a95100a98000a98100a98200a9b300a9b600a9b700a9b800a9b900a9bc00a9cf00aa2900aa2a00aa2b00aa2c00aa2d00aa2e00aa3100aa3200aa3500aa3600aa4300aa4c00aa7000aab000aab200aab300aab400aab700aab800aabe00aabf00aac100aadd00aaec00aaed00aaf300aaf400aaf600abe500abe800abed00fb1e00fbb200fbb300fbb400fbb500fbb600fbb700fbb800fbb900fbba00fbbb00fbbc00fbbd00fbbe00fbbf00fbc000fbc100fe0000fe0100fe0200fe0300fe0400fe0500fe0600fe0700fe0800fe0900fe0a00fe0b00fe0c00fe0d00fe0e00fe0f00fe2000fe2100fe2200fe2300fe2400fe2500fe2600feff00ff3e00ff4000ff7000ff9e00ff9f00ffe300fff900fffa00fffb0101fd010a01010a02010a
03010a05010a06010a0c010a0d010a0e010a0f010a38010a39010a3a010a3f01100101103801103901103a01103b01103c01103d01103e01103f0110400110410110420110430110440110450110460110800110810110b30110b40110b50110b60110b90110ba0110bd01110001110101110201112701112801112901112a01112b01112d01112e01112f0111300111310111320111330111340111800111810111b60111b70111b80111b90111ba0111bb0111bc0111bd0111be0116ab0116ad0116b00116b10116b20116b30116b40116b50116b7016f8f016f90016f91016f92016f93016f94016f95016f96016f97016f98016f99016f9a016f9b016f9c016f9d016f9e016f9f01d16701d16801d16901d17301d17401d17501d17601d17701d17801d17901d17a01d17b01d17c01d17d01d17e01d17f01d18001d18101d18201d18501d18601d18701d18801d18901d18a01d18b01d1aa01d1ab01d1ac01d1ad01d24201d24301d2440e00010e00200e00210e00220e00230e00240e00250e00260e00270e00280e00290e002a0e002b0e002c0e002d0e002e0e002f0e00300e00310e00320e00330e00340e00350e00360e00370e00380e00390e003a0e003b0e003c0e003d0e003e0e003f0e00400e00410e00420e00430e00440e00450e00460e00470e00480e00490e004a0e004b0e004c0e004d0e004e0e004f0e00500e00510e00520e00530e00540e00550e00560e00570e00580e00590e005a0e005b0e005c0e005d0e005e0e005f0e00600e00610e00620e00630e00640e00650e00660e00670e00680e00690e006a0e006b0e006c0e006d0e006e0e006f0e00700e00710e00720e00730e00740e00750e00760e00770e00780e00790e007a0e007b0e007c0e007d0e007e0e007f0e01000e01010e01020e01030e01040e01050e01060e01070e01080e01090e010a0e010b0e010c0e010d0e010e0e010f0e01100e01110e01120e01130e01140e01150e01160e01170e01180e01190e011a0e011b0e011c0e011d0e011e0e011f0e01200e01210e01220e01230e01240e01250e01260e01270e01280e01290e012a0e012b0e012c0e012d0e012e0e012f0e01300e01310e01320e01330e01340e01350e01360e01370e01380e01390e013a0e013b0e013c0e013d0e013e0e013f0e01400e01410e01420e01430e01440e01450e01460e01470e01480e01490e014a0e014b0e014c0e014d0e014e0e014f0e01500e01510e01520e01530e01540e01550e01560e01570e01580e01590e015a0e015b0e015c0e015d0e015e0e015f0e01600e01610e01620e01630e01640e01650e01660e01670e01680e01690e016a0e016b0e016c0e016d0e016e0e016f0e01700e01710e0172
0e01730e01740e01750e01760e01770e01780e01790e017a0e017b0e017c0e017d0e017e0e017f0e01800e01810e01820e01830e01840e01850e01860e01870e01880e01890e018a0e018b0e018c0e018d0e018e0e018f0e01900e01910e01920e01930e01940e01950e01960e01970e01980e01990e019a0e019b0e019c0e019d0e019e0e019f0e01a00e01a10e01a20e01a30e01a40e01a50e01a60e01a70e01a80e01a90e01aa0e01ab0e01ac0e01ad0e01ae0e01af0e01b00e01b10e01b20e01b30e01b40e01b50e01b60e01b70e01b80e01b90e01ba0e01bb0e01bc0e01bd0e01be0e01bf0e01c00e01c10e01c20e01c30e01c40e01c50e01c60e01c70e01c80e01c90e01ca0e01cb0e01cc0e01cd0e01ce0e01cf0e01d00e01d10e01d20e01d30e01d40e01d50e01d60e01d70e01d80e01d90e01da0e01db0e01dc0e01dd0e01de0e01df0e01e00e01e10e01e20e01e30e01e40e01e50e01e60e01e70e01e80e01e90e01ea0e01eb0e01ec0e01ed0e01ee0e01ef
--------------------------------------------------------------------------------
/cdata/casefold_c_map:
--------------------------------------------------------------------------------
1 | 00004100006100004200006200004300006300004400006400004500006500004600006600004700006700004800006800004900006900004a00006a00004b00006b00004c00006c00004d00006d00004e00006e00004f00006f00005000007000005100007100005200007200005300007300005400007400005500007500005600007600005700007700005800007800005900007900005a00007a0000b50003bc0000c00000e00000c10000e10000c20000e20000c30000e30000c40000e40000c50000e50000c60000e60000c70000e70000c80000e80000c90000e90000ca0000ea0000cb0000eb0000cc0000ec0000cd0000ed0000ce0000ee0000cf0000ef0000d00000f00000d10000f10000d20000f20000d30000f30000d40000f40000d50000f50000d60000f60000d80000f80000d90000f90000da0000fa0000db0000fb0000dc0000fc0000dd0000fd0000de0000fe00010000010100010200010300010400010500010600010700010800010900010a00010b00010c00010d00010e00010f00011000011100011200011300011400011500011600011700011800011900011a00011b00011c00011d00011e00011f00012000012100012200012300012400012500012600012700012800012900012a00012b00012c00012d00012e00012f00013200013300013400013500013600013700013900013a00013b00013c00013d00013e00013f00014000014100014200014300014400014500014600014700014800014a00014b00014c00014d00014e00014f00015000015100015200015300015400015500015600015700015800015900015a00015b00015c00015d00015e00015f00016000016100016200016300016400016500016600016700016800016900016a00016b00016c00016d00016e00016f0001700001710001720001730001740001750001760001770001780000ff00017900017a00017b00017c00017d00017e00017f00007300018100025300018200018300018400018500018600025400018700018800018900025600018a00025700018b00018c00018e0001dd00018f00025900019000025b00019100019200019300026000019400026300019600026900019700026800019800019900019c00026f00019d00027200019f0002750001a00001a10001a20001a30001a40001a50001a60002800001a70001a80001a90002830001ac0001ad0001ae0002880001af0001b00001b100028a0001b200028b0001b30001b40001b50001b60001b70002920001b80001b90001bc0001bd0001c40001c60001c50001c60001c70001c90001c80001c90001ca0001cc0001cb0001cc0001cd0001ce0001cf0001d00001d10001d20001d30001d40001
d50001d60001d70001d80001d90001da0001db0001dc0001de0001df0001e00001e10001e20001e30001e40001e50001e60001e70001e80001e90001ea0001eb0001ec0001ed0001ee0001ef0001f10001f30001f20001f30001f40001f50001f60001950001f70001bf0001f80001f90001fa0001fb0001fc0001fd0001fe0001ff00020000020100020200020300020400020500020600020700020800020900020a00020b00020c00020d00020e00020f00021000021100021200021300021400021500021600021700021800021900021a00021b00021c00021d00021e00021f00022000019e00022200022300022400022500022600022700022800022900022a00022b00022c00022d00022e00022f00023000023100023200023300023a002c6500023b00023c00023d00019a00023e002c6600024100024200024300018000024400028900024500028c00024600024700024800024900024a00024b00024c00024d00024e00024f0003450003b90003700003710003720003730003760003770003860003ac0003880003ad0003890003ae00038a0003af00038c0003cc00038e0003cd00038f0003ce0003910003b10003920003b20003930003b30003940003b40003950003b50003960003b60003970003b70003980003b80003990003b900039a0003ba00039b0003bb00039c0003bc00039d0003bd00039e0003be00039f0003bf0003a00003c00003a10003c10003a30003c30003a40003c40003a50003c50003a60003c60003a70003c70003a80003c80003a90003c90003aa0003ca0003ab0003cb0003c20003c30003cf0003d70003d00003b20003d10003b80003d50003c60003d60003c00003d80003d90003da0003db0003dc0003dd0003de0003df0003e00003e10003e20003e30003e40003e50003e60003e70003e80003e90003ea0003eb0003ec0003ed0003ee0003ef0003f00003ba0003f10003c10003f40003b80003f50003b50003f70003f80003f90003f20003fa0003fb0003fd00037b0003fe00037c0003ff00037d00040000045000040100045100040200045200040300045300040400045400040500045500040600045600040700045700040800045800040900045900040a00045a00040b00045b00040c00045c00040d00045d00040e00045e00040f00045f00041000043000041100043100041200043200041300043300041400043400041500043500041600043600041700043700041800043800041900043900041a00043a00041b00043b00041c00043c00041d00043d00041e00043e00041f00043f000420000440000421000441000422000442000423000443000424000444000425000445000426000446000427000447000428000448
00042900044900042a00044a00042b00044b00042c00044c00042d00044d00042e00044e00042f00044f00046000046100046200046300046400046500046600046700046800046900046a00046b00046c00046d00046e00046f00047000047100047200047300047400047500047600047700047800047900047a00047b00047c00047d00047e00047f00048000048100048a00048b00048c00048d00048e00048f00049000049100049200049300049400049500049600049700049800049900049a00049b00049c00049d00049e00049f0004a00004a10004a20004a30004a40004a50004a60004a70004a80004a90004aa0004ab0004ac0004ad0004ae0004af0004b00004b10004b20004b30004b40004b50004b60004b70004b80004b90004ba0004bb0004bc0004bd0004be0004bf0004c00004cf0004c10004c20004c30004c40004c50004c60004c70004c80004c90004ca0004cb0004cc0004cd0004ce0004d00004d10004d20004d30004d40004d50004d60004d70004d80004d90004da0004db0004dc0004dd0004de0004df0004e00004e10004e20004e30004e40004e50004e60004e70004e80004e90004ea0004eb0004ec0004ed0004ee0004ef0004f00004f10004f20004f30004f40004f50004f60004f70004f80004f90004fa0004fb0004fc0004fd0004fe0004ff00050000050100050200050300050400050500050600050700050800050900050a00050b00050c00050d00050e00050f00051000051100051200051300051400051500051600051700051800051900051a00051b00051c00051d00051e00051f00052000052100052200052300052400052500052600052700053100056100053200056200053300056300053400056400053500056500053600056600053700056700053800056800053900056900053a00056a00053b00056b00053c00056c00053d00056d00053e00056e00053f00056f00054000057000054100057100054200057200054300057300054400057400054500057500054600057600054700057700054800057800054900057900054a00057a00054b00057b00054c00057c00054d00057d00054e00057e00054f00057f0005500005800005510005810005520005820005530005830005540005840005550005850005560005860010a0002d000010a1002d010010a2002d020010a3002d030010a4002d040010a5002d050010a6002d060010a7002d070010a8002d080010a9002d090010aa002d0a0010ab002d0b0010ac002d0c0010ad002d0d0010ae002d0e0010af002d0f0010b0002d100010b1002d110010b2002d120010b3002d130010b4002d140010b5002d150010b6002d160010b7002d170010b8002d180010b900
2d190010ba002d1a0010bb002d1b0010bc002d1c0010bd002d1d0010be002d1e0010bf002d1f0010c0002d200010c1002d210010c2002d220010c3002d230010c4002d240010c5002d250010c7002d270010cd002d2d001e00001e01001e02001e03001e04001e05001e06001e07001e08001e09001e0a001e0b001e0c001e0d001e0e001e0f001e10001e11001e12001e13001e14001e15001e16001e17001e18001e19001e1a001e1b001e1c001e1d001e1e001e1f001e20001e21001e22001e23001e24001e25001e26001e27001e28001e29001e2a001e2b001e2c001e2d001e2e001e2f001e30001e31001e32001e33001e34001e35001e36001e37001e38001e39001e3a001e3b001e3c001e3d001e3e001e3f001e40001e41001e42001e43001e44001e45001e46001e47001e48001e49001e4a001e4b001e4c001e4d001e4e001e4f001e50001e51001e52001e53001e54001e55001e56001e57001e58001e59001e5a001e5b001e5c001e5d001e5e001e5f001e60001e61001e62001e63001e64001e65001e66001e67001e68001e69001e6a001e6b001e6c001e6d001e6e001e6f001e70001e71001e72001e73001e74001e75001e76001e77001e78001e79001e7a001e7b001e7c001e7d001e7e001e7f001e80001e81001e82001e83001e84001e85001e86001e87001e88001e89001e8a001e8b001e8c001e8d001e8e001e8f001e90001e91001e92001e93001e94001e95001e9b001e61001ea0001ea1001ea2001ea3001ea4001ea5001ea6001ea7001ea8001ea9001eaa001eab001eac001ead001eae001eaf001eb0001eb1001eb2001eb3001eb4001eb5001eb6001eb7001eb8001eb9001eba001ebb001ebc001ebd001ebe001ebf001ec0001ec1001ec2001ec3001ec4001ec5001ec6001ec7001ec8001ec9001eca001ecb001ecc001ecd001ece001ecf001ed0001ed1001ed2001ed3001ed4001ed5001ed6001ed7001ed8001ed9001eda001edb001edc001edd001ede001edf001ee0001ee1001ee2001ee3001ee4001ee5001ee6001ee7001ee8001ee9001eea001eeb001eec001eed001eee001eef001ef0001ef1001ef2001ef3001ef4001ef5001ef6001ef7001ef8001ef9001efa001efb001efc001efd001efe001eff001f08001f00001f09001f01001f0a001f02001f0b001f03001f0c001f04001f0d001f05001f0e001f06001f0f001f07001f18001f10001f19001f11001f1a001f12001f1b001f13001f1c001f14001f1d001f15001f28001f20001f29001f21001f2a001f22001f2b001f23001f2c001f24001f2d001f25001f2e001f26001f2f001f27001f38001f30001f39001f31001f3a001f32001f3b001f33001f3c001f34001f3d001f35001f
3e001f36001f3f001f37001f48001f40001f49001f41001f4a001f42001f4b001f43001f4c001f44001f4d001f45001f59001f51001f5b001f53001f5d001f55001f5f001f57001f68001f60001f69001f61001f6a001f62001f6b001f63001f6c001f64001f6d001f65001f6e001f66001f6f001f67001fb8001fb0001fb9001fb1001fba001f70001fbb001f71001fbe0003b9001fc8001f72001fc9001f73001fca001f74001fcb001f75001fd8001fd0001fd9001fd1001fda001f76001fdb001f77001fe8001fe0001fe9001fe1001fea001f7a001feb001f7b001fec001fe5001ff8001f78001ff9001f79001ffa001f7c001ffb001f7d0021260003c900212a00006b00212b0000e500213200214e00216000217000216100217100216200217200216300217300216400217400216500217500216600217600216700217700216800217800216900217900216a00217a00216b00217b00216c00217c00216d00217d00216e00217e00216f00217f0021830021840024b60024d00024b70024d10024b80024d20024b90024d30024ba0024d40024bb0024d50024bc0024d60024bd0024d70024be0024d80024bf0024d90024c00024da0024c10024db0024c20024dc0024c30024dd0024c40024de0024c50024df0024c60024e00024c70024e10024c80024e20024c90024e30024ca0024e40024cb0024e50024cc0024e60024cd0024e70024ce0024e80024cf0024e9002c00002c30002c01002c31002c02002c32002c03002c33002c04002c34002c05002c35002c06002c36002c07002c37002c08002c38002c09002c39002c0a002c3a002c0b002c3b002c0c002c3c002c0d002c3d002c0e002c3e002c0f002c3f002c10002c40002c11002c41002c12002c42002c13002c43002c14002c44002c15002c45002c16002c46002c17002c47002c18002c48002c19002c49002c1a002c4a002c1b002c4b002c1c002c4c002c1d002c4d002c1e002c4e002c1f002c4f002c20002c50002c21002c51002c22002c52002c23002c53002c24002c54002c25002c55002c26002c56002c27002c57002c28002c58002c29002c59002c2a002c5a002c2b002c5b002c2c002c5c002c2d002c5d002c2e002c5e002c60002c61002c6200026b002c63001d7d002c6400027d002c67002c68002c69002c6a002c6b002c6c002c6d000251002c6e000271002c6f000250002c70000252002c72002c73002c75002c76002c7e00023f002c7f000240002c80002c81002c82002c83002c84002c85002c86002c87002c88002c89002c8a002c8b002c8c002c8d002c8e002c8f002c90002c91002c92002c93002c94002c95002c96002c97002c98002c99002c9a002c9b002c9c002c9d002c9e002c9f
002ca0002ca1002ca2002ca3002ca4002ca5002ca6002ca7002ca8002ca9002caa002cab002cac002cad002cae002caf002cb0002cb1002cb2002cb3002cb4002cb5002cb6002cb7002cb8002cb9002cba002cbb002cbc002cbd002cbe002cbf002cc0002cc1002cc2002cc3002cc4002cc5002cc6002cc7002cc8002cc9002cca002ccb002ccc002ccd002cce002ccf002cd0002cd1002cd2002cd3002cd4002cd5002cd6002cd7002cd8002cd9002cda002cdb002cdc002cdd002cde002cdf002ce0002ce1002ce2002ce3002ceb002cec002ced002cee002cf2002cf300a64000a64100a64200a64300a64400a64500a64600a64700a64800a64900a64a00a64b00a64c00a64d00a64e00a64f00a65000a65100a65200a65300a65400a65500a65600a65700a65800a65900a65a00a65b00a65c00a65d00a65e00a65f00a66000a66100a66200a66300a66400a66500a66600a66700a66800a66900a66a00a66b00a66c00a66d00a68000a68100a68200a68300a68400a68500a68600a68700a68800a68900a68a00a68b00a68c00a68d00a68e00a68f00a69000a69100a69200a69300a69400a69500a69600a69700a72200a72300a72400a72500a72600a72700a72800a72900a72a00a72b00a72c00a72d00a72e00a72f00a73200a73300a73400a73500a73600a73700a73800a73900a73a00a73b00a73c00a73d00a73e00a73f00a74000a74100a74200a74300a74400a74500a74600a74700a74800a74900a74a00a74b00a74c00a74d00a74e00a74f00a75000a75100a75200a75300a75400a75500a75600a75700a75800a75900a75a00a75b00a75c00a75d00a75e00a75f00a76000a76100a76200a76300a76400a76500a76600a76700a76800a76900a76a00a76b00a76c00a76d00a76e00a76f00a77900a77a00a77b00a77c00a77d001d7900a77e00a77f00a78000a78100a78200a78300a78400a78500a78600a78700a78b00a78c00a78d00026500a79000a79100a79200a79300a7a000a7a100a7a200a7a300a7a400a7a500a7a600a7a700a7a800a7a900a7aa00026600ff2100ff4100ff2200ff4200ff2300ff4300ff2400ff4400ff2500ff4500ff2600ff4600ff2700ff4700ff2800ff4800ff2900ff4900ff2a00ff4a00ff2b00ff4b00ff2c00ff4c00ff2d00ff4d00ff2e00ff4e00ff2f00ff4f00ff3000ff5000ff3100ff5100ff3200ff5200ff3300ff5300ff3400ff5400ff3500ff5500ff3600ff5600ff3700ff5700ff3800ff5800ff3900ff5900ff3a00ff5a01040001042801040101042901040201042a01040301042b01040401042c01040501042d01040601042e01040701042f01040801043001040901043101040a01043201040b01043301040c01
043401040d01043501040e01043601040f01043701041001043801041101043901041201043a01041301043b01041401043c01041501043d01041601043e01041701043f01041801044001041901044101041a01044201041b01044301041c01044401041d01044501041e01044601041f01044701042001044801042101044901042201044a01042301044b01042401044c01042501044d01042601044e01042701044f
--------------------------------------------------------------------------------
/cdata/casefold_f_map:
--------------------------------------------------------------------------------
1 | 0000df000073000073xxxxxx000130000069000307xxxxxx0001490002bc00006exxxxxx0001f000006a00030cxxxxxx0003900003b9000308000301xxxxxx0003b00003c5000308000301xxxxxx000587000565000582xxxxxx001e96000068000331xxxxxx001e97000074000308xxxxxx001e9800007700030axxxxxx001e9900007900030axxxxxx001e9a0000610002bexxxxxx001e9e000073000073xxxxxx001f500003c5000313xxxxxx001f520003c5000313000300xxxxxx001f540003c5000313000301xxxxxx001f560003c5000313000342xxxxxx001f80001f000003b9xxxxxx001f81001f010003b9xxxxxx001f82001f020003b9xxxxxx001f83001f030003b9xxxxxx001f84001f040003b9xxxxxx001f85001f050003b9xxxxxx001f86001f060003b9xxxxxx001f87001f070003b9xxxxxx001f88001f000003b9xxxxxx001f89001f010003b9xxxxxx001f8a001f020003b9xxxxxx001f8b001f030003b9xxxxxx001f8c001f040003b9xxxxxx001f8d001f050003b9xxxxxx001f8e001f060003b9xxxxxx001f8f001f070003b9xxxxxx001f90001f200003b9xxxxxx001f91001f210003b9xxxxxx001f92001f220003b9xxxxxx001f93001f230003b9xxxxxx001f94001f240003b9xxxxxx001f95001f250003b9xxxxxx001f96001f260003b9xxxxxx001f97001f270003b9xxxxxx001f98001f200003b9xxxxxx001f99001f210003b9xxxxxx001f9a001f220003b9xxxxxx001f9b001f230003b9xxxxxx001f9c001f240003b9xxxxxx001f9d001f250003b9xxxxxx001f9e001f260003b9xxxxxx001f9f001f270003b9xxxxxx001fa0001f600003b9xxxxxx001fa1001f610003b9xxxxxx001fa2001f620003b9xxxxxx001fa3001f630003b9xxxxxx001fa4001f640003b9xxxxxx001fa5001f650003b9xxxxxx001fa6001f660003b9xxxxxx001fa7001f670003b9xxxxxx001fa8001f600003b9xxxxxx001fa9001f610003b9xxxxxx001faa001f620003b9xxxxxx001fab001f630003b9xxxxxx001fac001f640003b9xxxxxx001fad001f650003b9xxxxxx001fae001f660003b9xxxxxx001faf001f670003b9xxxxxx001fb2001f700003b9xxxxxx001fb30003b10003b9xxxxxx001fb40003ac0003b9xxxxxx001fb60003b1000342xxxxxx001fb70003b10003420003b9xxxxxx001fbc0003b10003b9xxxxxx001fc2001f740003b9xxxxxx001fc30003b70003b9xxxxxx001fc40003ae0003b9xxxxxx001fc60003b7000342xxxxxx001fc70003b70003420003b9xxxxxx001fcc0003b70003b9xxxxxx001fd20003b9000308000300xxxxxx001fd30003b9000308000301xxxxxx001fd60003b9000342xxxxxx001fd70003b90003080003
42xxxxxx001fe20003c5000308000300xxxxxx001fe30003c5000308000301xxxxxx001fe40003c1000313xxxxxx001fe60003c5000342xxxxxx001fe70003c5000308000342xxxxxx001ff2001f7c0003b9xxxxxx001ff30003c90003b9xxxxxx001ff40003ce0003b9xxxxxx001ff60003c9000342xxxxxx001ff70003c90003420003b9xxxxxx001ffc0003c90003b9xxxxxx00fb00000066000066xxxxxx00fb01000066000069xxxxxx00fb0200006600006cxxxxxx00fb03000066000066000069xxxxxx00fb0400006600006600006cxxxxxx00fb05000073000074xxxxxx00fb06000073000074xxxxxx00fb13000574000576xxxxxx00fb14000574000565xxxxxx00fb1500057400056bxxxxxx00fb1600057e000576xxxxxx00fb1700057400056dxxxxxx
--------------------------------------------------------------------------------
/cdata/casefold_s_map:
--------------------------------------------------------------------------------
1 | 001e9e0000df001f88001f80001f89001f81001f8a001f82001f8b001f83001f8c001f84001f8d001f85001f8e001f86001f8f001f87001f98001f90001f99001f91001f9a001f92001f9b001f93001f9c001f94001f9d001f95001f9e001f96001f9f001f97001fa8001fa0001fa9001fa1001faa001fa2001fab001fa3001fac001fa4001fad001fa5001fae001fa6001faf001fa7001fbc001fb3001fcc001fc3001ffc001ff3
--------------------------------------------------------------------------------
/cdata/cat_set_titlecase:
--------------------------------------------------------------------------------
1 | 0001c50001c80001cb0001f2001f88001f89001f8a001f8b001f8c001f8d001f8e001f8f001f98001f99001f9a001f9b001f9c001f9d001f9e001f9f001fa8001fa9001faa001fab001fac001fad001fae001faf001fbc001fcc001ffc
--------------------------------------------------------------------------------
/cdata/combining_class_map:
--------------------------------------------------------------------------------
1 | 0003340100033501000336010003370100033801001cd401001ce201001ce301001ce401001ce501001ce601001ce701001ce8010020d2010020d3010020d8010020d9010020da010020e5010020e6010020ea010020eb01010a390101d1670101d1680101d1690100093c070009bc07000a3c07000abc07000b3c07000cbc0700103707001b3407001be607001c370700a9b3070110ba070116b7070030990800309a0800094d090009cd09000a4d09000acd09000b4d09000bcd09000c4d09000ccd09000d4d09000dca09000e3a09000f84090010390900103a0900171409001734090017d209001a6009001b4409001baa09001bab09001bf209001bf309002d7f0900a8060900a8c40900a9530900a9c00900aaf60900abed09010a3f09011046090110b90901113309011134090111c0090116b6090005b00a0005b10b0005b20c0005b30d0005b40e0005b50f0005b6100005b7110005b8120005c7120005b9130005ba130005bb140005bc150005bd160005bf170005c1180005c21900fb1e1a00064b1b0008f01b00064c1c0008f11c00064d1d0008f21d0006181e00064e1e0006191f00064f1f00061a200006502000065121000652220006702300071124000c5554000c565b000e3867000e3967000e486b000e496b000e4a6b000e4b6b000eb876000eb976000ec87a000ec97a000eca7a000ecb7a000f7181000f7282000f7a82000f7b82000f7c82000f7d82000f8082000f7484000321ca000322ca000327ca000328ca001dd0ca001dced600031bd8000f39d801d165d801d166d801d16ed801d16fd801d170d801d171d801d172d800302ada000316dc000317dc000318dc000319dc00031cdc00031ddc00031edc00031fdc000320dc000323dc000324dc000325dc000326dc000329dc00032adc00032bdc00032cdc00032ddc00032edc00032fdc000330dc000331dc000332dc000333dc000339dc00033adc00033bdc00033cdc000347dc000348dc000349dc00034ddc00034edc000353dc000354dc000355dc000356dc000359dc00035adc000591dc000596dc00059bdc0005a2dc0005a3dc0005a4dc0005a5dc0005a6dc0005a7dc0005aadc0005c5dc000655dc000656dc00065cdc00065fdc0006e3dc0006eadc0006eddc000731dc000734dc000737dc000738dc000739dc00073bdc00073cdc00073edc000742dc000744dc000746dc000748dc0007f2dc000859dc00085adc00085bdc0008e6dc0008e9dc0008eddc0008eedc0008efdc0008f6dc0008f9dc0008fadc000952dc000f18dc000f19dc000f35dc000f37dc000fc6dc00108ddc00193bdc001a18dc001a7fdc001b6cdc001cd5dc001cd6dc001cd7dc001cd8dc001cd9dc001cdcdc001c
dddc001cdedc001cdfdc001ceddc001dc2dc001dcadc001dcfdc001dfddc001dffdc0020e8dc0020ecdc0020eddc0020eedc0020efdc00a92bdc00a92cdc00a92ddc00aab4dc0101fddc010a0ddc010a3adc01d17bdc01d17cdc01d17ddc01d17edc01d17fdc01d180dc01d181dc01d182dc01d18adc01d18bdc00059ade0005adde001939de00302dde00302ee000302fe001d16de20005aee40018a9e400302be4000300e6000301e6000302e6000303e6000304e6000305e6000306e6000307e6000308e6000309e600030ae600030be600030ce600030de600030ee600030fe6000310e6000311e6000312e6000313e6000314e600033de600033ee600033fe6000340e6000341e6000342e6000343e6000344e6000346e600034ae600034be600034ce6000350e6000351e6000352e6000357e600035be6000363e6000364e6000365e6000366e6000367e6000368e6000369e600036ae600036be600036ce600036de600036ee600036fe6000483e6000484e6000485e6000486e6000487e6000592e6000593e6000594e6000595e6000597e6000598e6000599e600059ce600059de600059ee600059fe60005a0e60005a1e60005a8e60005a9e60005abe60005ace60005afe60005c4e6000610e6000611e6000612e6000613e6000614e6000615e6000616e6000617e6000653e6000654e6000657e6000658e6000659e600065ae600065be600065de600065ee60006d6e60006d7e60006d8e60006d9e60006dae60006dbe60006dce60006dfe60006e0e60006e1e60006e2e60006e4e60006e7e60006e8e60006ebe60006ece6000730e6000732e6000733e6000735e6000736e600073ae600073de600073fe6000740e6000741e6000743e6000745e6000747e6000749e600074ae60007ebe60007ece60007ede60007eee60007efe60007f0e60007f1e60007f3e6000816e6000817e6000818e6000819e600081be600081ce600081de600081ee600081fe6000820e6000821e6000822e6000823e6000825e6000826e6000827e6000829e600082ae600082be600082ce600082de60008e4e60008e5e60008e7e60008e8e60008eae60008ebe60008ece60008f3e60008f4e60008f5e60008f7e60008f8e60008fbe60008fce60008fde60008fee6000951e6000953e6000954e6000f82e6000f83e6000f86e6000f87e600135de600135ee600135fe60017dde600193ae6001a17e6001a75e6001a76e6001a77e6001a78e6001a79e6001a7ae6001a7be6001a7ce6001b6be6001b6de6001b6ee6001b6fe6001b70e6001b71e6001b72e6001b73e6001cd0e6001cd1e6001cd2e6001cdae6001cdbe6001ce0e6001cf4e6001dc0e6001dc1e6001dc3e6001dc4e6001dc5e6001d
c6e6001dc7e6001dc8e6001dc9e6001dcbe6001dcce6001dd1e6001dd2e6001dd3e6001dd4e6001dd5e6001dd6e6001dd7e6001dd8e6001dd9e6001ddae6001ddbe6001ddce6001ddde6001ddee6001ddfe6001de0e6001de1e6001de2e6001de3e6001de4e6001de5e6001de6e6001dfee60020d0e60020d1e60020d4e60020d5e60020d6e60020d7e60020dbe60020dce60020e1e60020e7e60020e9e60020f0e6002cefe6002cf0e6002cf1e6002de0e6002de1e6002de2e6002de3e6002de4e6002de5e6002de6e6002de7e6002de8e6002de9e6002deae6002debe6002dece6002dede6002deee6002defe6002df0e6002df1e6002df2e6002df3e6002df4e6002df5e6002df6e6002df7e6002df8e6002df9e6002dfae6002dfbe6002dfce6002dfde6002dfee6002dffe600a66fe600a674e600a675e600a676e600a677e600a678e600a679e600a67ae600a67be600a67ce600a67de600a69fe600a6f0e600a6f1e600a8e0e600a8e1e600a8e2e600a8e3e600a8e4e600a8e5e600a8e6e600a8e7e600a8e8e600a8e9e600a8eae600a8ebe600a8ece600a8ede600a8eee600a8efe600a8f0e600a8f1e600aab0e600aab2e600aab3e600aab7e600aab8e600aabee600aabfe600aac1e600fe20e600fe21e600fe22e600fe23e600fe24e600fe25e600fe26e6010a0fe6010a38e6011100e6011101e6011102e601d185e601d186e601d187e601d188e601d189e601d1aae601d1abe601d1ace601d1ade601d242e601d243e601d244e6000315e800031ae8000358e800302ce800035ce900035fe9000362e9001dfce900035dea00035eea000360ea000361ea001dcdea000345f0
--------------------------------------------------------------------------------
/cdata/composition_exclusion_set:
--------------------------------------------------------------------------------
1 | 00034000034100034300034400037400037e00038700095800095900095a00095b00095c00095d00095e00095f0009dc0009dd0009df000a33000a36000a59000a5a000a5b000a5e000b5c000b5d000f43000f4d000f52000f57000f5c000f69000f73000f75000f76000f78000f81000f93000f9d000fa2000fa7000fac000fb9001f71001f73001f75001f77001f79001f7b001f7d001fbb001fbe001fc9001fcb001fd3001fdb001fe3001feb001fee001fef001ff9001ffb001ffd00200000200100212600212a00212b00232900232a002adc00f90000f90100f90200f90300f90400f90500f90600f90700f90800f90900f90a00f90b00f90c00f90d00f90e00f90f00f91000f91100f91200f91300f91400f91500f91600f91700f91800f91900f91a00f91b00f91c00f91d00f91e00f91f00f92000f92100f92200f92300f92400f92500f92600f92700f92800f92900f92a00f92b00f92c00f92d00f92e00f92f00f93000f93100f93200f93300f93400f93500f93600f93700f93800f93900f93a00f93b00f93c00f93d00f93e00f93f00f94000f94100f94200f94300f94400f94500f94600f94700f94800f94900f94a00f94b00f94c00f94d00f94e00f94f00f95000f95100f95200f95300f95400f95500f95600f95700f95800f95900f95a00f95b00f95c00f95d00f95e00f95f00f96000f96100f96200f96300f96400f96500f96600f96700f96800f96900f96a00f96b00f96c00f96d00f96e00f96f00f97000f97100f97200f97300f97400f97500f97600f97700f97800f97900f97a00f97b00f97c00f97d00f97e00f97f00f98000f98100f98200f98300f98400f98500f98600f98700f98800f98900f98a00f98b00f98c00f98d00f98e00f98f00f99000f99100f99200f99300f99400f99500f99600f99700f99800f99900f99a00f99b00f99c00f99d00f99e00f99f00f9a000f9a100f9a200f9a300f9a400f9a500f9a600f9a700f9a800f9a900f9aa00f9ab00f9ac00f9ad00f9ae00f9af00f9b000f9b100f9b200f9b300f9b400f9b500f9b600f9b700f9b800f9b900f9ba00f9bb00f9bc00f9bd00f9be00f9bf00f9c000f9c100f9c200f9c300f9c400f9c500f9c600f9c700f9c800f9c900f9ca00f9cb00f9cc00f9cd00f9ce00f9cf00f9d000f9d100f9d200f9d300f9d400f9d500f9d600f9d700f9d800f9d900f9da00f9db00f9dc00f9dd00f9de00f9df00f9e000f9e100f9e200f9e300f9e400f9e500f9e600f9e700f9e800f9e900f9ea00f9eb00f9ec00f9ed00f9ee00f9ef00f9f000f9f100f9f200f9f300f9f400f9f500f9f600f9f700f9f800f9f900f9fa00f9fb00f9fc00f9fd00f9fe00f9ff00fa0000fa0100fa0200fa0300fa0400fa
0500fa0600fa0700fa0800fa0900fa0a00fa0b00fa0c00fa0d00fa1000fa1200fa1500fa1600fa1700fa1800fa1900fa1a00fa1b00fa1c00fa1d00fa1e00fa2000fa2200fa2500fa2600fa2a00fa2b00fa2c00fa2d00fa2e00fa2f00fa3000fa3100fa3200fa3300fa3400fa3500fa3600fa3700fa3800fa3900fa3a00fa3b00fa3c00fa3d00fa3e00fa3f00fa4000fa4100fa4200fa4300fa4400fa4500fa4600fa4700fa4800fa4900fa4a00fa4b00fa4c00fa4d00fa4e00fa4f00fa5000fa5100fa5200fa5300fa5400fa5500fa5600fa5700fa5800fa5900fa5a00fa5b00fa5c00fa5d00fa5e00fa5f00fa6000fa6100fa6200fa6300fa6400fa6500fa6600fa6700fa6800fa6900fa6a00fa6b00fa6c00fa6d00fa7000fa7100fa7200fa7300fa7400fa7500fa7600fa7700fa7800fa7900fa7a00fa7b00fa7c00fa7d00fa7e00fa7f00fa8000fa8100fa8200fa8300fa8400fa8500fa8600fa8700fa8800fa8900fa8a00fa8b00fa8c00fa8d00fa8e00fa8f00fa9000fa9100fa9200fa9300fa9400fa9500fa9600fa9700fa9800fa9900fa9a00fa9b00fa9c00fa9d00fa9e00fa9f00faa000faa100faa200faa300faa400faa500faa600faa700faa800faa900faaa00faab00faac00faad00faae00faaf00fab000fab100fab200fab300fab400fab500fab600fab700fab800fab900faba00fabb00fabc00fabd00fabe00fabf00fac000fac100fac200fac300fac400fac500fac600fac700fac800fac900faca00facb00facc00facd00face00facf00fad000fad100fad200fad300fad400fad500fad600fad700fad800fad900fb1d00fb1f00fb2a00fb2b00fb2c00fb2d00fb2e00fb2f00fb3000fb3100fb3200fb3300fb3400fb3500fb3600fb3800fb3900fb3a00fb3b00fb3c00fb3e00fb4000fb4100fb4300fb4400fb4600fb4700fb4800fb4900fb4a00fb4b00fb4c00fb4d00fb4e01d15e01d15f01d16001d16101d16201d16301d16401d1bb01d1bc01d1bd01d1be01d1bf01d1c002f80002f80102f80202f80302f80402f80502f80602f80702f80802f80902f80a02f80b02f80c02f80d02f80e02f80f02f81002f81102f81202f81302f81402f81502f81602f81702f81802f81902f81a02f81b02f81c02f81d02f81e02f81f02f82002f82102f82202f82302f82402f82502f82602f82702f82802f82902f82a02f82b02f82c02f82d02f82e02f82f02f83002f83102f83202f83302f83402f83502f83602f83702f83802f83902f83a02f83b02f83c02f83d02f83e02f83f02f84002f84102f84202f84302f84402f84502f84602f84702f84802f84902f84a02f84b02f84c02f84d02f84e02f84f02f85002f85102f85202f85302f85402f85502f85602f857
02f85802f85902f85a02f85b02f85c02f85d02f85e02f85f02f86002f86102f86202f86302f86402f86502f86602f86702f86802f86902f86a02f86b02f86c02f86d02f86e02f86f02f87002f87102f87202f87302f87402f87502f87602f87702f87802f87902f87a02f87b02f87c02f87d02f87e02f87f02f88002f88102f88202f88302f88402f88502f88602f88702f88802f88902f88a02f88b02f88c02f88d02f88e02f88f02f89002f89102f89202f89302f89402f89502f89602f89702f89802f89902f89a02f89b02f89c02f89d02f89e02f89f02f8a002f8a102f8a202f8a302f8a402f8a502f8a602f8a702f8a802f8a902f8aa02f8ab02f8ac02f8ad02f8ae02f8af02f8b002f8b102f8b202f8b302f8b402f8b502f8b602f8b702f8b802f8b902f8ba02f8bb02f8bc02f8bd02f8be02f8bf02f8c002f8c102f8c202f8c302f8c402f8c502f8c602f8c702f8c802f8c902f8ca02f8cb02f8cc02f8cd02f8ce02f8cf02f8d002f8d102f8d202f8d302f8d402f8d502f8d602f8d702f8d802f8d902f8da02f8db02f8dc02f8dd02f8de02f8df02f8e002f8e102f8e202f8e302f8e402f8e502f8e602f8e702f8e802f8e902f8ea02f8eb02f8ec02f8ed02f8ee02f8ef02f8f002f8f102f8f202f8f302f8f402f8f502f8f602f8f702f8f802f8f902f8fa02f8fb02f8fc02f8fd02f8fe02f8ff02f90002f90102f90202f90302f90402f90502f90602f90702f90802f90902f90a02f90b02f90c02f90d02f90e02f90f02f91002f91102f91202f91302f91402f91502f91602f91702f91802f91902f91a02f91b02f91c02f91d02f91e02f91f02f92002f92102f92202f92302f92402f92502f92602f92702f92802f92902f92a02f92b02f92c02f92d02f92e02f92f02f93002f93102f93202f93302f93402f93502f93602f93702f93802f93902f93a02f93b02f93c02f93d02f93e02f93f02f94002f94102f94202f94302f94402f94502f94602f94702f94802f94902f94a02f94b02f94c02f94d02f94e02f94f02f95002f95102f95202f95302f95402f95502f95602f95702f95802f95902f95a02f95b02f95c02f95d02f95e02f95f02f96002f96102f96202f96302f96402f96502f96602f96702f96802f96902f96a02f96b02f96c02f96d02f96e02f96f02f97002f97102f97202f97302f97402f97502f97602f97702f97802f97902f97a02f97b02f97c02f97d02f97e02f97f02f98002f98102f98202f98302f98402f98502f98602f98702f98802f98902f98a02f98b02f98c02f98d02f98e02f98f02f99002f99102f99202f99302f99402f99502f99602f99702f99802f99902f99a02f99b02f99c02f99d02f99e02f99f02f9a002f9a102f9a202f9a302f9a402
f9a502f9a602f9a702f9a802f9a902f9aa02f9ab02f9ac02f9ad02f9ae02f9af02f9b002f9b102f9b202f9b302f9b402f9b502f9b602f9b702f9b802f9b902f9ba02f9bb02f9bc02f9bd02f9be02f9bf02f9c002f9c102f9c202f9c302f9c402f9c502f9c602f9c702f9c802f9c902f9ca02f9cb02f9cc02f9cd02f9ce02f9cf02f9d002f9d102f9d202f9d302f9d402f9d502f9d602f9d702f9d802f9d902f9da02f9db02f9dc02f9dd02f9de02f9df02f9e002f9e102f9e202f9e302f9e402f9e502f9e602f9e702f9e802f9e902f9ea02f9eb02f9ec02f9ed02f9ee02f9ef02f9f002f9f102f9f202f9f302f9f402f9f502f9f602f9f702f9f802f9f902f9fa02f9fb02f9fc02f9fd02f9fe02f9ff02fa0002fa0102fa0202fa0302fa0402fa0502fa0602fa0702fa0802fa0902fa0a02fa0b02fa0c02fa0d02fa0e02fa0f02fa1002fa1102fa1202fa1302fa1402fa1502fa1602fa1702fa1802fa1902fa1a02fa1b02fa1c02fa1d
--------------------------------------------------------------------------------
/cdata/cond_lc_map:
--------------------------------------------------------------------------------
1 | 000049;000069,000307;lt;More_Above
2 | 000049;000131;az;Not_Before_Dot
3 | 000049;000131;tr;Not_Before_Dot
4 | 00004a;00006a,000307;lt;More_Above
5 | 000069;000069;az;
6 | 000069;000069;tr;
7 | 0000cc;000069,000307,000300;lt;
8 | 0000cd;000069,000307,000301;lt;
9 | 000128;000069,000307,000303;lt;
10 | 00012e;00012f,000307;lt;More_Above
11 | 000130;000069;az;
12 | 000130;000069;tr;
13 | 000307;000307;lt;After_Soft_Dotted
14 | 000307;;az;After_I
15 | 000307;;tr;After_I
16 | 0003a3;0003c2;;Final_Sigma
17 |
--------------------------------------------------------------------------------
/cdata/cond_tc_map:
--------------------------------------------------------------------------------
1 | 0003a3;0003a3;;Final_Sigma
2 | 000307;;lt;After_Soft_Dotted
3 | 000049;000049;lt;More_Above
4 | 00004a;00004a;lt;More_Above
5 | 00012e;00012e;lt;More_Above
6 | 0000cc;0000cc;lt;
7 | 0000cd;0000cd;lt;
8 | 000128;000128;lt;
9 | 000130;000130;tr;
10 | 000130;000130;az;
11 | 000307;000307;tr;After_I
12 | 000307;000307;az;After_I
13 | 000049;000049;tr;Not_Before_Dot
14 | 000049;000049;az;Not_Before_Dot
15 | 000069;000130;tr;
16 | 000069;000130;az;
17 |
--------------------------------------------------------------------------------
/cdata/cond_uc_map:
--------------------------------------------------------------------------------
1 | 000049;000049;az;Not_Before_Dot
2 | 000049;000049;lt;More_Above
3 | 000049;000049;tr;Not_Before_Dot
4 | 00004a;00004a;lt;More_Above
5 | 000069;000130;az;
6 | 000069;000130;tr;
7 | 0000cc;0000cc;lt;
8 | 0000cd;0000cd;lt;
9 | 000128;000128;lt;
10 | 00012e;00012e;lt;More_Above
11 | 000130;000130;az;
12 | 000130;000130;tr;
13 | 000307;000307;az;After_I
14 | 000307;000307;tr;After_I
15 | 000307;;lt;After_Soft_Dotted
16 | 0003a3;0003a3;;Final_Sigma
17 |
--------------------------------------------------------------------------------
/cdata/east_asian_width_property_ranges:
--------------------------------------------------------------------------------
1 | 003400004dbf3004e00009fff300ac0000d7a3300e00000f8ff102000002f7ff302fa1e02fffd303000003fffd30f00000ffffd110000010fffd1
--------------------------------------------------------------------------------
/cdata/general_category_aliases:
--------------------------------------------------------------------------------
1 | C;Other
2 | Cc;Control
3 | Cf;Format
4 | Cn;Unassigned
5 | Co;Private_Use
6 | Cs;Surrogate
7 | L;Letter
8 | LC;Cased_Letter
9 | Ll;Lowercase_Letter
10 | Lm;Modifier_Letter
11 | Lo;Other_Letter
12 | Lt;Titlecase_Letter
13 | Lu;Uppercase_Letter
14 | M;Mark
15 | Mc;Spacing_Mark
16 | Me;Enclosing_Mark
17 | Mn;Nonspacing_Mark
18 | N;Number
19 | Nd;Decimal_Number
20 | Nl;Letter_Number
21 | No;Other_Number
22 | P;Punctuation
23 | Pc;Connector_Punctuation
24 | Pd;Dash_Punctuation
25 | Pe;Close_Punctuation
26 | Pf;Final_Punctuation
27 | Pi;Initial_Punctuation
28 | Po;Other_Punctuation
29 | Ps;Open_Punctuation
30 | S;Symbol
31 | Sc;Currency_Symbol
32 | Sk;Modifier_Symbol
33 | Sm;Math_Symbol
34 | So;Other_Symbol
35 | Z;Separator
36 | Zl;Line_Separator
37 | Zp;Paragraph_Separator
38 | Zs;Space_Separator
39 |
--------------------------------------------------------------------------------
/cdata/general_category_ranges:
--------------------------------------------------------------------------------
1 | 003400004db5Lo004e00009fccLo00ac0000d7a3Lo00d80000db7fCs00db8000dbffCs00dc0000dfffCs00e00000f8ffCo02000002a6d6Lo02a70002b734Lo02b74002b81dLo0f00000ffffdCo10000010fffdCo
--------------------------------------------------------------------------------
/cdata/jamo_short_names:
--------------------------------------------------------------------------------
1 | 001100G
2 | 001101GG
3 | 001102N
4 | 001103D
5 | 001104DD
6 | 001105R
7 | 001106M
8 | 001107B
9 | 001108BB
10 | 001109S
11 | 00110aSS
12 | 00110cJ
13 | 00110dJJ
14 | 00110eC
15 | 00110fK
16 | 001110T
17 | 001111P
18 | 001112H
19 | 001161A
20 | 001162AE
21 | 001163YA
22 | 001164YAE
23 | 001165EO
24 | 001166E
25 | 001167YEO
26 | 001168YE
27 | 001169O
28 | 00116aWA
29 | 00116bWAE
30 | 00116cOE
31 | 00116dYO
32 | 00116eU
33 | 00116fWEO
34 | 001170WE
35 | 001171WI
36 | 001172YU
37 | 001173EU
38 | 001174YI
39 | 001175I
40 | 0011a8G
41 | 0011a9GG
42 | 0011aaGS
43 | 0011abN
44 | 0011acNJ
45 | 0011adNH
46 | 0011aeD
47 | 0011afL
48 | 0011b0LG
49 | 0011b1LM
50 | 0011b2LB
51 | 0011b3LS
52 | 0011b4LT
53 | 0011b5LP
54 | 0011b6LH
55 | 0011b7M
56 | 0011b8B
57 | 0011b9BS
58 | 0011baS
59 | 0011bbSS
60 | 0011bcNG
61 | 0011bdJ
62 | 0011beC
63 | 0011bfK
64 | 0011c0T
65 | 0011c1P
66 | 0011c2H
67 |
--------------------------------------------------------------------------------
/cdata/name_aliases:
--------------------------------------------------------------------------------
1 | 0000002204NULL503NUL0000012210START OF HEADING503SOH000002220dSTART OF TEXT503STX000003220bEND OF TEXT503ETX0000042213END OF TRANSMISSION503EOT0000052207ENQUIRY503ENQ000006220bACKNOWLEDGE503ACK0000072205ALERT503BEL0000082209BACKSPACE502BS0000094214CHARACTER TABULATION215HORIZONTAL TABULATION502HT503TAB00000a6209LINE FEED208NEW LINE20bEND OF LINE502LF502NL503EOL00000b320fLINE TABULATION213VERTICAL TABULATION502VT00000c2209FORM FEED502FF00000d220fCARRIAGE RETURN502CR00000e3209SHIFT OUT211LOCKING-SHIFT ONE502SO00000f3208SHIFT IN212LOCKING-SHIFT ZERO502SI0000102210DATA LINK ESCAPE503DLE0000112212DEVICE CONTROL ONE503DC10000122212DEVICE CONTROL TWO503DC20000132214DEVICE CONTROL THREE503DC30000142213DEVICE CONTROL FOUR503DC40000152214NEGATIVE ACKNOWLEDGE503NAK0000162210SYNCHRONOUS IDLE503SYN0000172219END OF TRANSMISSION BLOCK503ETB0000182206CANCEL503CAN000019220dEND OF MEDIUM503EOM00001a220aSUBSTITUTE503SUB00001b2206ESCAPE503ESC00001c321aINFORMATION SEPARATOR FOUR20eFILE SEPARATOR502FS00001d321bINFORMATION SEPARATOR THREE20fGROUP SEPARATOR502GS00001e3219INFORMATION SEPARATOR TWO210RECORD SEPARATOR502RS00001f3219INFORMATION SEPARATOR ONE20eUNIT SEPARATOR502US0000201502SP00007f2206DELETE503DEL0000802411PADDING CHARACTER503PAD0000812411HIGH OCTET PRESET503HOP0000822214BREAK PERMITTED HERE503BPH000083220dNO BREAK HERE503NBH0000842205INDEX503IND0000852209NEXT LINE503NEL0000862216START OF SELECTED AREA503SSA0000872214END OF SELECTED AREA503ESA0000883218CHARACTER TABULATION SET219HORIZONTAL TABULATION SET503HTS0000893227CHARACTER TABULATION WITH JUSTIFICATION228HORIZONTAL TABULATION WITH JUSTIFICATION503HTJ00008a3213LINE TABULATION SET217VERTICAL TABULATION SET503VTS00008b3214PARTIAL LINE FORWARD211PARTIAL LINE DOWN503PLD00008c3215PARTIAL LINE BACKWARD20fPARTIAL LINE UP503PLU00008d3211REVERSE LINE FEED20dREVERSE INDEX502RI00008e3210SINGLE SHIFT TWO20eSINGLE-SHIFT-2503SS200008f3212SINGLE SHIFT THREE20eSINGLE-SHIFT-3503SS30000902215DEVICE CONTROL STRING503DCS000091320fPRIVATE 
USE ONE20dPRIVATE USE-1503PU1000092320fPRIVATE USE TWO20dPRIVATE USE-2503PU20000932212SET TRANSMIT STATE503STS0000942210CANCEL CHARACTER503CCH000095220fMESSAGE WAITING502MW0000963215START OF GUARDED AREA217START OF PROTECTED AREA503SPA0000973213END OF GUARDED AREA215END OF PROTECTED AREA503EPA000098220fSTART OF STRING503SOS0000992423SINGLE GRAPHIC CHARACTER INTRODUCER503SGC00009a221bSINGLE CHARACTER INTRODUCER503SCI00009b221bCONTROL SEQUENCE INTRODUCER503CSI00009c2211STRING TERMINATOR502ST00009d2218OPERATING SYSTEM COMMAND503OSC00009e220fPRIVACY MESSAGE502PM00009f221bAPPLICATION PROGRAM COMMAND503APC0000a01504NBSP0000ad1503SHY0001a21118LATIN CAPITAL LETTER GHA0001a31116LATIN SMALL LETTER GHA00034f1503CGJ0007091122SYRIAC SUBLINEAR COLON SKEWED LEFT000cde1113KANNADA LETTER LLLA000e9d1111LAO LETTER FO FON000e9f1111LAO LETTER FO FAY000ea3110dLAO LETTER RO000ea5110dLAO LETTER LO000fd01123TIBETAN MARK BKA- SHOG GI MGO RGYAN00180b1504FVS100180c1504FVS200180d1504FVS300180e1503MVS00200b1504ZWSP00200c1504ZWNJ00200d1503ZWJ00200e1503LRM00200f1503RLM00202a1503LRE00202b1503RLE00202c1503PDF00202d1503LRO00202e1503RLO00202f1505NNBSP00205f1504MMSP0020601502WJ002118111dWEIERSTRASS ELLIPTIC FUNCTION0024481111MICR ON US SYMBOL0024491110MICR DASH SYMBOL00a015111aYI SYLLABLE ITERATION MARK00fe18113dPRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET00fe001503VS100fe011503VS200fe021503VS300fe031503VS400fe041503VS500fe051503VS600fe061503VS700fe071503VS800fe081503VS900fe091504VS1000fe0a1504VS1100fe0b1504VS1200fe0c1504VS1300fe0d1504VS1400fe0e1504VS1500fe0f1504VS1600feff330fBYTE ORDER MARK503BOM506ZWNBSP01d0c51134BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA 
VASIS0e01001504VS170e01011504VS180e01021504VS190e01031504VS200e01041504VS210e01051504VS220e01061504VS230e01071504VS240e01081504VS250e01091504VS260e010a1504VS270e010b1504VS280e010c1504VS290e010d1504VS300e010e1504VS310e010f1504VS320e01101504VS330e01111504VS340e01121504VS350e01131504VS360e01141504VS370e01151504VS380e01161504VS390e01171504VS400e01181504VS410e01191504VS420e011a1504VS430e011b1504VS440e011c1504VS450e011d1504VS460e011e1504VS470e011f1504VS480e01201504VS490e01211504VS500e01221504VS510e01231504VS520e01241504VS530e01251504VS540e01261504VS550e01271504VS560e01281504VS570e01291504VS580e012a1504VS590e012b1504VS600e012c1504VS610e012d1504VS620e012e1504VS630e012f1504VS640e01301504VS650e01311504VS660e01321504VS670e01331504VS680e01341504VS690e01351504VS700e01361504VS710e01371504VS720e01381504VS730e01391504VS740e013a1504VS750e013b1504VS760e013c1504VS770e013d1504VS780e013e1504VS790e013f1504VS800e01401504VS810e01411504VS820e01421504VS830e01431504VS840e01441504VS850e01451504VS860e01461504VS870e01471504VS880e01481504VS890e01491504VS900e014a1504VS910e014b1504VS920e014c1504VS930e014d1504VS940e014e1504VS950e014f1504VS960e01501504VS970e01511504VS980e01521504VS990e01531505VS1000e01541505VS1010e01551505VS1020e01561505VS1030e01571505VS1040e01581505VS1050e01591505VS1060e015a1505VS1070e015b1505VS1080e015c1505VS1090e015d1505VS1100e015e1505VS1110e015f1505VS1120e01601505VS1130e01611505VS1140e01621505VS1150e01631505VS1160e01641505VS1170e01651505VS1180e01661505VS1190e01671505VS1200e01681505VS1210e01691505VS1220e016a1505VS1230e016b1505VS1240e016c1505VS1250e016d1505VS1260e016e1505VS1270e016f1505VS1280e01701505VS1290e01711505VS1300e01721505VS1310e01731505VS1320e01741505VS1330e01751505VS1340e01761505VS1350e01771505VS1360e01781505VS1370e01791505VS1380e017a1505VS1390e017b1505VS1400e017c1505VS1410e017d1505VS1420e017e1505VS1430e017f1505VS1440e01801505VS1450e01811505VS1460e01821505VS1470e01831505VS1480e01841505VS1490e01851505VS1500e01861505VS1510e01871505VS1520e01881505VS1530e01891505VS1540e018a15
05VS1550e018b1505VS1560e018c1505VS1570e018d1505VS1580e018e1505VS1590e018f1505VS1600e01901505VS1610e01911505VS1620e01921505VS1630e01931505VS1640e01941505VS1650e01951505VS1660e01961505VS1670e01971505VS1680e01981505VS1690e01991505VS1700e019a1505VS1710e019b1505VS1720e019c1505VS1730e019d1505VS1740e019e1505VS1750e019f1505VS1760e01a01505VS1770e01a11505VS1780e01a21505VS1790e01a31505VS1800e01a41505VS1810e01a51505VS1820e01a61505VS1830e01a71505VS1840e01a81505VS1850e01a91505VS1860e01aa1505VS1870e01ab1505VS1880e01ac1505VS1890e01ad1505VS1900e01ae1505VS1910e01af1505VS1920e01b01505VS1930e01b11505VS1940e01b21505VS1950e01b31505VS1960e01b41505VS1970e01b51505VS1980e01b61505VS1990e01b71505VS2000e01b81505VS2010e01b91505VS2020e01ba1505VS2030e01bb1505VS2040e01bc1505VS2050e01bd1505VS2060e01be1505VS2070e01bf1505VS2080e01c01505VS2090e01c11505VS2100e01c21505VS2110e01c31505VS2120e01c41505VS2130e01c51505VS2140e01c61505VS2150e01c71505VS2160e01c81505VS2170e01c91505VS2180e01ca1505VS2190e01cb1505VS2200e01cc1505VS2210e01cd1505VS2220e01ce1505VS2230e01cf1505VS2240e01d01505VS2250e01d11505VS2260e01d21505VS2270e01d31505VS2280e01d41505VS2290e01d51505VS2300e01d61505VS2310e01d71505VS2320e01d81505VS2330e01d91505VS2340e01da1505VS2350e01db1505VS2360e01dc1505VS2370e01dd1505VS2380e01de1505VS2390e01df1505VS2400e01e01505VS2410e01e11505VS2420e01e21505VS2430e01e31505VS2440e01e41505VS2450e01e51505VS2460e01e61505VS2470e01e71505VS2480e01e81505VS2490e01e91505VS2500e01ea1505VS2510e01eb1505VS2520e01ec1505VS2530e01ed1505VS2540e01ee1505VS2550e01ef1505VS256
--------------------------------------------------------------------------------
/cdata/prop_set_lowercase:
--------------------------------------------------------------------------------
1 | 00006100006200006300006400006500006600006700006800006900006a00006b00006c00006d00006e00006f00007000007100007200007300007400007500007600007700007800007900007a0000aa0000b50000ba0000df0000e00000e10000e20000e30000e40000e50000e60000e70000e80000e90000ea0000eb0000ec0000ed0000ee0000ef0000f00000f10000f20000f30000f40000f50000f60000f80000f90000fa0000fb0000fc0000fd0000fe0000ff00010100010300010500010700010900010b00010d00010f00011100011300011500011700011900011b00011d00011f00012100012300012500012700012900012b00012d00012f00013100013300013500013700013800013a00013c00013e00014000014200014400014600014800014900014b00014d00014f00015100015300015500015700015900015b00015d00015f00016100016300016500016700016900016b00016d00016f00017100017300017500017700017a00017c00017e00017f00018000018300018500018800018c00018d00019200019500019900019a00019b00019e0001a10001a30001a50001a80001aa0001ab0001ad0001b00001b40001b60001b90001ba0001bd0001be0001bf0001c60001c90001cc0001ce0001d00001d20001d40001d60001d80001da0001dc0001dd0001df0001e10001e30001e50001e70001e90001eb0001ed0001ef0001f00001f30001f50001f90001fb0001fd0001ff00020100020300020500020700020900020b00020d00020f00021100021300021500021700021900021b00021d00021f00022100022300022500022700022900022b00022d00022f00023100023300023400023500023600023700023800023900023c00023f00024000024200024700024900024b00024d00024f00025000025100025200025300025400025500025600025700025800025900025a00025b00025c00025d00025e00025f00026000026100026200026300026400026500026600026700026800026900026a00026b00026c00026d00026e00026f00027000027100027200027300027400027500027600027700027800027900027a00027b00027c00027d00027e00027f00028000028100028200028300028400028500028600028700028800028900028a00028b00028c00028d00028e00028f00029000029100029200029300029500029600029700029800029900029a00029b00029c00029d00029e00029f0002a00002a10002a20002a30002a40002a50002a60002a70002a80002a90002aa0002ab0002ac0002ad0002ae0002af0002b00002b10002b20002b30002b40002b50002b60002b70002b80002c00002c10002e00002e10002e20002e30002
e400034500037100037300037700037a00037b00037c00037d0003900003ac0003ad0003ae0003af0003b00003b10003b20003b30003b40003b50003b60003b70003b80003b90003ba0003bb0003bc0003bd0003be0003bf0003c00003c10003c20003c30003c40003c50003c60003c70003c80003c90003ca0003cb0003cc0003cd0003ce0003d00003d10003d50003d60003d70003d90003db0003dd0003df0003e10003e30003e50003e70003e90003eb0003ed0003ef0003f00003f10003f20003f30003f50003f80003fb0003fc00043000043100043200043300043400043500043600043700043800043900043a00043b00043c00043d00043e00043f00044000044100044200044300044400044500044600044700044800044900044a00044b00044c00044d00044e00044f00045000045100045200045300045400045500045600045700045800045900045a00045b00045c00045d00045e00045f00046100046300046500046700046900046b00046d00046f00047100047300047500047700047900047b00047d00047f00048100048b00048d00048f00049100049300049500049700049900049b00049d00049f0004a10004a30004a50004a70004a90004ab0004ad0004af0004b10004b30004b50004b70004b90004bb0004bd0004bf0004c20004c40004c60004c80004ca0004cc0004ce0004cf0004d10004d30004d50004d70004d90004db0004dd0004df0004e10004e30004e50004e70004e90004eb0004ed0004ef0004f10004f30004f50004f70004f90004fb0004fd0004ff00050100050300050500050700050900050b00050d00050f00051100051300051500051700051900051b00051d00051f00052100052300052500052700056100056200056300056400056500056600056700056800056900056a00056b00056c00056d00056e00056f00057000057100057200057300057400057500057600057700057800057900057a00057b00057c00057d00057e00057f000580000581000582000583000584000585000586000587001d00001d01001d02001d03001d04001d05001d06001d07001d08001d09001d0a001d0b001d0c001d0d001d0e001d0f001d10001d11001d12001d13001d14001d15001d16001d17001d18001d19001d1a001d1b001d1c001d1d001d1e001d1f001d20001d21001d22001d23001d24001d25001d26001d27001d28001d29001d2a001d2b001d2c001d2d001d2e001d2f001d30001d31001d32001d33001d34001d35001d36001d37001d38001d39001d3a001d3b001d3c001d3d001d3e001d3f001d40001d41001d42001d43001d44001d45001d46001d47001d48001d49001d4a001d4b001d4c001d4d001d4e001d4f001d50
001d51001d52001d53001d54001d55001d56001d57001d58001d59001d5a001d5b001d5c001d5d001d5e001d5f001d60001d61001d62001d63001d64001d65001d66001d67001d68001d69001d6a001d6b001d6c001d6d001d6e001d6f001d70001d71001d72001d73001d74001d75001d76001d77001d78001d79001d7a001d7b001d7c001d7d001d7e001d7f001d80001d81001d82001d83001d84001d85001d86001d87001d88001d89001d8a001d8b001d8c001d8d001d8e001d8f001d90001d91001d92001d93001d94001d95001d96001d97001d98001d99001d9a001d9b001d9c001d9d001d9e001d9f001da0001da1001da2001da3001da4001da5001da6001da7001da8001da9001daa001dab001dac001dad001dae001daf001db0001db1001db2001db3001db4001db5001db6001db7001db8001db9001dba001dbb001dbc001dbd001dbe001dbf001e01001e03001e05001e07001e09001e0b001e0d001e0f001e11001e13001e15001e17001e19001e1b001e1d001e1f001e21001e23001e25001e27001e29001e2b001e2d001e2f001e31001e33001e35001e37001e39001e3b001e3d001e3f001e41001e43001e45001e47001e49001e4b001e4d001e4f001e51001e53001e55001e57001e59001e5b001e5d001e5f001e61001e63001e65001e67001e69001e6b001e6d001e6f001e71001e73001e75001e77001e79001e7b001e7d001e7f001e81001e83001e85001e87001e89001e8b001e8d001e8f001e91001e93001e95001e96001e97001e98001e99001e9a001e9b001e9c001e9d001e9f001ea1001ea3001ea5001ea7001ea9001eab001ead001eaf001eb1001eb3001eb5001eb7001eb9001ebb001ebd001ebf001ec1001ec3001ec5001ec7001ec9001ecb001ecd001ecf001ed1001ed3001ed5001ed7001ed9001edb001edd001edf001ee1001ee3001ee5001ee7001ee9001eeb001eed001eef001ef1001ef3001ef5001ef7001ef9001efb001efd001eff001f00001f01001f02001f03001f04001f05001f06001f07001f10001f11001f12001f13001f14001f15001f20001f21001f22001f23001f24001f25001f26001f27001f30001f31001f32001f33001f34001f35001f36001f37001f40001f41001f42001f43001f44001f45001f50001f51001f52001f53001f54001f55001f56001f57001f60001f61001f62001f63001f64001f65001f66001f67001f70001f71001f72001f73001f74001f75001f76001f77001f78001f79001f7a001f7b001f7c001f7d001f80001f81001f82001f83001f84001f85001f86001f87001f90001f91001f92001f93001f94001f95001f96001f97001fa0001fa1001fa2001fa3001fa4001fa5001fa6001fa700
1fb0001fb1001fb2001fb3001fb4001fb6001fb7001fbe001fc2001fc3001fc4001fc6001fc7001fd0001fd1001fd2001fd3001fd6001fd7001fe0001fe1001fe2001fe3001fe4001fe5001fe6001fe7001ff2001ff3001ff4001ff6001ff700207100207f00209000209100209200209300209400209500209600209700209800209900209a00209b00209c00210a00210e00210f00211300212f00213400213900213c00213d00214600214700214800214900214e00217000217100217200217300217400217500217600217700217800217900217a00217b00217c00217d00217e00217f0021840024d00024d10024d20024d30024d40024d50024d60024d70024d80024d90024da0024db0024dc0024dd0024de0024df0024e00024e10024e20024e30024e40024e50024e60024e70024e80024e9002c30002c31002c32002c33002c34002c35002c36002c37002c38002c39002c3a002c3b002c3c002c3d002c3e002c3f002c40002c41002c42002c43002c44002c45002c46002c47002c48002c49002c4a002c4b002c4c002c4d002c4e002c4f002c50002c51002c52002c53002c54002c55002c56002c57002c58002c59002c5a002c5b002c5c002c5d002c5e002c61002c65002c66002c68002c6a002c6c002c71002c73002c74002c76002c77002c78002c79002c7a002c7b002c7c002c7d002c81002c83002c85002c87002c89002c8b002c8d002c8f002c91002c93002c95002c97002c99002c9b002c9d002c9f002ca1002ca3002ca5002ca7002ca9002cab002cad002caf002cb1002cb3002cb5002cb7002cb9002cbb002cbd002cbf002cc1002cc3002cc5002cc7002cc9002ccb002ccd002ccf002cd1002cd3002cd5002cd7002cd9002cdb002cdd002cdf002ce1002ce3002ce4002cec002cee002cf3002d00002d01002d02002d03002d04002d05002d06002d07002d08002d09002d0a002d0b002d0c002d0d002d0e002d0f002d10002d11002d12002d13002d14002d15002d16002d17002d18002d19002d1a002d1b002d1c002d1d002d1e002d1f002d20002d21002d22002d23002d24002d25002d27002d2d00a64100a64300a64500a64700a64900a64b00a64d00a64f00a65100a65300a65500a65700a65900a65b00a65d00a65f00a66100a66300a66500a66700a66900a66b00a66d00a68100a68300a68500a68700a68900a68b00a68d00a68f00a69100a69300a69500a69700a72300a72500a72700a72900a72b00a72d00a72f00a73000a73100a73300a73500a73700a73900a73b00a73d00a73f00a74100a74300a74500a74700a74900a74b00a74d00a74f00a75100a75300a75500a75700a75900a75b00a75d00a75f00a76100a76300a76500a76700a7
6900a76b00a76d00a76f00a77000a77100a77200a77300a77400a77500a77600a77700a77800a77a00a77c00a77f00a78100a78300a78500a78700a78c00a78e00a79100a79300a7a100a7a300a7a500a7a700a7a900a7f800a7f900a7fa00fb0000fb0100fb0200fb0300fb0400fb0500fb0600fb1300fb1400fb1500fb1600fb1700ff4100ff4200ff4300ff4400ff4500ff4600ff4700ff4800ff4900ff4a00ff4b00ff4c00ff4d00ff4e00ff4f00ff5000ff5100ff5200ff5300ff5400ff5500ff5600ff5700ff5800ff5900ff5a01042801042901042a01042b01042c01042d01042e01042f01043001043101043201043301043401043501043601043701043801043901043a01043b01043c01043d01043e01043f01044001044101044201044301044401044501044601044701044801044901044a01044b01044c01044d01044e01044f01d41a01d41b01d41c01d41d01d41e01d41f01d42001d42101d42201d42301d42401d42501d42601d42701d42801d42901d42a01d42b01d42c01d42d01d42e01d42f01d43001d43101d43201d43301d44e01d44f01d45001d45101d45201d45301d45401d45601d45701d45801d45901d45a01d45b01d45c01d45d01d45e01d45f01d46001d46101d46201d46301d46401d46501d46601d46701d48201d48301d48401d48501d48601d48701d48801d48901d48a01d48b01d48c01d48d01d48e01d48f01d49001d49101d49201d49301d49401d49501d49601d49701d49801d49901d49a01d49b01d4b601d4b701d4b801d4b901d4bb01d4bd01d4be01d4bf01d4c001d4c101d4c201d4c301d4c501d4c601d4c701d4c801d4c901d4ca01d4cb01d4cc01d4cd01d4ce01d4cf01d4ea01d4eb01d4ec01d4ed01d4ee01d4ef01d4f001d4f101d4f201d4f301d4f401d4f501d4f601d4f701d4f801d4f901d4fa01d4fb01d4fc01d4fd01d4fe01d4ff01d50001d50101d50201d50301d51e01d51f01d52001d52101d52201d52301d52401d52501d52601d52701d52801d52901d52a01d52b01d52c01d52d01d52e01d52f01d53001d53101d53201d53301d53401d53501d53601d53701d55201d55301d55401d55501d55601d55701d55801d55901d55a01d55b01d55c01d55d01d55e01d55f01d56001d56101d56201d56301d56401d56501d56601d56701d56801d56901d56a01d56b01d58601d58701d58801d58901d58a01d58b01d58c01d58d01d58e01d58f01d59001d59101d59201d59301d59401d59501d59601d59701d59801d59901d59a01d59b01d59c01d59d01d59e01d59f01d5ba01d5bb01d5bc01d5bd01d5be01d5bf01d5c001d5c101d5c201d5c301d5c401d5c501d5c601d5c701d5c801d5c901d5ca01d5cb01d5cc01d5cd
01d5ce01d5cf01d5d001d5d101d5d201d5d301d5ee01d5ef01d5f001d5f101d5f201d5f301d5f401d5f501d5f601d5f701d5f801d5f901d5fa01d5fb01d5fc01d5fd01d5fe01d5ff01d60001d60101d60201d60301d60401d60501d60601d60701d62201d62301d62401d62501d62601d62701d62801d62901d62a01d62b01d62c01d62d01d62e01d62f01d63001d63101d63201d63301d63401d63501d63601d63701d63801d63901d63a01d63b01d65601d65701d65801d65901d65a01d65b01d65c01d65d01d65e01d65f01d66001d66101d66201d66301d66401d66501d66601d66701d66801d66901d66a01d66b01d66c01d66d01d66e01d66f01d68a01d68b01d68c01d68d01d68e01d68f01d69001d69101d69201d69301d69401d69501d69601d69701d69801d69901d69a01d69b01d69c01d69d01d69e01d69f01d6a001d6a101d6a201d6a301d6a401d6a501d6c201d6c301d6c401d6c501d6c601d6c701d6c801d6c901d6ca01d6cb01d6cc01d6cd01d6ce01d6cf01d6d001d6d101d6d201d6d301d6d401d6d501d6d601d6d701d6d801d6d901d6da01d6dc01d6dd01d6de01d6df01d6e001d6e101d6fc01d6fd01d6fe01d6ff01d70001d70101d70201d70301d70401d70501d70601d70701d70801d70901d70a01d70b01d70c01d70d01d70e01d70f01d71001d71101d71201d71301d71401d71601d71701d71801d71901d71a01d71b01d73601d73701d73801d73901d73a01d73b01d73c01d73d01d73e01d73f01d74001d74101d74201d74301d74401d74501d74601d74701d74801d74901d74a01d74b01d74c01d74d01d74e01d75001d75101d75201d75301d75401d75501d77001d77101d77201d77301d77401d77501d77601d77701d77801d77901d77a01d77b01d77c01d77d01d77e01d77f01d78001d78101d78201d78301d78401d78501d78601d78701d78801d78a01d78b01d78c01d78d01d78e01d78f01d7aa01d7ab01d7ac01d7ad01d7ae01d7af01d7b001d7b101d7b201d7b301d7b401d7b501d7b601d7b701d7b801d7b901d7ba01d7bb01d7bc01d7bd01d7be01d7bf01d7c001d7c101d7c201d7c401d7c501d7c601d7c701d7c801d7c901d7cb
--------------------------------------------------------------------------------
/cdata/prop_set_uppercase:
--------------------------------------------------------------------------------
1 | 00004100004200004300004400004500004600004700004800004900004a00004b00004c00004d00004e00004f00005000005100005200005300005400005500005600005700005800005900005a0000c00000c10000c20000c30000c40000c50000c60000c70000c80000c90000ca0000cb0000cc0000cd0000ce0000cf0000d00000d10000d20000d30000d40000d50000d60000d80000d90000da0000db0000dc0000dd0000de00010000010200010400010600010800010a00010c00010e00011000011200011400011600011800011a00011c00011e00012000012200012400012600012800012a00012c00012e00013000013200013400013600013900013b00013d00013f00014100014300014500014700014a00014c00014e00015000015200015400015600015800015a00015c00015e00016000016200016400016600016800016a00016c00016e00017000017200017400017600017800017900017b00017d00018100018200018400018600018700018900018a00018b00018e00018f00019000019100019300019400019600019700019800019c00019d00019f0001a00001a20001a40001a60001a70001a90001ac0001ae0001af0001b10001b20001b30001b50001b70001b80001bc0001c40001c70001ca0001cd0001cf0001d10001d30001d50001d70001d90001db0001de0001e00001e20001e40001e60001e80001ea0001ec0001ee0001f10001f40001f60001f70001f80001fa0001fc0001fe00020000020200020400020600020800020a00020c00020e00021000021200021400021600021800021a00021c00021e00022000022200022400022600022800022a00022c00022e00023000023200023a00023b00023d00023e00024100024300024400024500024600024800024a00024c00024e00037000037200037600038600038800038900038a00038c00038e00038f00039100039200039300039400039500039600039700039800039900039a00039b00039c00039d00039e00039f0003a00003a10003a30003a40003a50003a60003a70003a80003a90003aa0003ab0003cf0003d20003d30003d40003d80003da0003dc0003de0003e00003e20003e40003e60003e80003ea0003ec0003ee0003f40003f70003f90003fa0003fd0003fe0003ff00040000040100040200040300040400040500040600040700040800040900040a00040b00040c00040d00040e00040f00041000041100041200041300041400041500041600041700041800041900041a00041b00041c00041d00041e00041f00042000042100042200042300042400042500042600042700042800042900042a00042b00042c00042d00042e00042f0004600004620004640004
6600046800046a00046c00046e00047000047200047400047600047800047a00047c00047e00048000048a00048c00048e00049000049200049400049600049800049a00049c00049e0004a00004a20004a40004a60004a80004aa0004ac0004ae0004b00004b20004b40004b60004b80004ba0004bc0004be0004c00004c10004c30004c50004c70004c90004cb0004cd0004d00004d20004d40004d60004d80004da0004dc0004de0004e00004e20004e40004e60004e80004ea0004ec0004ee0004f00004f20004f40004f60004f80004fa0004fc0004fe00050000050200050400050600050800050a00050c00050e00051000051200051400051600051800051a00051c00051e00052000052200052400052600053100053200053300053400053500053600053700053800053900053a00053b00053c00053d00053e00053f00054000054100054200054300054400054500054600054700054800054900054a00054b00054c00054d00054e00054f0005500005510005520005530005540005550005560010a00010a10010a20010a30010a40010a50010a60010a70010a80010a90010aa0010ab0010ac0010ad0010ae0010af0010b00010b10010b20010b30010b40010b50010b60010b70010b80010b90010ba0010bb0010bc0010bd0010be0010bf0010c00010c10010c20010c30010c40010c50010c70010cd001e00001e02001e04001e06001e08001e0a001e0c001e0e001e10001e12001e14001e16001e18001e1a001e1c001e1e001e20001e22001e24001e26001e28001e2a001e2c001e2e001e30001e32001e34001e36001e38001e3a001e3c001e3e001e40001e42001e44001e46001e48001e4a001e4c001e4e001e50001e52001e54001e56001e58001e5a001e5c001e5e001e60001e62001e64001e66001e68001e6a001e6c001e6e001e70001e72001e74001e76001e78001e7a001e7c001e7e001e80001e82001e84001e86001e88001e8a001e8c001e8e001e90001e92001e94001e9e001ea0001ea2001ea4001ea6001ea8001eaa001eac001eae001eb0001eb2001eb4001eb6001eb8001eba001ebc001ebe001ec0001ec2001ec4001ec6001ec8001eca001ecc001ece001ed0001ed2001ed4001ed6001ed8001eda001edc001ede001ee0001ee2001ee4001ee6001ee8001eea001eec001eee001ef0001ef2001ef4001ef6001ef8001efa001efc001efe001f08001f09001f0a001f0b001f0c001f0d001f0e001f0f001f18001f19001f1a001f1b001f1c001f1d001f28001f29001f2a001f2b001f2c001f2d001f2e001f2f001f38001f39001f3a001f3b001f3c001f3d001f3e001f3f001f48001f49001f4a001f4b001f4c001f4d001f59001f5b001f5d
001f5f001f68001f69001f6a001f6b001f6c001f6d001f6e001f6f001fb8001fb9001fba001fbb001fc8001fc9001fca001fcb001fd8001fd9001fda001fdb001fe8001fe9001fea001feb001fec001ff8001ff9001ffa001ffb00210200210700210b00210c00210d00211000211100211200211500211900211a00211b00211c00211d00212400212600212800212a00212b00212c00212d00213000213100213200213300213e00213f00214500216000216100216200216300216400216500216600216700216800216900216a00216b00216c00216d00216e00216f0021830024b60024b70024b80024b90024ba0024bb0024bc0024bd0024be0024bf0024c00024c10024c20024c30024c40024c50024c60024c70024c80024c90024ca0024cb0024cc0024cd0024ce0024cf002c00002c01002c02002c03002c04002c05002c06002c07002c08002c09002c0a002c0b002c0c002c0d002c0e002c0f002c10002c11002c12002c13002c14002c15002c16002c17002c18002c19002c1a002c1b002c1c002c1d002c1e002c1f002c20002c21002c22002c23002c24002c25002c26002c27002c28002c29002c2a002c2b002c2c002c2d002c2e002c60002c62002c63002c64002c67002c69002c6b002c6d002c6e002c6f002c70002c72002c75002c7e002c7f002c80002c82002c84002c86002c88002c8a002c8c002c8e002c90002c92002c94002c96002c98002c9a002c9c002c9e002ca0002ca2002ca4002ca6002ca8002caa002cac002cae002cb0002cb2002cb4002cb6002cb8002cba002cbc002cbe002cc0002cc2002cc4002cc6002cc8002cca002ccc002cce002cd0002cd2002cd4002cd6002cd8002cda002cdc002cde002ce0002ce2002ceb002ced002cf200a64000a64200a64400a64600a64800a64a00a64c00a64e00a65000a65200a65400a65600a65800a65a00a65c00a65e00a66000a66200a66400a66600a66800a66a00a66c00a68000a68200a68400a68600a68800a68a00a68c00a68e00a69000a69200a69400a69600a72200a72400a72600a72800a72a00a72c00a72e00a73200a73400a73600a73800a73a00a73c00a73e00a74000a74200a74400a74600a74800a74a00a74c00a74e00a75000a75200a75400a75600a75800a75a00a75c00a75e00a76000a76200a76400a76600a76800a76a00a76c00a76e00a77900a77b00a77d00a77e00a78000a78200a78400a78600a78b00a78d00a79000a79200a7a000a7a200a7a400a7a600a7a800a7aa00ff2100ff2200ff2300ff2400ff2500ff2600ff2700ff2800ff2900ff2a00ff2b00ff2c00ff2d00ff2e00ff2f00ff3000ff3100ff3200ff3300ff3400ff3500ff3600ff3700ff3800ff3900ff3a01
040001040101040201040301040401040501040601040701040801040901040a01040b01040c01040d01040e01040f01041001041101041201041301041401041501041601041701041801041901041a01041b01041c01041d01041e01041f01042001042101042201042301042401042501042601042701d40001d40101d40201d40301d40401d40501d40601d40701d40801d40901d40a01d40b01d40c01d40d01d40e01d40f01d41001d41101d41201d41301d41401d41501d41601d41701d41801d41901d43401d43501d43601d43701d43801d43901d43a01d43b01d43c01d43d01d43e01d43f01d44001d44101d44201d44301d44401d44501d44601d44701d44801d44901d44a01d44b01d44c01d44d01d46801d46901d46a01d46b01d46c01d46d01d46e01d46f01d47001d47101d47201d47301d47401d47501d47601d47701d47801d47901d47a01d47b01d47c01d47d01d47e01d47f01d48001d48101d49c01d49e01d49f01d4a201d4a501d4a601d4a901d4aa01d4ab01d4ac01d4ae01d4af01d4b001d4b101d4b201d4b301d4b401d4b501d4d001d4d101d4d201d4d301d4d401d4d501d4d601d4d701d4d801d4d901d4da01d4db01d4dc01d4dd01d4de01d4df01d4e001d4e101d4e201d4e301d4e401d4e501d4e601d4e701d4e801d4e901d50401d50501d50701d50801d50901d50a01d50d01d50e01d50f01d51001d51101d51201d51301d51401d51601d51701d51801d51901d51a01d51b01d51c01d53801d53901d53b01d53c01d53d01d53e01d54001d54101d54201d54301d54401d54601d54a01d54b01d54c01d54d01d54e01d54f01d55001d56c01d56d01d56e01d56f01d57001d57101d57201d57301d57401d57501d57601d57701d57801d57901d57a01d57b01d57c01d57d01d57e01d57f01d58001d58101d58201d58301d58401d58501d5a001d5a101d5a201d5a301d5a401d5a501d5a601d5a701d5a801d5a901d5aa01d5ab01d5ac01d5ad01d5ae01d5af01d5b001d5b101d5b201d5b301d5b401d5b501d5b601d5b701d5b801d5b901d5d401d5d501d5d601d5d701d5d801d5d901d5da01d5db01d5dc01d5dd01d5de01d5df01d5e001d5e101d5e201d5e301d5e401d5e501d5e601d5e701d5e801d5e901d5ea01d5eb01d5ec01d5ed01d60801d60901d60a01d60b01d60c01d60d01d60e01d60f01d61001d61101d61201d61301d61401d61501d61601d61701d61801d61901d61a01d61b01d61c01d61d01d61e01d61f01d62001d62101d63c01d63d01d63e01d63f01d64001d64101d64201d64301d64401d64501d64601d64701d64801d64901d64a01d64b01d64c01d64d01d64e01d64f01d65001d65101d65201d65301d65401d65501d67001d6
7101d67201d67301d67401d67501d67601d67701d67801d67901d67a01d67b01d67c01d67d01d67e01d67f01d68001d68101d68201d68301d68401d68501d68601d68701d68801d68901d6a801d6a901d6aa01d6ab01d6ac01d6ad01d6ae01d6af01d6b001d6b101d6b201d6b301d6b401d6b501d6b601d6b701d6b801d6b901d6ba01d6bb01d6bc01d6bd01d6be01d6bf01d6c001d6e201d6e301d6e401d6e501d6e601d6e701d6e801d6e901d6ea01d6eb01d6ec01d6ed01d6ee01d6ef01d6f001d6f101d6f201d6f301d6f401d6f501d6f601d6f701d6f801d6f901d6fa01d71c01d71d01d71e01d71f01d72001d72101d72201d72301d72401d72501d72601d72701d72801d72901d72a01d72b01d72c01d72d01d72e01d72f01d73001d73101d73201d73301d73401d75601d75701d75801d75901d75a01d75b01d75c01d75d01d75e01d75f01d76001d76101d76201d76301d76401d76501d76601d76701d76801d76901d76a01d76b01d76c01d76d01d76e01d79001d79101d79201d79301d79401d79501d79601d79701d79801d79901d79a01d79b01d79c01d79d01d79e01d79f01d7a001d7a101d7a201d7a301d7a401d7a501d7a601d7a701d7a801d7ca
--------------------------------------------------------------------------------
/cdata/simple_lc_map:
--------------------------------------------------------------------------------
1 | 00004100006100004200006200004300006300004400006400004500006500004600006600004700006700004800006800004900006900004a00006a00004b00006b00004c00006c00004d00006d00004e00006e00004f00006f00005000007000005100007100005200007200005300007300005400007400005500007500005600007600005700007700005800007800005900007900005a00007a0000c00000e00000c10000e10000c20000e20000c30000e30000c40000e40000c50000e50000c60000e60000c70000e70000c80000e80000c90000e90000ca0000ea0000cb0000eb0000cc0000ec0000cd0000ed0000ce0000ee0000cf0000ef0000d00000f00000d10000f10000d20000f20000d30000f30000d40000f40000d50000f50000d60000f60000d80000f80000d90000f90000da0000fa0000db0000fb0000dc0000fc0000dd0000fd0000de0000fe00010000010100010200010300010400010500010600010700010800010900010a00010b00010c00010d00010e00010f00011000011100011200011300011400011500011600011700011800011900011a00011b00011c00011d00011e00011f00012000012100012200012300012400012500012600012700012800012900012a00012b00012c00012d00012e00012f00013000006900013200013300013400013500013600013700013900013a00013b00013c00013d00013e00013f00014000014100014200014300014400014500014600014700014800014a00014b00014c00014d00014e00014f00015000015100015200015300015400015500015600015700015800015900015a00015b00015c00015d00015e00015f00016000016100016200016300016400016500016600016700016800016900016a00016b00016c00016d00016e00016f0001700001710001720001730001740001750001760001770001780000ff00017900017a00017b00017c00017d00017e00018100025300018200018300018400018500018600025400018700018800018900025600018a00025700018b00018c00018e0001dd00018f00025900019000025b00019100019200019300026000019400026300019600026900019700026800019800019900019c00026f00019d00027200019f0002750001a00001a10001a20001a30001a40001a50001a60002800001a70001a80001a90002830001ac0001ad0001ae0002880001af0001b00001b100028a0001b200028b0001b30001b40001b50001b60001b70002920001b80001b90001bc0001bd0001c40001c60001c50001c60001c70001c90001c80001c90001ca0001cc0001cb0001cc0001cd0001ce0001cf0001d00001d10001d20001d30001d40001d50001d60001
d70001d80001d90001da0001db0001dc0001de0001df0001e00001e10001e20001e30001e40001e50001e60001e70001e80001e90001ea0001eb0001ec0001ed0001ee0001ef0001f10001f30001f20001f30001f40001f50001f60001950001f70001bf0001f80001f90001fa0001fb0001fc0001fd0001fe0001ff00020000020100020200020300020400020500020600020700020800020900020a00020b00020c00020d00020e00020f00021000021100021200021300021400021500021600021700021800021900021a00021b00021c00021d00021e00021f00022000019e00022200022300022400022500022600022700022800022900022a00022b00022c00022d00022e00022f00023000023100023200023300023a002c6500023b00023c00023d00019a00023e002c6600024100024200024300018000024400028900024500028c00024600024700024800024900024a00024b00024c00024d00024e00024f0003700003710003720003730003760003770003860003ac0003880003ad0003890003ae00038a0003af00038c0003cc00038e0003cd00038f0003ce0003910003b10003920003b20003930003b30003940003b40003950003b50003960003b60003970003b70003980003b80003990003b900039a0003ba00039b0003bb00039c0003bc00039d0003bd00039e0003be00039f0003bf0003a00003c00003a10003c10003a30003c30003a40003c40003a50003c50003a60003c60003a70003c70003a80003c80003a90003c90003aa0003ca0003ab0003cb0003cf0003d70003d80003d90003da0003db0003dc0003dd0003de0003df0003e00003e10003e20003e30003e40003e50003e60003e70003e80003e90003ea0003eb0003ec0003ed0003ee0003ef0003f40003b80003f70003f80003f90003f20003fa0003fb0003fd00037b0003fe00037c0003ff00037d00040000045000040100045100040200045200040300045300040400045400040500045500040600045600040700045700040800045800040900045900040a00045a00040b00045b00040c00045c00040d00045d00040e00045e00040f00045f00041000043000041100043100041200043200041300043300041400043400041500043500041600043600041700043700041800043800041900043900041a00043a00041b00043b00041c00043c00041d00043d00041e00043e00041f00043f00042000044000042100044100042200044200042300044300042400044400042500044500042600044600042700044700042800044800042900044900042a00044a00042b00044b00042c00044c00042d00044d00042e00044e00042f00044f000460000461000462000463000464000465
00046600046700046800046900046a00046b00046c00046d00046e00046f00047000047100047200047300047400047500047600047700047800047900047a00047b00047c00047d00047e00047f00048000048100048a00048b00048c00048d00048e00048f00049000049100049200049300049400049500049600049700049800049900049a00049b00049c00049d00049e00049f0004a00004a10004a20004a30004a40004a50004a60004a70004a80004a90004aa0004ab0004ac0004ad0004ae0004af0004b00004b10004b20004b30004b40004b50004b60004b70004b80004b90004ba0004bb0004bc0004bd0004be0004bf0004c00004cf0004c10004c20004c30004c40004c50004c60004c70004c80004c90004ca0004cb0004cc0004cd0004ce0004d00004d10004d20004d30004d40004d50004d60004d70004d80004d90004da0004db0004dc0004dd0004de0004df0004e00004e10004e20004e30004e40004e50004e60004e70004e80004e90004ea0004eb0004ec0004ed0004ee0004ef0004f00004f10004f20004f30004f40004f50004f60004f70004f80004f90004fa0004fb0004fc0004fd0004fe0004ff00050000050100050200050300050400050500050600050700050800050900050a00050b00050c00050d00050e00050f00051000051100051200051300051400051500051600051700051800051900051a00051b00051c00051d00051e00051f00052000052100052200052300052400052500052600052700053100056100053200056200053300056300053400056400053500056500053600056600053700056700053800056800053900056900053a00056a00053b00056b00053c00056c00053d00056d00053e00056e00053f00056f00054000057000054100057100054200057200054300057300054400057400054500057500054600057600054700057700054800057800054900057900054a00057a00054b00057b00054c00057c00054d00057d00054e00057e00054f00057f0005500005800005510005810005520005820005530005830005540005840005550005850005560005860010a0002d000010a1002d010010a2002d020010a3002d030010a4002d040010a5002d050010a6002d060010a7002d070010a8002d080010a9002d090010aa002d0a0010ab002d0b0010ac002d0c0010ad002d0d0010ae002d0e0010af002d0f0010b0002d100010b1002d110010b2002d120010b3002d130010b4002d140010b5002d150010b6002d160010b7002d170010b8002d180010b9002d190010ba002d1a0010bb002d1b0010bc002d1c0010bd002d1d0010be002d1e0010bf002d1f0010c0002d200010c1002d210010c2002d220010c300
2d230010c4002d240010c5002d250010c7002d270010cd002d2d001e00001e01001e02001e03001e04001e05001e06001e07001e08001e09001e0a001e0b001e0c001e0d001e0e001e0f001e10001e11001e12001e13001e14001e15001e16001e17001e18001e19001e1a001e1b001e1c001e1d001e1e001e1f001e20001e21001e22001e23001e24001e25001e26001e27001e28001e29001e2a001e2b001e2c001e2d001e2e001e2f001e30001e31001e32001e33001e34001e35001e36001e37001e38001e39001e3a001e3b001e3c001e3d001e3e001e3f001e40001e41001e42001e43001e44001e45001e46001e47001e48001e49001e4a001e4b001e4c001e4d001e4e001e4f001e50001e51001e52001e53001e54001e55001e56001e57001e58001e59001e5a001e5b001e5c001e5d001e5e001e5f001e60001e61001e62001e63001e64001e65001e66001e67001e68001e69001e6a001e6b001e6c001e6d001e6e001e6f001e70001e71001e72001e73001e74001e75001e76001e77001e78001e79001e7a001e7b001e7c001e7d001e7e001e7f001e80001e81001e82001e83001e84001e85001e86001e87001e88001e89001e8a001e8b001e8c001e8d001e8e001e8f001e90001e91001e92001e93001e94001e95001e9e0000df001ea0001ea1001ea2001ea3001ea4001ea5001ea6001ea7001ea8001ea9001eaa001eab001eac001ead001eae001eaf001eb0001eb1001eb2001eb3001eb4001eb5001eb6001eb7001eb8001eb9001eba001ebb001ebc001ebd001ebe001ebf001ec0001ec1001ec2001ec3001ec4001ec5001ec6001ec7001ec8001ec9001eca001ecb001ecc001ecd001ece001ecf001ed0001ed1001ed2001ed3001ed4001ed5001ed6001ed7001ed8001ed9001eda001edb001edc001edd001ede001edf001ee0001ee1001ee2001ee3001ee4001ee5001ee6001ee7001ee8001ee9001eea001eeb001eec001eed001eee001eef001ef0001ef1001ef2001ef3001ef4001ef5001ef6001ef7001ef8001ef9001efa001efb001efc001efd001efe001eff001f08001f00001f09001f01001f0a001f02001f0b001f03001f0c001f04001f0d001f05001f0e001f06001f0f001f07001f18001f10001f19001f11001f1a001f12001f1b001f13001f1c001f14001f1d001f15001f28001f20001f29001f21001f2a001f22001f2b001f23001f2c001f24001f2d001f25001f2e001f26001f2f001f27001f38001f30001f39001f31001f3a001f32001f3b001f33001f3c001f34001f3d001f35001f3e001f36001f3f001f37001f48001f40001f49001f41001f4a001f42001f4b001f43001f4c001f44001f4d001f45001f59001f51001f5b001f53001f
5d001f55001f5f001f57001f68001f60001f69001f61001f6a001f62001f6b001f63001f6c001f64001f6d001f65001f6e001f66001f6f001f67001f88001f80001f89001f81001f8a001f82001f8b001f83001f8c001f84001f8d001f85001f8e001f86001f8f001f87001f98001f90001f99001f91001f9a001f92001f9b001f93001f9c001f94001f9d001f95001f9e001f96001f9f001f97001fa8001fa0001fa9001fa1001faa001fa2001fab001fa3001fac001fa4001fad001fa5001fae001fa6001faf001fa7001fb8001fb0001fb9001fb1001fba001f70001fbb001f71001fbc001fb3001fc8001f72001fc9001f73001fca001f74001fcb001f75001fcc001fc3001fd8001fd0001fd9001fd1001fda001f76001fdb001f77001fe8001fe0001fe9001fe1001fea001f7a001feb001f7b001fec001fe5001ff8001f78001ff9001f79001ffa001f7c001ffb001f7d001ffc001ff30021260003c900212a00006b00212b0000e500213200214e00216000217000216100217100216200217200216300217300216400217400216500217500216600217600216700217700216800217800216900217900216a00217a00216b00217b00216c00217c00216d00217d00216e00217e00216f00217f0021830021840024b60024d00024b70024d10024b80024d20024b90024d30024ba0024d40024bb0024d50024bc0024d60024bd0024d70024be0024d80024bf0024d90024c00024da0024c10024db0024c20024dc0024c30024dd0024c40024de0024c50024df0024c60024e00024c70024e10024c80024e20024c90024e30024ca0024e40024cb0024e50024cc0024e60024cd0024e70024ce0024e80024cf0024e9002c00002c30002c01002c31002c02002c32002c03002c33002c04002c34002c05002c35002c06002c36002c07002c37002c08002c38002c09002c39002c0a002c3a002c0b002c3b002c0c002c3c002c0d002c3d002c0e002c3e002c0f002c3f002c10002c40002c11002c41002c12002c42002c13002c43002c14002c44002c15002c45002c16002c46002c17002c47002c18002c48002c19002c49002c1a002c4a002c1b002c4b002c1c002c4c002c1d002c4d002c1e002c4e002c1f002c4f002c20002c50002c21002c51002c22002c52002c23002c53002c24002c54002c25002c55002c26002c56002c27002c57002c28002c58002c29002c59002c2a002c5a002c2b002c5b002c2c002c5c002c2d002c5d002c2e002c5e002c60002c61002c6200026b002c63001d7d002c6400027d002c67002c68002c69002c6a002c6b002c6c002c6d000251002c6e000271002c6f000250002c70000252002c72002c73002c75002c76002c7e00023f002c7f000240
002c80002c81002c82002c83002c84002c85002c86002c87002c88002c89002c8a002c8b002c8c002c8d002c8e002c8f002c90002c91002c92002c93002c94002c95002c96002c97002c98002c99002c9a002c9b002c9c002c9d002c9e002c9f002ca0002ca1002ca2002ca3002ca4002ca5002ca6002ca7002ca8002ca9002caa002cab002cac002cad002cae002caf002cb0002cb1002cb2002cb3002cb4002cb5002cb6002cb7002cb8002cb9002cba002cbb002cbc002cbd002cbe002cbf002cc0002cc1002cc2002cc3002cc4002cc5002cc6002cc7002cc8002cc9002cca002ccb002ccc002ccd002cce002ccf002cd0002cd1002cd2002cd3002cd4002cd5002cd6002cd7002cd8002cd9002cda002cdb002cdc002cdd002cde002cdf002ce0002ce1002ce2002ce3002ceb002cec002ced002cee002cf2002cf300a64000a64100a64200a64300a64400a64500a64600a64700a64800a64900a64a00a64b00a64c00a64d00a64e00a64f00a65000a65100a65200a65300a65400a65500a65600a65700a65800a65900a65a00a65b00a65c00a65d00a65e00a65f00a66000a66100a66200a66300a66400a66500a66600a66700a66800a66900a66a00a66b00a66c00a66d00a68000a68100a68200a68300a68400a68500a68600a68700a68800a68900a68a00a68b00a68c00a68d00a68e00a68f00a69000a69100a69200a69300a69400a69500a69600a69700a72200a72300a72400a72500a72600a72700a72800a72900a72a00a72b00a72c00a72d00a72e00a72f00a73200a73300a73400a73500a73600a73700a73800a73900a73a00a73b00a73c00a73d00a73e00a73f00a74000a74100a74200a74300a74400a74500a74600a74700a74800a74900a74a00a74b00a74c00a74d00a74e00a74f00a75000a75100a75200a75300a75400a75500a75600a75700a75800a75900a75a00a75b00a75c00a75d00a75e00a75f00a76000a76100a76200a76300a76400a76500a76600a76700a76800a76900a76a00a76b00a76c00a76d00a76e00a76f00a77900a77a00a77b00a77c00a77d001d7900a77e00a77f00a78000a78100a78200a78300a78400a78500a78600a78700a78b00a78c00a78d00026500a79000a79100a79200a79300a7a000a7a100a7a200a7a300a7a400a7a500a7a600a7a700a7a800a7a900a7aa00026600ff2100ff4100ff2200ff4200ff2300ff4300ff2400ff4400ff2500ff4500ff2600ff4600ff2700ff4700ff2800ff4800ff2900ff4900ff2a00ff4a00ff2b00ff4b00ff2c00ff4c00ff2d00ff4d00ff2e00ff4e00ff2f00ff4f00ff3000ff5000ff3100ff5100ff3200ff5200ff3300ff5300ff3400ff5400ff3500ff5500ff3600ff5600ff3700
ff5700ff3800ff5800ff3900ff5900ff3a00ff5a01040001042801040101042901040201042a01040301042b01040401042c01040501042d01040601042e01040701042f01040801043001040901043101040a01043201040b01043301040c01043401040d01043501040e01043601040f01043701041001043801041101043901041201043a01041301043b01041401043c01041501043d01041601043e01041701043f01041801044001041901044101041a01044201041b01044301041c01044401041d01044501041e01044601041f01044701042001044801042101044901042201044a01042301044b01042401044c01042501044d01042601044e01042701044f
--------------------------------------------------------------------------------
/cdata/simple_uc_map:
--------------------------------------------------------------------------------
1 | 00006100004100006200004200006300004300006400004400006500004500006600004600006700004700006800004800006900004900006a00004a00006b00004b00006c00004c00006d00004d00006e00004e00006f00004f00007000005000007100005100007200005200007300005300007400005400007500005500007600005600007700005700007800005800007900005900007a00005a0000b500039c0000e00000c00000e10000c10000e20000c20000e30000c30000e40000c40000e50000c50000e60000c60000e70000c70000e80000c80000e90000c90000ea0000ca0000eb0000cb0000ec0000cc0000ed0000cd0000ee0000ce0000ef0000cf0000f00000d00000f10000d10000f20000d20000f30000d30000f40000d40000f50000d50000f60000d60000f80000d80000f90000d90000fa0000da0000fb0000db0000fc0000dc0000fd0000dd0000fe0000de0000ff00017800010100010000010300010200010500010400010700010600010900010800010b00010a00010d00010c00010f00010e00011100011000011300011200011500011400011700011600011900011800011b00011a00011d00011c00011f00011e00012100012000012300012200012500012400012700012600012900012800012b00012a00012d00012c00012f00012e00013100004900013300013200013500013400013700013600013a00013900013c00013b00013e00013d00014000013f00014200014100014400014300014600014500014800014700014b00014a00014d00014c00014f00014e00015100015000015300015200015500015400015700015600015900015800015b00015a00015d00015c00015f00015e00016100016000016300016200016500016400016700016600016900016800016b00016a00016d00016c00016f00016e00017100017000017300017200017500017400017700017600017a00017900017c00017b00017e00017d00017f00005300018000024300018300018200018500018400018800018700018c00018b0001920001910001950001f600019900019800019a00023d00019e0002200001a10001a00001a30001a20001a50001a40001a80001a70001ad0001ac0001b00001af0001b40001b30001b60001b50001b90001b80001bd0001bc0001bf0001f70001c50001c40001c60001c40001c80001c70001c90001c70001cb0001ca0001cc0001ca0001ce0001cd0001d00001cf0001d20001d10001d40001d30001d60001d50001d80001d70001da0001d90001dc0001db0001dd00018e0001df0001de0001e10001e00001e30001e20001e50001e40001e70001e60001e90001e80001eb0001ea0001ed0001ec0001ef0001ee0001
f20001f10001f30001f10001f50001f40001f90001f80001fb0001fa0001fd0001fc0001ff0001fe00020100020000020300020200020500020400020700020600020900020800020b00020a00020d00020c00020f00020e00021100021000021300021200021500021400021700021600021900021800021b00021a00021d00021c00021f00021e00022300022200022500022400022700022600022900022800022b00022a00022d00022c00022f00022e00023100023000023300023200023c00023b00023f002c7e000240002c7f00024200024100024700024600024900024800024b00024a00024d00024c00024f00024e000250002c6f000251002c6d000252002c7000025300018100025400018600025600018900025700018a00025900018f00025b00019000026000019300026300019400026500a78d00026600a7aa00026800019700026900019600026b002c6200026f00019c000271002c6e00027200019d00027500019f00027d002c640002800001a60002830001a90002880001ae00028900024400028a0001b100028b0001b200028c0002450002920001b700034500039900037100037000037300037200037700037600037b0003fd00037c0003fe00037d0003ff0003ac0003860003ad0003880003ae0003890003af00038a0003b10003910003b20003920003b30003930003b40003940003b50003950003b60003960003b70003970003b80003980003b90003990003ba00039a0003bb00039b0003bc00039c0003bd00039d0003be00039e0003bf00039f0003c00003a00003c10003a10003c20003a30003c30003a30003c40003a40003c50003a50003c60003a60003c70003a70003c80003a80003c90003a90003ca0003aa0003cb0003ab0003cc00038c0003cd00038e0003ce00038f0003d00003920003d10003980003d50003a60003d60003a00003d70003cf0003d90003d80003db0003da0003dd0003dc0003df0003de0003e10003e00003e30003e20003e50003e40003e70003e60003e90003e80003eb0003ea0003ed0003ec0003ef0003ee0003f000039a0003f10003a10003f20003f90003f50003950003f80003f70003fb0003fa00043000041000043100041100043200041200043300041300043400041400043500041500043600041600043700041700043800041800043900041900043a00041a00043b00041b00043c00041c00043d00041d00043e00041e00043f00041f00044000042000044100042100044200042200044300042300044400042400044500042500044600042600044700042700044800042800044900042900044a00042a00044b00042b00044c00042c00044d00042d00044e00042e00044f00042f000450000400
00045100040100045200040200045300040300045400040400045500040500045600040600045700040700045800040800045900040900045a00040a00045b00040b00045c00040c00045d00040d00045e00040e00045f00040f00046100046000046300046200046500046400046700046600046900046800046b00046a00046d00046c00046f00046e00047100047000047300047200047500047400047700047600047900047800047b00047a00047d00047c00047f00047e00048100048000048b00048a00048d00048c00048f00048e00049100049000049300049200049500049400049700049600049900049800049b00049a00049d00049c00049f00049e0004a10004a00004a30004a20004a50004a40004a70004a60004a90004a80004ab0004aa0004ad0004ac0004af0004ae0004b10004b00004b30004b20004b50004b40004b70004b60004b90004b80004bb0004ba0004bd0004bc0004bf0004be0004c20004c10004c40004c30004c60004c50004c80004c70004ca0004c90004cc0004cb0004ce0004cd0004cf0004c00004d10004d00004d30004d20004d50004d40004d70004d60004d90004d80004db0004da0004dd0004dc0004df0004de0004e10004e00004e30004e20004e50004e40004e70004e60004e90004e80004eb0004ea0004ed0004ec0004ef0004ee0004f10004f00004f30004f20004f50004f40004f70004f60004f90004f80004fb0004fa0004fd0004fc0004ff0004fe00050100050000050300050200050500050400050700050600050900050800050b00050a00050d00050c00050f00050e00051100051000051300051200051500051400051700051600051900051800051b00051a00051d00051c00051f00051e00052100052000052300052200052500052400052700052600056100053100056200053200056300053300056400053400056500053500056600053600056700053700056800053800056900053900056a00053a00056b00053b00056c00053c00056d00053d00056e00053e00056f00053f00057000054000057100054100057200054200057300054300057400054400057500054500057600054600057700054700057800054800057900054900057a00054a00057b00054b00057c00054c00057d00054d00057e00054e00057f00054f000580000550000581000551000582000552000583000553000584000554000585000555000586000556001d7900a77d001d7d002c63001e01001e00001e03001e02001e05001e04001e07001e06001e09001e08001e0b001e0a001e0d001e0c001e0f001e0e001e11001e10001e13001e12001e15001e14001e17001e16001e19001e18001e1b001e1a001e1d001e1c001e1f00
1e1e001e21001e20001e23001e22001e25001e24001e27001e26001e29001e28001e2b001e2a001e2d001e2c001e2f001e2e001e31001e30001e33001e32001e35001e34001e37001e36001e39001e38001e3b001e3a001e3d001e3c001e3f001e3e001e41001e40001e43001e42001e45001e44001e47001e46001e49001e48001e4b001e4a001e4d001e4c001e4f001e4e001e51001e50001e53001e52001e55001e54001e57001e56001e59001e58001e5b001e5a001e5d001e5c001e5f001e5e001e61001e60001e63001e62001e65001e64001e67001e66001e69001e68001e6b001e6a001e6d001e6c001e6f001e6e001e71001e70001e73001e72001e75001e74001e77001e76001e79001e78001e7b001e7a001e7d001e7c001e7f001e7e001e81001e80001e83001e82001e85001e84001e87001e86001e89001e88001e8b001e8a001e8d001e8c001e8f001e8e001e91001e90001e93001e92001e95001e94001e9b001e60001ea1001ea0001ea3001ea2001ea5001ea4001ea7001ea6001ea9001ea8001eab001eaa001ead001eac001eaf001eae001eb1001eb0001eb3001eb2001eb5001eb4001eb7001eb6001eb9001eb8001ebb001eba001ebd001ebc001ebf001ebe001ec1001ec0001ec3001ec2001ec5001ec4001ec7001ec6001ec9001ec8001ecb001eca001ecd001ecc001ecf001ece001ed1001ed0001ed3001ed2001ed5001ed4001ed7001ed6001ed9001ed8001edb001eda001edd001edc001edf001ede001ee1001ee0001ee3001ee2001ee5001ee4001ee7001ee6001ee9001ee8001eeb001eea001eed001eec001eef001eee001ef1001ef0001ef3001ef2001ef5001ef4001ef7001ef6001ef9001ef8001efb001efa001efd001efc001eff001efe001f00001f08001f01001f09001f02001f0a001f03001f0b001f04001f0c001f05001f0d001f06001f0e001f07001f0f001f10001f18001f11001f19001f12001f1a001f13001f1b001f14001f1c001f15001f1d001f20001f28001f21001f29001f22001f2a001f23001f2b001f24001f2c001f25001f2d001f26001f2e001f27001f2f001f30001f38001f31001f39001f32001f3a001f33001f3b001f34001f3c001f35001f3d001f36001f3e001f37001f3f001f40001f48001f41001f49001f42001f4a001f43001f4b001f44001f4c001f45001f4d001f51001f59001f53001f5b001f55001f5d001f57001f5f001f60001f68001f61001f69001f62001f6a001f63001f6b001f64001f6c001f65001f6d001f66001f6e001f67001f6f001f70001fba001f71001fbb001f72001fc8001f73001fc9001f74001fca001f75001fcb001f76001fda001f77001fdb001f78001ff8001f79001ff9001f
7a001fea001f7b001feb001f7c001ffa001f7d001ffb001f80001f88001f81001f89001f82001f8a001f83001f8b001f84001f8c001f85001f8d001f86001f8e001f87001f8f001f90001f98001f91001f99001f92001f9a001f93001f9b001f94001f9c001f95001f9d001f96001f9e001f97001f9f001fa0001fa8001fa1001fa9001fa2001faa001fa3001fab001fa4001fac001fa5001fad001fa6001fae001fa7001faf001fb0001fb8001fb1001fb9001fb3001fbc001fbe000399001fc3001fcc001fd0001fd8001fd1001fd9001fe0001fe8001fe1001fe9001fe5001fec001ff3001ffc00214e00213200217000216000217100216100217200216200217300216300217400216400217500216500217600216600217700216700217800216800217900216900217a00216a00217b00216b00217c00216c00217d00216d00217e00216e00217f00216f0021840021830024d00024b60024d10024b70024d20024b80024d30024b90024d40024ba0024d50024bb0024d60024bc0024d70024bd0024d80024be0024d90024bf0024da0024c00024db0024c10024dc0024c20024dd0024c30024de0024c40024df0024c50024e00024c60024e10024c70024e20024c80024e30024c90024e40024ca0024e50024cb0024e60024cc0024e70024cd0024e80024ce0024e90024cf002c30002c00002c31002c01002c32002c02002c33002c03002c34002c04002c35002c05002c36002c06002c37002c07002c38002c08002c39002c09002c3a002c0a002c3b002c0b002c3c002c0c002c3d002c0d002c3e002c0e002c3f002c0f002c40002c10002c41002c11002c42002c12002c43002c13002c44002c14002c45002c15002c46002c16002c47002c17002c48002c18002c49002c19002c4a002c1a002c4b002c1b002c4c002c1c002c4d002c1d002c4e002c1e002c4f002c1f002c50002c20002c51002c21002c52002c22002c53002c23002c54002c24002c55002c25002c56002c26002c57002c27002c58002c28002c59002c29002c5a002c2a002c5b002c2b002c5c002c2c002c5d002c2d002c5e002c2e002c61002c60002c6500023a002c6600023e002c68002c67002c6a002c69002c6c002c6b002c73002c72002c76002c75002c81002c80002c83002c82002c85002c84002c87002c86002c89002c88002c8b002c8a002c8d002c8c002c8f002c8e002c91002c90002c93002c92002c95002c94002c97002c96002c99002c98002c9b002c9a002c9d002c9c002c9f002c9e002ca1002ca0002ca3002ca2002ca5002ca4002ca7002ca6002ca9002ca8002cab002caa002cad002cac002caf002cae002cb1002cb0002cb3002cb2002cb5002cb4002cb7002cb6002cb9002cb8
002cbb002cba002cbd002cbc002cbf002cbe002cc1002cc0002cc3002cc2002cc5002cc4002cc7002cc6002cc9002cc8002ccb002cca002ccd002ccc002ccf002cce002cd1002cd0002cd3002cd2002cd5002cd4002cd7002cd6002cd9002cd8002cdb002cda002cdd002cdc002cdf002cde002ce1002ce0002ce3002ce2002cec002ceb002cee002ced002cf3002cf2002d000010a0002d010010a1002d020010a2002d030010a3002d040010a4002d050010a5002d060010a6002d070010a7002d080010a8002d090010a9002d0a0010aa002d0b0010ab002d0c0010ac002d0d0010ad002d0e0010ae002d0f0010af002d100010b0002d110010b1002d120010b2002d130010b3002d140010b4002d150010b5002d160010b6002d170010b7002d180010b8002d190010b9002d1a0010ba002d1b0010bb002d1c0010bc002d1d0010bd002d1e0010be002d1f0010bf002d200010c0002d210010c1002d220010c2002d230010c3002d240010c4002d250010c5002d270010c7002d2d0010cd00a64100a64000a64300a64200a64500a64400a64700a64600a64900a64800a64b00a64a00a64d00a64c00a64f00a64e00a65100a65000a65300a65200a65500a65400a65700a65600a65900a65800a65b00a65a00a65d00a65c00a65f00a65e00a66100a66000a66300a66200a66500a66400a66700a66600a66900a66800a66b00a66a00a66d00a66c00a68100a68000a68300a68200a68500a68400a68700a68600a68900a68800a68b00a68a00a68d00a68c00a68f00a68e00a69100a69000a69300a69200a69500a69400a69700a69600a72300a72200a72500a72400a72700a72600a72900a72800a72b00a72a00a72d00a72c00a72f00a72e00a73300a73200a73500a73400a73700a73600a73900a73800a73b00a73a00a73d00a73c00a73f00a73e00a74100a74000a74300a74200a74500a74400a74700a74600a74900a74800a74b00a74a00a74d00a74c00a74f00a74e00a75100a75000a75300a75200a75500a75400a75700a75600a75900a75800a75b00a75a00a75d00a75c00a75f00a75e00a76100a76000a76300a76200a76500a76400a76700a76600a76900a76800a76b00a76a00a76d00a76c00a76f00a76e00a77a00a77900a77c00a77b00a77f00a77e00a78100a78000a78300a78200a78500a78400a78700a78600a78c00a78b00a79100a79000a79300a79200a7a100a7a000a7a300a7a200a7a500a7a400a7a700a7a600a7a900a7a800ff4100ff2100ff4200ff2200ff4300ff2300ff4400ff2400ff4500ff2500ff4600ff2600ff4700ff2700ff4800ff2800ff4900ff2900ff4a00ff2a00ff4b00ff2b00ff4c00ff2c00ff4d00ff2d00ff4e00ff2e00ff4f00
ff2f00ff5000ff3000ff5100ff3100ff5200ff3200ff5300ff3300ff5400ff3400ff5500ff3500ff5600ff3600ff5700ff3700ff5800ff3800ff5900ff3900ff5a00ff3a01042801040001042901040101042a01040201042b01040301042c01040401042d01040501042e01040601042f01040701043001040801043101040901043201040a01043301040b01043401040c01043501040d01043601040e01043701040f01043801041001043901041101043a01041201043b01041301043c01041401043d01041501043e01041601043f01041701044001041801044101041901044201041a01044301041b01044401041c01044501041d01044601041e01044701041f01044801042001044901042101044a01042201044b01042301044c01042401044d01042501044e01042601044f010427
--------------------------------------------------------------------------------
/cdata/soft_dotted_set:
--------------------------------------------------------------------------------
1 | 00006900006a00012f00024900026800029d0002b20003f3000456000458001d62001d96001da4001da8001e2d001ecb002071002148002149002c7c01d42201d42301d45601d45701d48a01d48b01d4be01d4bf01d4f201d4f301d52601d52701d55a01d55b01d58e01d58f01d5c201d5c301d5f601d5f701d62a01d62b01d65e01d65f01d69201d693
--------------------------------------------------------------------------------
/cdata/special_lc_map:
--------------------------------------------------------------------------------
1 | 000130000069000307xxxxxx001f88001f80xxxxxx001f89001f81xxxxxx001f8a001f82xxxxxx001f8b001f83xxxxxx001f8c001f84xxxxxx001f8d001f85xxxxxx001f8e001f86xxxxxx001f8f001f87xxxxxx001f98001f90xxxxxx001f99001f91xxxxxx001f9a001f92xxxxxx001f9b001f93xxxxxx001f9c001f94xxxxxx001f9d001f95xxxxxx001f9e001f96xxxxxx001f9f001f97xxxxxx001fa8001fa0xxxxxx001fa9001fa1xxxxxx001faa001fa2xxxxxx001fab001fa3xxxxxx001fac001fa4xxxxxx001fad001fa5xxxxxx001fae001fa6xxxxxx001faf001fa7xxxxxx001fbc001fb3xxxxxx001fcc001fc3xxxxxx001ffc001ff3xxxxxx
--------------------------------------------------------------------------------
/cdata/special_tc_map:
--------------------------------------------------------------------------------
1 | 0000df000053000073xxxxxx00fb00000046000066xxxxxx00fb01000046000069xxxxxx00fb0200004600006cxxxxxx00fb03000046000066000069xxxxxx00fb0400004600006600006cxxxxxx00fb05000053000074xxxxxx00fb06000053000074xxxxxx000587000535000582xxxxxx00fb13000544000576xxxxxx00fb14000544000565xxxxxx00fb1500054400056bxxxxxx00fb1600054e000576xxxxxx00fb1700054400056dxxxxxx0001490002bc00004exxxxxx000390000399000308000301xxxxxx0003b00003a5000308000301xxxxxx0001f000004a00030cxxxxxx001e96000048000331xxxxxx001e97000054000308xxxxxx001e9800005700030axxxxxx001e9900005900030axxxxxx001e9a0000410002bexxxxxx001f500003a5000313xxxxxx001f520003a5000313000300xxxxxx001f540003a5000313000301xxxxxx001f560003a5000313000342xxxxxx001fb6000391000342xxxxxx001fc6000397000342xxxxxx001fd2000399000308000300xxxxxx001fd3000399000308000301xxxxxx001fd6000399000342xxxxxx001fd7000399000308000342xxxxxx001fe20003a5000308000300xxxxxx001fe30003a5000308000301xxxxxx001fe40003a1000313xxxxxx001fe60003a5000342xxxxxx001fe70003a5000308000342xxxxxx001ff60003a9000342xxxxxx001f80001f88xxxxxx001f81001f89xxxxxx001f82001f8axxxxxx001f83001f8bxxxxxx001f84001f8cxxxxxx001f85001f8dxxxxxx001f86001f8exxxxxx001f87001f8fxxxxxx001f90001f98xxxxxx001f91001f99xxxxxx001f92001f9axxxxxx001f93001f9bxxxxxx001f94001f9cxxxxxx001f95001f9dxxxxxx001f96001f9exxxxxx001f97001f9fxxxxxx001fa0001fa8xxxxxx001fa1001fa9xxxxxx001fa2001faaxxxxxx001fa3001fabxxxxxx001fa4001facxxxxxx001fa5001fadxxxxxx001fa6001faexxxxxx001fa7001fafxxxxxx001fb3001fbcxxxxxx001fc3001fccxxxxxx001ff3001ffcxxxxxx001fb2001fba000345xxxxxx001fb4000386000345xxxxxx001fc2001fca000345xxxxxx001fc4000389000345xxxxxx001ff2001ffa000345xxxxxx001ff400038f000345xxxxxx001fb7000391000342000345xxxxxx001fc7000397000342000345xxxxxx001ff70003a9000342000345xxxxxx
--------------------------------------------------------------------------------
/cdata/special_uc_map:
--------------------------------------------------------------------------------
1 | 0000df000053000053xxxxxx00fb00000046000046xxxxxx00fb01000046000049xxxxxx00fb0200004600004cxxxxxx00fb03000046000046000049xxxxxx00fb0400004600004600004cxxxxxx00fb05000053000054xxxxxx00fb06000053000054xxxxxx000587000535000552xxxxxx00fb13000544000546xxxxxx00fb14000544000535xxxxxx00fb1500054400053bxxxxxx00fb1600054e000546xxxxxx00fb1700054400053dxxxxxx0001490002bc00004exxxxxx000390000399000308000301xxxxxx0003b00003a5000308000301xxxxxx0001f000004a00030cxxxxxx001e96000048000331xxxxxx001e97000054000308xxxxxx001e9800005700030axxxxxx001e9900005900030axxxxxx001e9a0000410002bexxxxxx001f500003a5000313xxxxxx001f520003a5000313000300xxxxxx001f540003a5000313000301xxxxxx001f560003a5000313000342xxxxxx001fb6000391000342xxxxxx001fc6000397000342xxxxxx001fd2000399000308000300xxxxxx001fd3000399000308000301xxxxxx001fd6000399000342xxxxxx001fd7000399000308000342xxxxxx001fe20003a5000308000300xxxxxx001fe30003a5000308000301xxxxxx001fe40003a1000313xxxxxx001fe60003a5000342xxxxxx001fe70003a5000308000342xxxxxx001ff60003a9000342xxxxxx001f80001f08000399xxxxxx001f81001f09000399xxxxxx001f82001f0a000399xxxxxx001f83001f0b000399xxxxxx001f84001f0c000399xxxxxx001f85001f0d000399xxxxxx001f86001f0e000399xxxxxx001f87001f0f000399xxxxxx001f88001f08000399xxxxxx001f89001f09000399xxxxxx001f8a001f0a000399xxxxxx001f8b001f0b000399xxxxxx001f8c001f0c000399xxxxxx001f8d001f0d000399xxxxxx001f8e001f0e000399xxxxxx001f8f001f0f000399xxxxxx001f90001f28000399xxxxxx001f91001f29000399xxxxxx001f92001f2a000399xxxxxx001f93001f2b000399xxxxxx001f94001f2c000399xxxxxx001f95001f2d000399xxxxxx001f96001f2e000399xxxxxx001f97001f2f000399xxxxxx001f98001f28000399xxxxxx001f99001f29000399xxxxxx001f9a001f2a000399xxxxxx001f9b001f2b000399xxxxxx001f9c001f2c000399xxxxxx001f9d001f2d000399xxxxxx001f9e001f2e000399xxxxxx001f9f001f2f000399xxxxxx001fa0001f68000399xxxxxx001fa1001f69000399xxxxxx001fa2001f6a000399xxxxxx001fa3001f6b000399xxxxxx001fa4001f6c000399xxxxxx001fa5001f6d000399xxxxxx001fa6001f6e000399xxxxxx001fa7001f6f000399xxxxxx001fa8001f68000399xxxx
xx001fa9001f69000399xxxxxx001faa001f6a000399xxxxxx001fab001f6b000399xxxxxx001fac001f6c000399xxxxxx001fad001f6d000399xxxxxx001fae001f6e000399xxxxxx001faf001f6f000399xxxxxx001fb3000391000399xxxxxx001fbc000391000399xxxxxx001fc3000397000399xxxxxx001fcc000397000399xxxxxx001ff30003a9000399xxxxxx001ffc0003a9000399xxxxxx001fb2001fba000399xxxxxx001fb4000386000399xxxxxx001fc2001fca000399xxxxxx001fc4000389000399xxxxxx001ff2001ffa000399xxxxxx001ff400038f000399xxxxxx001fb7000391000342000399xxxxxx001fc7000397000342000399xxxxxx001ff70003a9000342000399xxxxxx
--------------------------------------------------------------------------------
/cdata/white_space_set:
--------------------------------------------------------------------------------
1 | 00000900000a00000b00000c00000d0000200000850000a000168000180e00200000200100200200200300200400200500200600200700200800200900200a00202800202900202f00205f003000
--------------------------------------------------------------------------------
/data/Jamo.txt:
--------------------------------------------------------------------------------
1 | # Jamo-6.2.0.txt
2 | # Date: 2012-05-15, 22:23:00 GMT [KW, LI]
3 | #
4 | # Unicode Character Database
5 | # Copyright (c) 1991-2012 Unicode, Inc.
6 | # For terms of use, see http://www.unicode.org/terms_of_use.html
7 | # For documentation, see http://www.unicode.org/reports/tr44/
8 | #
9 | # This file defines the Jamo_Short_Name property.
10 | #
11 | # See Section 3.12 of The Unicode Standard, Version 6.2
12 | # for more information.
13 | #
14 | # Each line contains two fields, separated by a semicolon.
15 | #
16 | # The first field gives the code point, in 4-digit hexadecimal
17 | # form, of a conjoining jamo character that participates in the
18 | # algorithmic determination of Hangul syllable character names.
19 | # The second field gives the Jamo_Short_Name as a one-, two-,
20 | # or three-character ASCII string (or in one case, for U+110B,
21 | # the null string).
22 | #
23 | # #############################################################
24 |
25 | 1100; G # HANGUL CHOSEONG KIYEOK
26 | 1101; GG # HANGUL CHOSEONG SSANGKIYEOK
27 | 1102; N # HANGUL CHOSEONG NIEUN
28 | 1103; D # HANGUL CHOSEONG TIKEUT
29 | 1104; DD # HANGUL CHOSEONG SSANGTIKEUT
30 | 1105; R # HANGUL CHOSEONG RIEUL
31 | 1106; M # HANGUL CHOSEONG MIEUM
32 | 1107; B # HANGUL CHOSEONG PIEUP
33 | 1108; BB # HANGUL CHOSEONG SSANGPIEUP
34 | 1109; S # HANGUL CHOSEONG SIOS
35 | 110A; SS # HANGUL CHOSEONG SSANGSIOS
36 | 110B; # HANGUL CHOSEONG IEUNG
37 | 110C; J # HANGUL CHOSEONG CIEUC
38 | 110D; JJ # HANGUL CHOSEONG SSANGCIEUC
39 | 110E; C # HANGUL CHOSEONG CHIEUCH
40 | 110F; K # HANGUL CHOSEONG KHIEUKH
41 | 1110; T # HANGUL CHOSEONG THIEUTH
42 | 1111; P # HANGUL CHOSEONG PHIEUPH
43 | 1112; H # HANGUL CHOSEONG HIEUH
44 | 1161; A # HANGUL JUNGSEONG A
45 | 1162; AE # HANGUL JUNGSEONG AE
46 | 1163; YA # HANGUL JUNGSEONG YA
47 | 1164; YAE # HANGUL JUNGSEONG YAE
48 | 1165; EO # HANGUL JUNGSEONG EO
49 | 1166; E # HANGUL JUNGSEONG E
50 | 1167; YEO # HANGUL JUNGSEONG YEO
51 | 1168; YE # HANGUL JUNGSEONG YE
52 | 1169; O # HANGUL JUNGSEONG O
53 | 116A; WA # HANGUL JUNGSEONG WA
54 | 116B; WAE # HANGUL JUNGSEONG WAE
55 | 116C; OE # HANGUL JUNGSEONG OE
56 | 116D; YO # HANGUL JUNGSEONG YO
57 | 116E; U # HANGUL JUNGSEONG U
58 | 116F; WEO # HANGUL JUNGSEONG WEO
59 | 1170; WE # HANGUL JUNGSEONG WE
60 | 1171; WI # HANGUL JUNGSEONG WI
61 | 1172; YU # HANGUL JUNGSEONG YU
62 | 1173; EU # HANGUL JUNGSEONG EU
63 | 1174; YI # HANGUL JUNGSEONG YI
64 | 1175; I # HANGUL JUNGSEONG I
65 | 11A8; G # HANGUL JONGSEONG KIYEOK
66 | 11A9; GG # HANGUL JONGSEONG SSANGKIYEOK
67 | 11AA; GS # HANGUL JONGSEONG KIYEOK-SIOS
68 | 11AB; N # HANGUL JONGSEONG NIEUN
69 | 11AC; NJ # HANGUL JONGSEONG NIEUN-CIEUC
70 | 11AD; NH # HANGUL JONGSEONG NIEUN-HIEUH
71 | 11AE; D # HANGUL JONGSEONG TIKEUT
72 | 11AF; L # HANGUL JONGSEONG RIEUL
73 | 11B0; LG # HANGUL JONGSEONG RIEUL-KIYEOK
74 | 11B1; LM # HANGUL JONGSEONG RIEUL-MIEUM
75 | 11B2; LB # HANGUL JONGSEONG RIEUL-PIEUP
76 | 11B3; LS # HANGUL JONGSEONG RIEUL-SIOS
77 | 11B4; LT # HANGUL JONGSEONG RIEUL-THIEUTH
78 | 11B5; LP # HANGUL JONGSEONG RIEUL-PHIEUPH
79 | 11B6; LH # HANGUL JONGSEONG RIEUL-HIEUH
80 | 11B7; M # HANGUL JONGSEONG MIEUM
81 | 11B8; B # HANGUL JONGSEONG PIEUP
82 | 11B9; BS # HANGUL JONGSEONG PIEUP-SIOS
83 | 11BA; S # HANGUL JONGSEONG SIOS
84 | 11BB; SS # HANGUL JONGSEONG SSANGSIOS
85 | 11BC; NG # HANGUL JONGSEONG IEUNG
86 | 11BD; J # HANGUL JONGSEONG CIEUC
87 | 11BE; C # HANGUL JONGSEONG CHIEUCH
88 | 11BF; K # HANGUL JONGSEONG KHIEUKH
89 | 11C0; T # HANGUL JONGSEONG THIEUTH
90 | 11C1; P # HANGUL JONGSEONG PHIEUPH
91 | 11C2; H # HANGUL JONGSEONG HIEUH
92 |
93 | # EOF
94 |
--------------------------------------------------------------------------------
/data/README.txt:
--------------------------------------------------------------------------------
1 | The data files in this directory were obtained from
2 | http://www.unicode.org/Public/6.2.0/ucd/ on 2012-09-30.
3 |
--------------------------------------------------------------------------------
/install.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "rbconfig"
4 | require "fileutils"
5 |
# Default install target: the site library directory of the running Ruby.
# RbConfig is the constant provided by "rbconfig"; the legacy top-level
# Config alias was deprecated in Ruby 1.9.3 and removed in Ruby 2.2, so
# referencing it raises NameError on current interpreters.
sitelibdir = RbConfig::CONFIG['sitelibdir']
# A second command-line argument, when present, overrides the target.
installdir = ARGV[1] || sitelibdir
8 |
9 | HELP = <unicode_utils/u assigns the UnicodeUtils
54 | # module to the toplevel U constant and loads all methods:
55 | #
56 | # $ irb -r unicode_utils/u
57 | # irb(main):001:0> U.grep /angstrom/
58 | # => [#<U+212B "Å" ANGSTROM SIGN utf8:e2,84,ab>]
59 | #
60 | # If a method takes a character as argument (usually named +char+),
61 | # that argument can be an integer or a string (in which case the
62 | # first code point counts) or any other object that responds to +ord+
63 | # by returning an integer.
64 | #
65 | # All methods are non-destructive, string return values are in the
66 | # same encoding as strings passed as arguments, which must be in one
67 | # of the Unicode encodings.
68 | #
69 | # Highlevel methods are:
70 | #
71 | # UnicodeUtils.upcase:: full conversion to uppercase
72 | # UnicodeUtils.downcase:: full conversion to lowercase
73 | # UnicodeUtils.titlecase:: full conversion to titlecase
74 | # UnicodeUtils.casefold:: case folding (case insensitive string comparison)
75 | # UnicodeUtils.nfd:: Normalization Form D
76 | # UnicodeUtils.nfc:: Normalization Form C
77 | # UnicodeUtils.nfkd:: Normalization Form KD
78 | # UnicodeUtils.nfkc:: Normalization Form KC
79 | # UnicodeUtils.each_grapheme:: grapheme boundaries
80 | # UnicodeUtils.each_word:: word boundaries
81 | # UnicodeUtils.char_name:: character names
82 | # UnicodeUtils.grep:: find code points by character name
83 | module UnicodeUtils
84 | end
85 |
--------------------------------------------------------------------------------
/lib/unicode_utils/canonical_decomposition.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 | require "unicode_utils/hangul_syllable_decomposition"
5 | require "unicode_utils/combining_class"
6 |
7 | module UnicodeUtils
8 |
9 | CANONICAL_DECOMPOSITION_MAP =
10 | Impl.read_multivalued_map("canonical_decomposition_map") # :nodoc:
11 |
12 | # Get the canonical decomposition of the given string, also called
13 | # Normalization Form D or short NFD.
14 | #
15 | # The Unicode standard has multiple representations for some
16 | # characters. One representation as a single code point and other
17 | # representation(s) as a combination of multiple code points. This
18 | # function "decomposes" these characters in +str+ into the latter
19 | # representation.
20 | #
21 | # Example:
22 | #
23 | # require "unicode_utils/canonical_decomposition"
24 | # # LATIN SMALL LETTER A WITH ACUTE => LATIN SMALL LETTER A, COMBINING ACUTE ACCENT
25 | # UnicodeUtils.canonical_decomposition("\u{E1}") => "\u{61}\u{301}"
26 | #
27 | # See also: UnicodeUtils.nfd
def canonical_decomposition(str)
  decomposed = String.new.force_encoding(str.encoding)
  str.each_codepoint do |code_point|
    if code_point.between?(0xAC00, 0xD7A3)
      # Hangul syllables are handled by a dedicated helper, not by the
      # lookup table.
      Impl.append_hangul_syllable_decomposition(decomposed, code_point)
      next
    end
    entry = CANONICAL_DECOMPOSITION_MAP[code_point]
    if entry
      Impl.append_recursive_canonical_decomposition_mapping(decomposed, entry)
    else
      # no decomposition: copy the code point unchanged
      decomposed << code_point
    end
  end
  # Combining marks may now be out of order; reorder them.
  Impl.put_into_canonical_order(decomposed)
end
44 | module_function :canonical_decomposition
45 |
46 | module Impl # :nodoc:
47 |
# Append the code points listed in +mapping+ to +str+, expanding every
# code point that itself has an entry in CANONICAL_DECOMPOSITION_MAP.
def self.append_recursive_canonical_decomposition_mapping(str, mapping)
  mapping.each do |code_point|
    nested = CANONICAL_DECOMPOSITION_MAP[code_point]
    unless nested
      str << code_point
      next
    end
    append_recursive_canonical_decomposition_mapping(str, nested)
  end
end
58 |
# Reorder combining marks in +str+ by combining class.
#
# Every maximal run of code points with a non-zero combining class is
# sorted by ascending combining class; code points with equal class keep
# their relative order and code points of class 0 never move.
#
# Returns +str+ itself when nothing has to be moved, otherwise a new
# string in the same encoding.
#
# This sorts each run in one pass instead of performing one adjacent-swap
# pass per recursive call, so long runs of misordered marks neither take
# quadratic time nor exhaust the stack. The resulting order is the same.
def self.put_into_canonical_order(str)
  code_points = str.codepoints
  classes = code_points.map { |cp| COMBINING_CLASS_MAP[cp] }
  last = code_points.length - 1
  out_of_order = (0...last).any? { |i|
    classes[i + 1] != 0 && classes[i] > classes[i + 1]
  }
  return str unless out_of_order

  res = String.new.force_encoding(str.encoding)
  run = [] # indices of the pending run of non-zero-class code points
  flush_run = lambda {
    # the index as secondary key makes the sort stable
    run.sort_by { |k| [classes[k], k] }.each { |k| res << code_points[k] }
    run.clear
  }
  code_points.each_index { |i|
    if classes[i] == 0
      flush_run.call
      res << code_points[i]
    else
      run << i
    end
  }
  flush_run.call
  res
end
92 |
93 | end
94 |
95 | end
96 |
--------------------------------------------------------------------------------
/lib/unicode_utils/canonical_equivalents_q.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/canonical_decomposition"
4 |
5 | module UnicodeUtils
6 |
7 | # The strings +a+ and +b+ are canonical equivalents if their
8 | # canonical decompositions are equal.
9 | #
10 | # Example:
11 | #
12 | # require "unicode_utils/canonical_equivalents_q"
13 | # UnicodeUtils.canonical_equivalents?("Äste", "A\u{308}ste") => true
14 | # UnicodeUtils.canonical_equivalents?("Äste", "Aste") => false
def canonical_equivalents?(a, b)
  # Compare the canonical decompositions of both strings (+a+ first).
  nfd_a, nfd_b = [a, b].map { |s| UnicodeUtils.canonical_decomposition(s) }
  nfd_a == nfd_b
end
19 | module_function :canonical_equivalents?
20 |
21 | end
22 |
--------------------------------------------------------------------------------
/lib/unicode_utils/case_ignorable_char_q.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | CASE_IGNORABLE_SET = Impl.read_code_point_set("case_ignorable_set") # :nodoc:
8 |
9 | # Returns true if the given character is case-ignorable as defined
10 | # by Unicode 5.0, section 3.13.
def case_ignorable_char?(char)
  code_point = char.ord
  CASE_IGNORABLE_SET.include?(code_point)
end
14 | module_function :case_ignorable_char?
15 |
16 | end
17 |
--------------------------------------------------------------------------------
/lib/unicode_utils/cased_char_q.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/lowercase_char_q"
4 | require "unicode_utils/uppercase_char_q"
5 | require "unicode_utils/titlecase_char_q"
6 |
7 | module UnicodeUtils
8 |
9 | # A cased char is a character that has the Unicode property
10 | # Lowercase or Uppercase or the general category Titlecase_Letter.
11 | #
12 | # See also: lowercase_char?, uppercase_char?, titlecase_char?
def cased_char?(char)
  # Checked in this order; later predicates run only if the earlier
  # ones were falsy.
  cased = lowercase_char?(char)
  cased ||= uppercase_char?(char)
  cased ||= titlecase_char?(char)
  cased
end
16 | module_function :cased_char?
17 |
18 | end
19 |
--------------------------------------------------------------------------------
/lib/unicode_utils/casefold.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 | require "unicode_utils/simple_casefold"
5 |
6 | module UnicodeUtils
7 |
8 | CASEFOLD_F_MAP = Impl.read_multivalued_map("casefold_f_map") # :nodoc:
9 |
10 | # Perform full case folding. The returned string may be longer than
11 | # +str+. The purpose of case folding is case insensitive string
12 | # comparison.
13 | #
14 | # Examples:
15 | #
16 | # require "unicode_utils/casefold"
17 | # UnicodeUtils.casefold("Ümit") == UnicodeUtils.casefold("ümit") => true
18 | # UnicodeUtils.casefold("WEISS") == UnicodeUtils.casefold("weiß") => true
def casefold(str)
  folded = String.new.force_encoding(str.encoding)
  str.each_codepoint do |code_point|
    # A single-code-point folding from the C table takes precedence.
    common = CASEFOLD_C_MAP[code_point]
    if common
      folded << common
      next
    end
    # Otherwise try the multi-code-point F table.
    full = CASEFOLD_F_MAP[code_point]
    if full
      full.each { |m| folded << m }
    else
      folded << code_point
    end
  end
  folded
end
32 | module_function :casefold
33 |
34 | end
35 |
--------------------------------------------------------------------------------
/lib/unicode_utils/char_display_width.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/display_width"
4 |
5 | module UnicodeUtils
6 |
7 | # Get the width of +char+ when displayed with a fixed pitch font.
8 | #
9 | # Some code points (especially from east asian scripts) take the
10 | # width of two characters, while others have no width.
11 | #
12 | # Examples:
13 | #
14 | # require "unicode_utils/char_display_width"
15 | # UnicodeUtils.char_display_width("別") # => 2
16 | # UnicodeUtils.char_display_width(0x308) # => 0
17 | # UnicodeUtils.char_display_width("a") # => 1
18 | #
19 | # Performs the same logic as UnicodeUtils.display_width, but for a
20 | # single code point.
def char_display_width(char)
  code_point = char.ord
  # copied from display_width, keep in sync!
  width_class = UnicodeUtils.east_asian_width(code_point)
  return 2 if width_class == :Wide || width_class == :Fullwidth
  GENERAL_CATEGORY_BASIC_WIDTH_MAP[UnicodeUtils.gc(code_point)]
end
29 | module_function :char_display_width
30 |
31 | end
32 |
--------------------------------------------------------------------------------
/lib/unicode_utils/char_name.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 | require "unicode_utils/hangul_syllable_decomposition"
5 | require "unicode_utils/jamo_short_name"
6 |
7 | module UnicodeUtils
8 |
9 | NAME_MAP = Impl.read_names("names") # :nodoc:
10 |
11 | # Get the normative Unicode name of the given character.
12 | #
13 | # Private Use code points have no name, this function returns nil for
14 | # such code points.
15 | #
16 | # All control characters have the special name "<control>". All
17 | # other characters have a unique name.
18 | #
19 | # Example:
20 | #
21 | # require "unicode_utils/char_name"
22 | # UnicodeUtils.char_name "ᾀ" => "GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI"
23 | # UnicodeUtils.char_name "\t" => "<control>"
24 | #
25 | # Note that this method deviates from the Unicode Name property in two
26 | # points:
27 | #
28 | # 1. It returns "<control>" for control codes, the Unicode Name property for
29 | # these code points is an empty string
30 | # 2. It returns nil for other non-graphic, non-format code points, the
31 | # Unicode Name property for these code points is an empty string
32 | #
33 | # See also: UnicodeUtils.sid
def char_name(char)
  # TODO: improve with code point labels, see section 4.8 in Unicode 6.0.0
  # Accept an Integer directly; anything else is asked for its +ord+
  # (for a String that is its first code point).
  cp = char.kind_of?(Integer) ? char : char.ord
  NAME_MAP[cp] ||
    case cp
    when 0x3400..0x4DB5, 0x4E00..0x9FCC, 0x20000..0x2A6D6, 0x2A700..0x2B734, 0x2B740..0x2B81D
      # CJK unified ideographs are named after their code point.
      "CJK UNIFIED IDEOGRAPH-#{sprintf('%04X', cp)}"
    when 0xAC00..0xD7A3
      # Always decompose exactly the one syllable identified by +cp+.
      # Passing +char+ through would name every syllable of a longer
      # string and would hand non-String +ord+ providers to the
      # decomposition helper.
      syllable = cp.chr(Encoding::UTF_8)
      "HANGUL SYLLABLE " +
        hangul_syllable_decomposition(syllable).each_char.map { |c|
          jamo_short_name(c) || ''
        }.join
    end
end
56 | module_function :char_name
57 |
58 | end
59 |
--------------------------------------------------------------------------------
/lib/unicode_utils/char_type.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/gc"
4 | require "unicode_utils/general_category"
5 |
6 | module UnicodeUtils
7 |
# For every two-letter key of GENERAL_CATEGORY_ALIAS_MAP, stores the
# alias registered under the key's first letter.
GENERAL_CATEGORY_TYPE_MAP =
  GENERAL_CATEGORY_ALIAS_MAP.keys.each_with_object({}) { |short, type_map|
    next unless short.length == 2
    major = short[0].to_sym
    type_map[short] = GENERAL_CATEGORY_ALIAS_MAP[major]
  } # :nodoc:
15 |
16 | # Get the long major general category alias of char.
17 | #
18 | # Example:
19 | #
20 | # require "unicode_utils/char_type"
21 | # UnicodeUtils.char_type("1") # => :Number
22 | #
23 | # Always returns a symbol when char is in the Unicode code point
24 | # range.
25 | #
26 | # See also: UnicodeUtils.general_category
def char_type(char)
  category = UnicodeUtils.gc(char)
  GENERAL_CATEGORY_TYPE_MAP[category]
end
30 | module_function :char_type
31 |
32 | end
33 |
--------------------------------------------------------------------------------
/lib/unicode_utils/code_point_type.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/gc"
4 |
5 | module UnicodeUtils
6 |
# Code point type for each short general category symbol. The value
# +false+ (for Cn) tells code_point_type to consult CN_CODE_POINT_TYPE
# instead.
GENERAL_CATEGORY_CODE_POINT_TYPE = {
Lu: :Graphic, Ll: :Graphic, Lt: :Graphic, Lm: :Graphic, Lo: :Graphic,
Mn: :Graphic, Mc: :Graphic, Me: :Graphic,
Nd: :Graphic, Nl: :Graphic, No: :Graphic,
Pc: :Graphic, Pd: :Graphic, Ps: :Graphic,
Pe: :Graphic, Pi: :Graphic, Pf: :Graphic, Po: :Graphic,
Sm: :Graphic, Sc: :Graphic, Sk: :Graphic, So: :Graphic,
Zs: :Graphic, Zl: :Format, Zp: :Format,
Cc: :Control, Cf: :Format, Cs: :Surrogate, Co: :Private_Use,
# Cn is split into two types (Reserved and Noncharacter)!
Cn: false
} # :nodoc:
19 |
# Maps the sixty-six noncharacter code points to :Noncharacter; every
# other key yields the default :Reserved.
CN_CODE_POINT_TYPE = Hash.new(:Reserved).tap { |table|
  # U+FDD0..U+FDEF ...
  noncharacters = (0xFDD0..0xFDEF).to_a
  # ... plus the last two code points of each of the 17 planes
  (0..16).each { |plane|
    base = plane << 16
    noncharacters << (base | 0xFFFE)
    noncharacters << (base | 0xFFFF)
  }
  noncharacters.each { |cp| table[cp] = :Noncharacter }
  raise "assertion error #{table.size}" unless table.size == 66
} # :nodoc:
31 |
32 | # Get the code point type of the given +integer+ (must be instance
33 | # of Integer) as defined by the Unicode standard.
34 | #
35 | # If +integer+ is a code point (anything in
36 | # UnicodeUtils::Codepoint::RANGE), returns one of the following
37 | # symbols:
38 | #
39 | # :Graphic
40 | # :Format
41 | # :Control
42 | # :Private_Use
43 | # :Surrogate
44 | # :Noncharacter
45 | # :Reserved
46 | #
47 | # For an exact meaning of these values, read the sections
48 | # "Conformance/Characters and Encoding" and "General
49 | # Structure/Types of Codepoints" in the Unicode standard.
50 | #
51 | # Following is a paraphrased excerpt:
52 | #
53 | # +Surrogate+, +Noncharacter+ and +Reserved+ code points are not
54 | # assigned to an _abstract character_. All other code points are
55 | # assigned to an abstract character.
56 | #
57 | # +Reserved+ code points are also called _Undesignated_ code points,
58 | # all others are _Designated_ code points.
59 | #
60 | # Returns nil if +integer+ is not a code point.
def code_point_type(integer)
  type = GENERAL_CATEGORY_CODE_POINT_TYPE[UnicodeUtils.gc(integer)]
  # +false+ marks the Cn category, which needs a per-code-point lookup.
  type == false ? CN_CODE_POINT_TYPE[integer] : type
end
68 | module_function :code_point_type
69 |
70 | end
71 |
--------------------------------------------------------------------------------
/lib/unicode_utils/codepoint.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/char_name"
4 |
5 | module UnicodeUtils
6 |
7 | # A Codepoint instance represents a single Unicode code point.
8 | #
9 | # UnicodeUtils::Codepoint.new(0x20ac) => #<U+20AC "€" EURO SIGN utf8:e2,82,ac>
class Codepoint

  # The Unicode codespace. Any integer in this range is a Unicode
  # code point.
  RANGE = 0..0x10FFFF

  # Create a Codepoint instance that wraps the given Integer. +int+
  # must be in Codepoint::RANGE, otherwise ArgumentError is raised.
  def initialize(int)
    unless RANGE.include?(int)
      raise ArgumentError, "#{int} not in codespace"
    end
    @int = int
  end

  # Convert to Integer.
  def ord
    @int
  end

  # Format in U+ notation.
  #
  #   Codepoint.new(0xc5).uplus => "U+00C5"
  def uplus
    sprintf('U+%04X', @int)
  end

  # Get the normative Unicode name of this code point.
  #
  # See also: UnicodeUtils.char_name
  def name
    UnicodeUtils.char_name(@int)
  end

  # Convert this code point to an UTF-8 encoded string. Returns a new
  # string on each call and thus it is allowed to mutate the return
  # value.
  #
  # Raises RangeError for code points that cannot be encoded in UTF-8
  # (surrogates).
  def to_s
    @int.chr(Encoding::UTF_8)
  end

  # Get the bytes used to encode this code point in UTF-8,
  # hex-formatted.
  #
  #   Codepoint.new(0xe4).hexbytes => "c3,a4"
  #
  # Raises RangeError where to_s does.
  def hexbytes
    to_s.bytes.map { |b| sprintf("%02x", b) }.join(",")
  end

  # Human readable representation: U+ notation, the character, its
  # name (or "nil") and its UTF-8 bytes.
  #
  #   Codepoint.new(0xe4).inspect => #<U+00E4 "ä" LATIN SMALL LETTER A WITH DIAERESIS utf8:c3,a4>
  #
  # Surrogate code points are in RANGE but have no UTF-8 encoding; for
  # them the character and byte fields read "N/A" instead of letting
  # the RangeError escape from inspect.
  def inspect
    begin
      char = to_s.inspect
      bytes = hexbytes
    rescue RangeError
      char = bytes = "N/A"
    end
    "#<#{uplus} #{char} #{name || "nil"} utf8:#{bytes}>"
  end

end
65 |
66 | end
67 |
--------------------------------------------------------------------------------
/lib/unicode_utils/combining_class.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | COMBINING_CLASS_MAP = Impl.read_combining_class_map() # :nodoc:
8 | COMBINING_CLASS_MAP.default = 0
9 |
10 | # Get the combining class of the given character as an integer in
11 | # the range 0..255.
def combining_class(char)
  code_point = char.ord
  COMBINING_CLASS_MAP[code_point]
end
15 | module_function :combining_class
16 |
17 | end
18 |
--------------------------------------------------------------------------------
/lib/unicode_utils/compatibility_decomposition.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 | require "unicode_utils/canonical_decomposition"
5 | require "unicode_utils/hangul_syllable_decomposition"
6 |
7 | module UnicodeUtils
8 |
9 | COMPATIBILITY_DECOMPOSITION_MAP =
10 | Impl.read_multivalued_map("compatibility_decomposition_map") # :nodoc:
11 |
12 | # Get the compatibility decomposition of the given string, also
13 | # called Normalization Form KD or short NFKD.
14 | #
15 | # Compatibility decomposition decomposes more code points than
16 | # canonical decomposition and contrary to Normalization Form D and
17 | # C, this normalization can alter how a string is displayed.
18 | #
19 | # Example:
20 | #
21 | # require "unicode_utils/compatibility_decomposition"
22 | # # LATIN SMALL LIGATURE FI => LATIN SMALL LETTER F, LATIN SMALL LETTER I
23 | # UnicodeUtils.compatibility_decomposition("fi") => "fi"
24 | #
25 | # See also: UnicodeUtils.nfkd
def compatibility_decomposition(str)
  decomposed = String.new.force_encoding(str.encoding)
  str.each_codepoint do |code_point|
    if code_point.between?(0xAC00, 0xD7A3)
      # Hangul syllables are handled by a dedicated helper.
      Impl.append_hangul_syllable_decomposition(decomposed, code_point)
    else
      Impl.append_recursive_compatibility_decomposition_mapping(decomposed, code_point)
    end
  end
  # Combining marks may now be out of order; reorder them.
  Impl.put_into_canonical_order(decomposed)
end
37 | module_function :compatibility_decomposition
38 |
39 | module Impl # :nodoc:
40 |
# Append the full decomposition of +cp+ to +str+. The compatibility
# table is tried first, then the canonical table; a code point found
# in neither is appended unchanged.
def self.append_recursive_compatibility_decomposition_mapping(str, cp)
  expansion = COMPATIBILITY_DECOMPOSITION_MAP[cp] || CANONICAL_DECOMPOSITION_MAP[cp]
  return str << cp unless expansion
  expansion.each { |part|
    append_recursive_compatibility_decomposition_mapping(str, part)
  }
end
52 |
53 | end
54 |
55 | end
56 |
--------------------------------------------------------------------------------
/lib/unicode_utils/conditional_casing.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/cased_char_q"
4 | require "unicode_utils/case_ignorable_char_q"
5 | require "unicode_utils/soft_dotted_char_q"
6 | require "unicode_utils/combining_class"
7 | require "unicode_utils/read_cdata"
8 |
9 | module UnicodeUtils
10 |
11 | module Impl # :nodoc:all
12 |
13 | LANGS_WITH_RULES = {:tr => true, :lt => true, :az => true}
14 |
# A case mapping together with the context in which it applies. This
# base class applies in every context; subclasses override
# context_match?.
class ConditionalCasing

  def initialize(mapping)
    @mapping = mapping
  end

  # The mapping passed to the constructor.
  def mapping
    @mapping
  end

  # Whether the mapping applies to the character at index +pos+ of
  # +str+. Always true here.
  def context_match?(str, pos)
    true
  end

end
28 |
class BeforeDotConditionalCasing < ConditionalCasing

  # True when a U+0307 follows position +pos+ and every character in
  # between has a combining class other than 0 and 230.
  def context_match?(str, pos)
    ((pos + 1)...str.length).each { |i|
      following = str[i]
      return true if following.ord == 0x0307
      klass = UnicodeUtils.combining_class(following)
      return false if klass == 0 || klass == 230
    }
    false # "combining dot above" not found
  end

end
42 |
class NotBeforeDotConditionalCasing < BeforeDotConditionalCasing

  # Negation of BeforeDotConditionalCasing#context_match?.
  def context_match?(str, pos)
    before_dot = super
    !before_dot
  end

end
50 |
class MoreAboveConditionalCasing < ConditionalCasing

  # True when a character of combining class 230 follows position
  # +pos+ before any character of combining class 0.
  def context_match?(str, pos)
    ((pos + 1)...str.length).each { |i|
      klass = UnicodeUtils.combining_class(str[i])
      return true if klass == 230
      return false if klass == 0
    }
    false
  end

end
64 |
class AfterIConditionalCasing < ConditionalCasing

  # True when an uppercase I (U+0049) precedes position +pos+ and
  # every character in between has a combining class other than 0 and
  # 230.
  def context_match?(str, pos)
    (0...pos).reverse_each { |i|
      preceding = str[i]
      return true if preceding.ord == 0x49 # uppercase I
      klass = UnicodeUtils.combining_class(preceding)
      return false if klass == 0 || klass == 230
    }
    false # uppercase I not found
  end

end
78 |
class AfterSoftDottedConditionalCasing < ConditionalCasing

  # True when a soft dotted character precedes position +pos+ and
  # every character in between has a combining class other than 0 and
  # 230.
  def context_match?(str, pos)
    (0...pos).reverse_each { |i|
      preceding = str[i]
      return true if UnicodeUtils.soft_dotted_char?(preceding)
      klass = UnicodeUtils.combining_class(preceding)
      return false if klass == 0 || klass == 230
    }
    false
  end

end
92 |
class FinalSigmaConditionalCasing < ConditionalCasing

  # True when a cased character precedes position +pos+ and none
  # follows it, skipping case-ignorable characters in both directions.
  def context_match?(str, pos)
    return false unless before_match?(str, pos)
    !after_match?(str, pos)
  end

  private

  # Scan backwards from +pos+: true at the first cased character,
  # false at the first character that is not case-ignorable.
  def before_match?(str, pos)
    (0...pos).reverse_each { |i|
      preceding = str[i]
      return true if UnicodeUtils.cased_char?(preceding)
      return false unless UnicodeUtils.case_ignorable_char?(preceding)
    }
    false # no cased char
  end

  # Same scan as before_match?, but forwards from +pos+.
  def after_match?(str, pos)
    ((pos + 1)...str.length).each { |i|
      following = str[i]
      return true if UnicodeUtils.cased_char?(following)
      return false unless UnicodeUtils.case_ignorable_char?(following)
    }
    false
  end

end
120 |
121 | CONDITIONAL_UPCASE_MAP =
122 | read_conditional_casings("cond_uc_map")
123 |
124 | CONDITIONAL_DOWNCASE_MAP =
125 | read_conditional_casings("cond_lc_map")
126 |
127 | CONDITIONAL_TITLECASE_MAP =
128 | read_conditional_casings("cond_tc_map")
129 |
# Return the conditional uppercase mapping for +cp+ at index +pos+ of
# +str+, or nil if no rule exists or its context does not match. A
# rule for +language_id+ is preferred over the language independent
# rule (key nil).
def self.conditional_upcase_mapping(cp, str, pos, language_id)
  by_language = CONDITIONAL_UPCASE_MAP[cp]
  return unless by_language
  rule = by_language[language_id] || by_language[nil]
  rule.mapping if rule && rule.context_match?(str, pos)
end
139 |
# Return the conditional lowercase mapping for +cp+ at index +pos+ of
# +str+, or nil if no rule exists or its context does not match. A
# rule for +language_id+ is preferred over the language independent
# rule (key nil).
def self.conditional_downcase_mapping(cp, str, pos, language_id)
  by_language = CONDITIONAL_DOWNCASE_MAP[cp]
  return unless by_language
  rule = by_language[language_id] || by_language[nil]
  rule.mapping if rule && rule.context_match?(str, pos)
end
149 |
# Return the conditional titlecase mapping for +cp+ at index +pos+ of
# +str+, or nil if no rule exists or its context does not match. A
# rule for +language_id+ is preferred over the language independent
# rule (key nil).
def self.conditional_titlecase_mapping(cp, str, pos, language_id)
  by_language = CONDITIONAL_TITLECASE_MAP[cp]
  return unless by_language
  rule = by_language[language_id] || by_language[nil]
  rule.mapping if rule && rule.context_match?(str, pos)
end
159 |
160 | end
161 |
162 | end
163 |
--------------------------------------------------------------------------------
/lib/unicode_utils/debug.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/display_width"
4 | require "unicode_utils/graphic_char_q"
5 | require "unicode_utils/char_display_width"
6 | require "unicode_utils/sid"
7 | require "unicode_utils/general_category"
8 |
9 | module UnicodeUtils
10 |
11 | # Print a table with detailed information about each code point in
12 | # +str+. +opts+ can have the following keys:
13 | #
14 | # +:io+:: An IO compatible object. Receives the output.
15 | # Defaults to $stdout.
16 | #
17 | # +str+ may also be an Integer, in which case it is interpreted as a
18 | # single code point that must be in UnicodeUtils::Codepoint::RANGE.
19 | #
20 | # Examples:
21 | #
22 | # $ ruby -r unicode_utils/u -e 'U.debug "良い一日"'
23 | # Char | Ordinal | Sid | General Category | UTF-8
24 | # ------+---------+----------------------------+------------------+----------
25 | # "良" | 826F | CJK UNIFIED IDEOGRAPH-826F | Other_Letter | E8 89 AF
26 | # "い" | 3044 | HIRAGANA LETTER I | Other_Letter | E3 81 84
27 | # "一" | 4E00 | CJK UNIFIED IDEOGRAPH-4E00 | Other_Letter | E4 B8 80
28 | # "日" | 65E5 | CJK UNIFIED IDEOGRAPH-65E5 | Other_Letter | E6 97 A5
29 | #
30 | # $ ruby -r unicode_utils/u -e 'U.debug 0xd800'
31 | # Char | Ordinal | Sid | General Category | UTF-8
32 | # ------+---------+------------------+------------------+-------
33 | # N/A | D800 | <surrogate-D800> | Surrogate | N/A
34 | #
35 | # The output is purely informal and may change even in minor
36 | # releases.
def debug(str, opts = {})
  io = opts[:io] || $stdout
  formatters = Impl::DEBUG_COLUMNS.values
  # An Integer is treated as a single code point.
  code_points = str.kind_of?(Integer) ? [str] : str.each_codepoint
  # Header row first, then one row per code point.
  table = [Impl::DEBUG_COLUMNS.keys]
  code_points.each { |cp|
    table << formatters.map { |f| f.call(cp) }
  }
  Impl.print_table(table, io)
  nil
end
50 | module_function :debug
51 |
52 | module Impl # :nodoc:all
53 |
# Column title => formatter. Each formatter takes an Integer code point
# and returns the text of the table cell.
DEBUG_COLUMNS = {
  "Char" => lambda { |cp|
    case cp
    # common control characters are shown as escape sequences
    when 0x07 then '"\a"'
    when 0x08 then '"\b"'
    when 0x09 then '"\t"'
    when 0x0A then '"\n"'
    when 0x0D then '"\r"'
    else
      printable = UnicodeUtils.graphic_char?(cp) &&
        UnicodeUtils.char_display_width(cp) > 0
      printable ? "\"#{cp.chr(Encoding::UTF_8)}\"" : "N/A"
    end
  },
  "Ordinal" => lambda { |cp|
    hex = cp.to_s(16).upcase
    hex.rjust(7)
  },
  "Sid" => lambda { |cp| UnicodeUtils.sid(cp) },
  "General Category" => lambda { |cp|
    UnicodeUtils.general_category(cp).to_s
  },
  "UTF-8" => lambda { |cp|
    begin
      utf8 = cp.chr(Encoding::UTF_8)
      utf8.bytes.map { |b| "%02X" % b }.join(" ")
    rescue RangeError # surrogate code points are not valid in utf-8
      "N/A"
    end
  }
}
88 |
# Return an array holding, for each column index, the largest display
# width of any cell in that column of +table+ (an array of rows).
def self.column_widths(table)
  table.each_with_object([]) { |row, widths|
    row.each_with_index { |txt, col_i|
      dw = UnicodeUtils.display_width(txt)
      widths[col_i] = [widths[col_i] || dw, dw].max
    }
  }
end
100 |
# Print one table row to +io+. Every cell is preceded by a space; all
# cells but the last are padded to their column width and followed by
# a space and "|". The row ends with a newline.
def self.print_row(row, column_widths, io)
  last_col = row.length - 1
  row.each_with_index { |txt, col_i|
    io.print(" ")
    io.print(txt)
    next if col_i == last_col
    padding = column_widths[col_i] - UnicodeUtils.display_width(txt)
    io.print(" " * (padding + 1))
    io.print("|")
  }
  io.puts
end
114 |
# Print the line between header and body: for each column a run of
# "-" two characters wider than the column, joined by "+", then a
# newline.
def self.print_separator_row(column_widths, io)
  segments = column_widths.map { |cw| "-" * (cw + 2) }
  io.print(segments.join("+"))
  io.puts
end
124 |
# Print +table+ to +io+: the first row as header, a separator line,
# then the remaining rows; +io+ is flushed at the end.
def self.print_table(table, io)
  widths = column_widths(table)
  header, *body = table
  print_row(header, widths, io)
  print_separator_row(widths, io)
  body.each { |row| print_row(row, widths, io) }
  io.flush
end
134 |
135 | end
136 |
137 | end
138 |
--------------------------------------------------------------------------------
/lib/unicode_utils/default_ignorable_char_q.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | PROP_DEFAULT_IGNORABLE_SET =
8 | Impl.read_code_point_set("prop_set_default_ignorable") # :nodoc:
9 |
10 | # True if the given character has the Unicode property
11 | # Default_Ignorable_Code_Point (see section 5.3 in Unicode 6.0.0).
12 | #
13 | # When a system (e.g. font) can't display a default ignorable
14 | # code point, it is allowed to simply ignore, i.e. skip it (as
15 | # opposed to other characters, which must at least be displayed with
16 | # a replacement character).
def default_ignorable_char?(char)
  code_point = char.ord
  PROP_DEFAULT_IGNORABLE_SET.include?(code_point)
end
20 | module_function :default_ignorable_char?
21 |
22 | end
23 |
--------------------------------------------------------------------------------
/lib/unicode_utils/display_width.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/east_asian_width"
4 | require "unicode_utils/gc"
5 | require "unicode_utils/graphic_char_q"
6 |
7 | module UnicodeUtils
8 |
# Width (0 or 1) per key of GENERAL_CATEGORY_IS_GRAPHIC_MAP: 1 for keys
# whose value there is truthy, except :Mn and :Me; 0 for everything
# else.
GENERAL_CATEGORY_BASIC_WIDTH_MAP =
  GENERAL_CATEGORY_IS_GRAPHIC_MAP.each_with_object({}) { |(category, graphic), widths|
    zero_width = !graphic || category == :Mn || category == :Me
    widths[category] = zero_width ? 0 : 1
  } # :nodoc:
18 |
19 | # Get the width of +str+ when displayed with a fixed pitch font.
20 | #
21 | # Counts code points, where code points with an east asian width of
22 | # +Wide+ or +Fullwidth+ count for two, non-graphic code points (e.g.
23 | # control characters, including newline!) and non-spacing marks
24 | # count for zero and all others count for one.
25 | #
26 | # Examples:
27 | #
28 | # require "unicode_utils/display_width"
29 | # "別れ".length => 2
30 | # UnicodeUtils.display_width("別れ") => 4
31 | # "12".length => 2
32 | # UnicodeUtils.display_width("12") => 2
33 | # "a\u{308}".length => 2
34 | # UnicodeUtils.display_width("a\u{308}") => 1
35 | #
36 | # Unicode assigns some reserved code points an east asian width of
37 | # +Wide+. Some systems correctly display a double width replacement
38 | # character, others not.
39 | #
40 | # See also: UnicodeUtils.graphic_char?, UnicodeUtils.east_asian_width
def display_width(str)
  total = 0
  str.each_codepoint { |cp|
    width_class = UnicodeUtils.east_asian_width(cp)
    total +=
      if width_class == :Wide || width_class == :Fullwidth
        2
      else
        GENERAL_CATEGORY_BASIC_WIDTH_MAP[UnicodeUtils.gc(cp)]
      end
  }
  total
end
50 | module_function :display_width
51 |
52 | end
53 |
--------------------------------------------------------------------------------
/lib/unicode_utils/downcase.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 | require "unicode_utils/simple_downcase"
5 | require "unicode_utils/conditional_casing"
6 |
7 | module UnicodeUtils
8 |
9 | SPECIAL_DOWNCASE_MAP = Impl.read_multivalued_map("special_lc_map") # :nodoc:
10 |
11 | # Perform a full case-conversion of +str+ to lowercase according to
12 | # the Unicode standard.
13 | #
14 | # Some conversion rules are language dependent, these are in effect
15 | # when a non-nil +language_id+ is given. If non-nil, the
16 | # +language_id+ must be a two letter language code as defined in BCP
17 | # 47 (http://tools.ietf.org/rfc/bcp/bcp47.txt) as a symbol. If a
18 | # language doesn't have a two letter code, the three letter code is
19 | # to be used. If locale independent behaviour is required, +nil+
20 | # should be passed explicitly, because a later version of
21 | # UnicodeUtils may default to something else.
22 | #
23 | # Examples:
24 | #
25 | # require "unicode_utils/downcase"
26 | # UnicodeUtils.downcase("ᾈ") => "ᾀ"
27 | # UnicodeUtils.downcase("aBI\u{307}", :tr) => "abi"
def downcase(str, language_id = nil)
  # The result keeps the encoding of the original argument.
  lowered = String.new.force_encoding(str.encoding)
  # ensure O(1) lookup by index
  str = str.encode(Encoding::UTF_32LE) if Impl::LANGS_WITH_RULES.include?(language_id)
  str.each_codepoint.with_index { |cp, pos|
    # Conditional rules win over unconditional special mappings, which
    # win over the simple one-to-one mapping.
    special =
      Impl.conditional_downcase_mapping(cp, str, pos, language_id) ||
      SPECIAL_DOWNCASE_MAP[cp]
    if special
      special.each { |m| lowered << m }
    else
      lowered << (SIMPLE_DOWNCASE_MAP[cp] || cp)
    end
  }
  lowered
end
48 | module_function :downcase
49 |
50 | end
51 |
--------------------------------------------------------------------------------
/lib/unicode_utils/each_grapheme.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | # Maps code points to integer codes. For the integer code to property
8 | # mapping, see #compile_grapheme_break_property in data/compile.rb.
9 | GRAPHEME_CLUSTER_BREAK_MAP =
10 | Impl.read_hexdigit_map("grapheme_break_property") # :nodoc:
11 |
12 | # Iterate over the grapheme clusters that make up +str+. A grapheme
13 | # cluster is a user perceived character (the basic unit of a writing
14 | # system for a language) and consists of one or more code points.
15 | #
16 | # This method uses the default Unicode algorithm for extended
17 | # grapheme clusters.
18 | #
19 | # Returns an enumerator if no block is given.
20 | #
21 | # Examples:
22 | #
23 | # require "unicode_utils/each_grapheme"
24 | # UnicodeUtils.each_grapheme("a\r\nb") { |g| p g }
25 | #
26 | # prints:
27 | #
28 | # "a"
29 | # "\r\n"
30 | # "b"
31 | #
32 | # and
33 | #
34 | # UnicodeUtils.each_grapheme("a\r\nb").count => 3
def each_grapheme(str)
  return enum_for(__method__, str) unless block_given?
  prev_prop = nil
  cluster = String.new.force_encoding(str.encoding)
  str.each_codepoint { |cp|
    prop = GRAPHEME_CLUSTER_BREAK_MAP[cp]

    # Is there a boundary between the previous code point and this
    # one? The first matching rule decides.
    boundary =
      if prev_prop == 0x0 && prop == 0x1
        false # don't break CR LF
      elsif prev_prop == 0x0 || prev_prop == 0x1 || prev_prop == 0x2
        true # break after controls
      elsif prop == 0x0 || prop == 0x1 || prop == 0x2
        true # break before controls
      elsif prev_prop == 0x6 &&
            (prop == 0x6 || prop == 0x7 || prop == 0x9 || prop == 0xA)
        false # don't break hangul syllable
      elsif (prev_prop == 0x9 || prev_prop == 0x7) &&
            (prop == 0x7 || prop == 0x8)
        false # don't break hangul syllable
      elsif (prev_prop == 0xA || prev_prop == 0x8) && prop == 0x8
        false # don't break hangul syllable
      elsif prev_prop == 0xB && prop == 0xB
        false # don't break between regional indicator symbols
      elsif prop == 0x3 || prop == 0x5
        false # don't break before extending characters or SpacingMarks
      elsif prev_prop == 0x4
        false # don't break after Prepend characters
      else
        true # break everywhere else
      end

    if boundary && !cluster.empty?
      yield cluster
      cluster = String.new.force_encoding(str.encoding)
    end
    cluster << cp
    prev_prop = prop
  }
  yield cluster unless cluster.empty?
end
85 | module_function :each_grapheme
86 |
87 | end
88 |
--------------------------------------------------------------------------------
/lib/unicode_utils/each_word.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | # Maps code points to integer codes. For the integer code to property
8 | # mapping, see #compile_word_break_property in data/compile.rb.
9 | WORD_BREAK_MAP =
10 | Impl.read_hexdigit_map("word_break_property") # :nodoc:
11 |
# Break +str+ into words following Unicode's Default Word Boundary
# Specification and pass every word to the given block. The return
# value is +str+ itself, or an enumerator when no block is given.
#
# Example:
#
#   require "unicode_utils/each_word"
#   UnicodeUtils.each_word("Hello, world!").to_a => ["Hello", ",", " ", "world", "!"]
def each_word(str)
  return enum_for(__method__, str) unless block_given?
  props = str.each_codepoint.map { |cp| WORD_BREAK_MAP[cp] }
  # Two trailing nils: look-behind with indices -1 and -2 then reads nil.
  props.push(nil, nil)
  current = String.new.force_encoding(str.encoding)
  str.each_codepoint.with_index { |cp, pos|
    current << cp
    next unless Impl.word_break?(props, pos)
    yield current
    current = String.new.force_encoding(str.encoding)
  }
  yield current unless current.empty?
  str
end
37 | module_function :each_word
38 |
39 | module Impl # :nodoc:all
40 |
# True if there is a word boundary between the code points at
# positions +i+ and <tt>i + 1</tt>. +cs+ is the list of word break
# property codes built by each_word (two nil entries at its end make
# the negative look-behind indices used here read nil).
def self.word_break?(cs, i)
  cur = cs[i]
  nxt = cs[i + 1]
  # wb3
  return false if cur == 0x0 && nxt == 0x1
  # wb3a
  return true if cur == 0x2 || cur == 0x0 || cur == 0x1
  # wb3b
  return true if nxt == 0x2 || nxt == 0x0 || nxt == 0x1
  # wb5 (look left past Extend/Format codes 0x3 and 0x4)
  left = i
  left -= 1 while cs[left] == 0x3 || cs[left] == 0x4
  prev = cs[left]
  return false if prev == 0x6 && nxt == 0x6
  # wb6 (look right past Extend/Format codes)
  right = i + 2
  right += 1 while cs[right] == 0x3 || cs[right] == 0x4
  after = cs[right]
  return false if prev == 0x6 && (nxt == 0x7 || nxt == 0x9) && after == 0x6
  # wb7 (look one more significant position to the left)
  far_left = left - 1
  far_left -= 1 while cs[far_left] == 0x3 || cs[far_left] == 0x4
  before = cs[far_left]
  return false if before == 0x6 && (prev == 0x7 || prev == 0x9) && nxt == 0x6
  # wb8
  return false if prev == 0xA && nxt == 0xA
  # wb9
  return false if prev == 0x6 && nxt == 0xA
  # wb10
  return false if prev == 0xA && nxt == 0x6
  # wb11
  return false if before == 0xA && (prev == 0x8 || prev == 0x9) && nxt == 0xA
  # wb12
  return false if prev == 0xA && (nxt == 0x8 || nxt == 0x9) && after == 0xA
  # wb13
  return false if prev == 0x5 && nxt == 0x5
  # wb13a
  return false if (prev == 0x6 || prev == 0xA || prev == 0x5 || prev == 0xB) && nxt == 0xB
  # wb13b
  return false if prev == 0xB && (nxt == 0x6 || nxt == 0xA || nxt == 0x5)
  # wb13c
  return false if prev == 0xC && nxt == 0xC
  # break unless next char is Extend/Format
  nxt != 0x3 && nxt != 0x4
end
119 |
120 | end
121 |
122 | end
123 |
--------------------------------------------------------------------------------
/lib/unicode_utils/east_asian_width.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | EAST_ASIAN_WIDTH_MAP_PER_CP =
8 | Impl.read_east_asian_width_per_cp("east_asian_width_property_per_cp") # :nodoc:
9 |
10 | EAST_ASIAN_WIDTH_RANGES =
11 | Impl.read_east_asian_width_ranges("east_asian_width_property_ranges") # :nodoc:
12 |
# Returns the default width of the given code point as described in
# "UAX #11: East Asian Width" (http://unicode.org/reports/tr11/).
#
# The result is one of these six symbols:
# :Neutral, :Ambiguous, :Halfwidth, :Wide, :Fullwidth, :Narrow.
#
# The range table is consulted first; only code points that no range
# covers are looked up in the per code point table.
def east_asian_width(char)
  code_point = char.ord
  hit = EAST_ASIAN_WIDTH_RANGES.find { |pair| pair[0].cover?(code_point) }
  hit ? hit[1] : EAST_ASIAN_WIDTH_MAP_PER_CP[code_point]
end
25 | module_function :east_asian_width
26 |
27 | end
28 |
--------------------------------------------------------------------------------
/lib/unicode_utils/gc.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | GENERAL_CATEGORY_PER_CP_MAP =
8 | Impl.read_general_category_per_cp("general_category_per_cp") # :nodoc:
9 |
10 | GENERAL_CATEGORY_RANGES =
11 | Impl.read_general_category_ranges("general_category_ranges") # :nodoc:
12 |
# Get the two letter general category alias of the given char. The
# first letter names a major class, the second letter a subclass of
# that major class.
#
# See section 4.5 in Unicode 6.0.0.
#
# Example:
#
#   require "unicode_utils/gc"
#   UnicodeUtils.gc("A") # => :Lu (Letter, uppercase)
#
# Returns a two letter symbol, or nil for ordinals outside the
# Unicode code point range.
#
# See also: UnicodeUtils.general_category, UnicodeUtils.char_type
def gc(char)
  cp = char.ord
  direct = GENERAL_CATEGORY_PER_CP_MAP[cp]
  return direct if direct
  ranged = GENERAL_CATEGORY_RANGES.find { |pair| pair[0].cover?(cp) }
  return ranged[1] if ranged
  # In the code space but listed nowhere: :Cn (Other, not assigned)
  (0x0..0x10FFFF).cover?(cp) ? :Cn : nil
end
40 | module_function :gc
41 |
42 | end
43 |
--------------------------------------------------------------------------------
/lib/unicode_utils/general_category.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 | require "unicode_utils/gc"
5 |
6 | module UnicodeUtils
7 |
8 | GENERAL_CATEGORY_ALIAS_MAP =
9 | Impl.read_symbol_map("general_category_aliases") # :nodoc:
10 |
# Get the long general category alias of char.
#
# Example:
#
#   require "unicode_utils/general_category"
#   UnicodeUtils.general_category("A") # => :Uppercase_Letter
#
# The short alias obtained from UnicodeUtils.gc is translated to its
# long form. Returns a symbol if char is in the Unicode code point
# range, nil otherwise.
#
# See also: UnicodeUtils.gc, UnicodeUtils.char_type
def general_category(char)
  short_alias = UnicodeUtils.gc(char)
  GENERAL_CATEGORY_ALIAS_MAP[short_alias]
end
25 | module_function :general_category
26 |
27 | end
28 |
--------------------------------------------------------------------------------
/lib/unicode_utils/graphic_char_q.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/gc"
4 |
module UnicodeUtils

  # Maps each two letter general category alias to true if the
  # category counts as graphic, to false otherwise.
  GENERAL_CATEGORY_IS_GRAPHIC_MAP = {
    Lu: true, Ll: true, Lt: true, Lm: true, Lo: true,
    Mn: true, Mc: true, Me: true,
    Nd: true, Nl: true, No: true,
    Pc: true, Pd: true, Ps: true, Pe: true, Pi: true, Pf: true, Po: true,
    Sm: true, Sc: true, Sk: true, So: true,
    Zs: true, Zl: false, Zp: false,
    Cc: false, Cf: false, Cs: false, Co: false, Cn: false
  }.freeze # :nodoc:

  # Returns true if the given char is a graphic char, false otherwise.
  # See table 2-3 in section 2.4 of Unicode 6.0.0.
  #
  # The decision is made from the general category reported by
  # UnicodeUtils.gc. If UnicodeUtils.gc yields no category known to
  # the table (for example nil), the result is false.
  #
  # Examples:
  #
  #   require "unicode_utils/graphic_char_q"
  #   UnicodeUtils.graphic_char?("a") # => true
  #   UnicodeUtils.graphic_char?("\n") # => false
  #   UnicodeUtils.graphic_char?(0x0) # => false
  def graphic_char?(char)
    # fetch with a false default keeps the documented boolean contract
    # instead of leaking nil for unknown categories.
    GENERAL_CATEGORY_IS_GRAPHIC_MAP.fetch(UnicodeUtils.gc(char), false)
  end
  module_function :graphic_char?

end
32 |
--------------------------------------------------------------------------------
/lib/unicode_utils/grep.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/codepoint"
4 |
5 | module UnicodeUtils
6 |
# Get an array of all Codepoint instances in Codepoint::RANGE whose
# name matches regexp. Matching is always case insensitive; all other
# options of +regexp+ (such as /x or /m) are kept.
#
#   require "unicode_utils/grep"
#   UnicodeUtils.grep(/angstrom/) # => e.g. the Codepoint of U+212B ANGSTROM SIGN
def grep(regexp)
  # TODO: enhance behaviour by searching aliases in NameAliases.txt
  unless regexp.casefold?
    # Add IGNORECASE to the caller's options rather than replacing
    # them, so flags like EXTENDED or MULTILINE survive.
    regexp = Regexp.new(regexp.source, regexp.options | Regexp::IGNORECASE)
  end
  Codepoint::RANGE.select { |cp|
    regexp =~ UnicodeUtils.char_name(cp)
  }.map { |cp| Codepoint.new(cp) }
end
21 | module_function :grep
22 |
23 | end
24 |
--------------------------------------------------------------------------------
/lib/unicode_utils/hangul_syllable_decomposition.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
module UnicodeUtils

  # Derives the canonical decomposition of the given Hangul syllable.
  # The result is a new string in the encoding of +char+.
  #
  # Example:
  #
  #   require "unicode_utils/hangul_syllable_decomposition"
  #   UnicodeUtils.hangul_syllable_decomposition("\u{d4db}") => "\u{1111}\u{1171}\u{11b6}"
  def hangul_syllable_decomposition(char)
    decomposed = String.new.force_encoding(char.encoding)
    Impl.append_hangul_syllable_decomposition(decomposed, char.ord)
    decomposed
  end
  module_function :hangul_syllable_decomposition

  module Impl # :nodoc:

    # Appends the Hangul syllable decomposition of code point +s+ to
    # +str+. A code point outside the syllable block starting at
    # 0xAC00 is appended unchanged.
    def self.append_hangul_syllable_decomposition(str, s)
      # constants
      sbase, lbase, vbase, tbase = 0xAC00, 0x1100, 0x1161, 0x11A7
      scount = 11172
      vcount, tcount = 21, 28
      ncount = vcount * tcount

      sindex = s - sbase
      # not a precomposed syllable: pass the code point through
      return str << s if sindex < 0 || sindex >= scount

      lindex, remainder = sindex.divmod(ncount)
      vindex = remainder / tcount
      tindex = sindex % tcount
      str << (lbase + lindex) << (vbase + vindex)
      # trailing index 0 means the syllable has no trailing part
      str << (tbase + tindex) if tindex != 0
    end

  end

end
47 |
--------------------------------------------------------------------------------
/lib/unicode_utils/jamo_short_name.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | JAMO_SHORT_NAME_MAP = Impl.read_names("jamo_short_names") # :nodoc:
8 |
# Look up the Jamo Short Name property of the given character. The
# result is nil for characters without an entry.
#
# Example:
#
#   require "unicode_utils/jamo_short_name"
#   UnicodeUtils.jamo_short_name("\u{1101}") => "GG"
def jamo_short_name(char)
  code_point = char.ord
  JAMO_SHORT_NAME_MAP[code_point]
end
19 | module_function :jamo_short_name
20 |
21 | end
22 |
--------------------------------------------------------------------------------
/lib/unicode_utils/lowercase_char_q.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | PROP_LOWERCASE_SET = Impl.read_code_point_set("prop_set_lowercase") # :nodoc:
8 |
# True if the given character has the Unicode property Lowercase,
# i.e. if its code point is a member of PROP_LOWERCASE_SET.
def lowercase_char?(char)
  code_point = char.ord
  PROP_LOWERCASE_SET.member?(code_point)
end
13 | module_function :lowercase_char?
14 |
15 | end
16 |
--------------------------------------------------------------------------------
/lib/unicode_utils/name_alias.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | module UnicodeUtils
4 |
# A formal name alias of a character, consisting of the alias string
# and the type of alias.
#
# See: UnicodeUtils.name_aliases
class NameAlias

  # The alias as string.
  attr_reader :name

  # The type of alias as symbol. Currently one of :correction,
  # :control, :alternate, :figment, :abbreviation.
  attr_reader :type

  # Do not construct directly. Use UnicodeUtils.name_aliases.
  def initialize(name, type)
    @name = name
    @type = type
  end

  # Returns a descriptive string that shows the class, the alias name
  # and the alias type. The format may change even in minor releases.
  def inspect
    "#<#{self.class.name} #{name.inspect} (#{type})>"
  end

  # Returns name.
  def to_s
    name
  end

  # Two aliases are equal if both their type and their name are equal.
  def ==(other)
    other.kind_of?(NameAlias) && other.type == type && other.name == name
  end

  # Same as ==, so that aliases can be used as hash keys.
  def eql?(other)
    self == other
  end

  # Derived from name only; equal aliases have equal names and
  # therefore equal hash values.
  def hash
    name.hash
  end

end
45 |
46 | end
47 |
--------------------------------------------------------------------------------
/lib/unicode_utils/name_aliases.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/name_alias"
4 | require "unicode_utils/read_cdata"
5 |
6 | module UnicodeUtils
7 |
8 | NAME_ALIASES_MAP = Impl.read_name_aliases("name_aliases") # :nodoc:
9 | NAME_ALIASES_MAP.default = [].freeze
10 |
# Get an Enumerable of the formal name aliases of the given character.
# A character without aliases yields an empty Enumerable.
#
# The elements are instances of UnicodeUtils::NameAlias; they appear
# in the same order as in NameAliases.txt of the Unicode Character
# Database.
#
# Example:
#
#   require "unicode_utils/name_aliases"
#   UnicodeUtils.name_aliases("\n").map(&:name) # => ["LINE FEED", "NEW LINE", "END OF LINE", "LF", "NL", "EOL"]
#
# See also: UnicodeUtils.char_name
def name_aliases(char)
  code_point = char.ord
  NAME_ALIASES_MAP[code_point]
end
27 | module_function :name_aliases
28 |
29 | end
30 |
--------------------------------------------------------------------------------
/lib/unicode_utils/nfc.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 | require "unicode_utils/canonical_decomposition"
5 | require "unicode_utils/combining_class"
6 |
7 | module UnicodeUtils
8 |
9 | module Impl # :nodoc:all
10 |
11 | COMPOSITION_EXCLUSION_SET =
12 | Impl.read_code_point_set("composition_exclusion_set")
13 |
# Two level lookup built from the two element entries of
# CANONICAL_DECOMPOSITION_MAP:
# first element => { second element => composite }.
CANONICAL_COMPOSITION_MAP = {}.tap do |pairs|
  CANONICAL_DECOMPOSITION_MAP.each_pair { |composite, parts|
    next unless parts.length == 2
    by_second = (pairs[parts[0]] ||= {})
    by_second[parts[1]] = composite
  }
end
21 |
22 | module NFC
23 |
# Does b block c? True if the combining class of b is greater than or
# equal to the combining class of c.
def self.blocked?(b, c)
  # From the standard:
  # "If a combining character sequence is in canonical order,
  # then testing whether a character is blocked requires looking
  # at only the immediately preceding character."
  # cpary is in canonical order (since it comes out of
  # canonical_decomposition).
  class_of_b = COMBINING_CLASS_MAP[b]
  class_of_c = COMBINING_CLASS_MAP[c]
  class_of_b >= class_of_c
end
34 |
# True if +cp+ has a canonical decomposition (a table entry or, for
# 0xAC00..0xD7A3, a hangul syllable decomposition) and is not listed
# in COMPOSITION_EXCLUSION_SET.
def self.primary_composite?(cp)
  decomposable = CANONICAL_DECOMPOSITION_MAP[cp] ||
    # has hangul syllable decomposition?
    (cp >= 0xAC00 && cp <= 0xD7A3)
  return false unless decomposable
  !COMPOSITION_EXCLUSION_SET.include?(cp)
end
43 |
44 | end
45 |
# Composes the code points of +str+ and returns the result as a new
# string in the encoding of +str+. Both nfc and nfkc pass an already
# decomposed string.
#
# A starter is a code point whose combining class is 0. Hangul
# L + V and LV + T pairs are composed arithmetically, everything
# else via CANONICAL_COMPOSITION_MAP, accepting only primary
# composites.
def self.composition(str)
  ### constants for hangul composition ###
  sbase = 0xAC00
  lbase = 0x1100
  vbase = 0x1161
  tbase = 0x11A7
  lcount = 19
  vcount = 21
  tcount = 28
  ncount = vcount * tcount
  scount = lcount * ncount
  ########################################

  String.new.force_encoding(str.encoding).tap do |res|
    # The starter that following code points may still combine with;
    # it is written to res only once it can no longer change.
    last_starter = nil
    # Non-starters after last_starter that did not combine with it.
    uncomposable_non_starters = []
    str.each_codepoint { |cp|
      if COMBINING_CLASS_MAP[cp] == 0 # starter?
        combined = false
        if last_starter && uncomposable_non_starters.empty?
          ### hangul ###
          lindex = last_starter - lbase
          if 0 <= lindex && lindex < lcount
            vindex = cp - vbase
            # Only vcount vowels exist; vbase + vcount is not one.
            if 0 <= vindex && vindex < vcount
              last_starter =
                sbase + (lindex * vcount + vindex) * tcount
              combined = true
            end
          end
          unless combined
            sindex = last_starter - sbase
            if 0 <= sindex && sindex < scount && (sindex % tcount) == 0
              tindex = cp - tbase
              # tindex 0 is tbase itself, which is not a trailing
              # consonant; treating it as one would drop cp.
              if 0 < tindex && tindex < tcount
                last_starter += tindex
                combined = true
              end
            end
          end
          ##############
          unless combined
            map = Impl::CANONICAL_COMPOSITION_MAP[last_starter]
            composition = map && map[cp]
            if composition && Impl::NFC.primary_composite?(composition)
              last_starter = composition
              combined = true
            end
          end
        end
        unless combined
          # cp starts a new sequence: flush the previous one.
          res << last_starter if last_starter
          uncomposable_non_starters.each { |nc| res << nc }
          uncomposable_non_starters.clear
          last_starter = cp
        end
      else
        last_non_starter = uncomposable_non_starters.last
        if last_non_starter && Impl::NFC.blocked?(last_non_starter, cp)
          uncomposable_non_starters << cp
        else
          map = Impl::CANONICAL_COMPOSITION_MAP[last_starter]
          composition = map && map[cp]
          if composition && Impl::NFC.primary_composite?(composition)
            last_starter = composition
          else
            uncomposable_non_starters << cp
          end
        end
      end
    }
    # Flush the sequence that was still open at end of input.
    res << last_starter if last_starter
    uncomposable_non_starters.each { |nc| res << nc }
  end
end
121 |
122 | end
123 |
# Get +str+ in Normalization Form C.
#
# Some characters have more than one representation in Unicode: a
# single code point, and one or more sequences of several code
# points. This function canonically decomposes +str+ and then
# "composes" such characters into the single code point form.
#
# Example:
#
#   require "unicode_utils/nfc"
#   UnicodeUtils.nfc("La\u{308}mpchen") => "Lämpchen"
def nfc(str)
  Impl.composition(UnicodeUtils.canonical_decomposition(str))
end
140 | module_function :nfc
141 |
142 | end
143 |
--------------------------------------------------------------------------------
/lib/unicode_utils/nfd.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/canonical_decomposition"
4 |
5 | module UnicodeUtils
6 |
# Get +str+ in Normalization Form D.
#
# This is another name for UnicodeUtils.canonical_decomposition, to
# which the call is forwarded unchanged.
def nfd(str)
  UnicodeUtils.canonical_decomposition(str)
end
13 | module_function :nfd
14 |
15 | end
16 |
--------------------------------------------------------------------------------
/lib/unicode_utils/nfkc.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/compatibility_decomposition"
4 | require "unicode_utils/nfc"
5 |
6 | module UnicodeUtils
7 |
# Get +str+ in Normalization Form KC.
#
# Normalization Form KC is compatibility decomposition (NFKD)
# followed by composition. Like NFKD, this normalization can alter
# how a string is displayed.
#
# Example:
#
#   require "unicode_utils/nfkc"
#   # LATIN SMALL LIGATURE FI => LATIN SMALL LETTER F, LATIN SMALL LETTER I
#   UnicodeUtils.nfkc("\u{fb01}") => "fi"
#
# See also: UnicodeUtils.compatibility_decomposition
def nfkc(str)
  Impl.composition(UnicodeUtils.compatibility_decomposition(str))
end
25 | module_function :nfkc
26 |
27 | end
28 |
--------------------------------------------------------------------------------
/lib/unicode_utils/nfkd.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/compatibility_decomposition"
4 |
5 | module UnicodeUtils
6 |
# Get +str+ in Normalization Form KD.
#
# This is another name for UnicodeUtils.compatibility_decomposition,
# to which the call is forwarded unchanged.
def nfkd(str)
  UnicodeUtils.compatibility_decomposition(str)
end
13 | module_function :nfkd
14 |
15 | end
16 |
--------------------------------------------------------------------------------
/lib/unicode_utils/read_cdata.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | module UnicodeUtils
4 |
5 | # Absolute path to the directory from which UnicodeUtils loads its
6 | # compiled Unicode data files at runtime.
7 | CDATA_DIR = File.absolute_path(File.join(File.dirname(__FILE__), "..", "..", "cdata"))
8 |
9 | module Impl # :nodoc:
10 |
11 | EAST_ASIAN_WIDTH_SYMBOL_MAP = {
12 | 1 => :Ambiguous,
13 | 2 => :Halfwidth,
14 | 3 => :Wide,
15 | 4 => :Fullwidth,
16 | 5 => :Narrow
17 | }.freeze
18 |
19 | NAME_ALIAS_TYPE_TO_SYMBOL_MAP = {
20 | 1 => :correction,
21 | 2 => :control,
22 | 3 => :alternate,
23 | 4 => :figment,
24 | 5 => :abbreviation
25 | }.freeze
26 |
# Opens the file named +filename+ inside CDATA_DIR for reading as
# US-ASCII and hands the open file to the given block.
def self.open_cdata_file(filename, &block)
  path = File.join(CDATA_DIR, filename)
  File.open(path, "r:US-ASCII:-", &block)
end
30 |
# Reads a file made of consecutive 6 hexdigit code points and returns
# a Hash that maps each of these code points to true.
def self.read_code_point_set(filename)
  members = {}
  open_cdata_file(filename) do |io|
    hex = ("x" * 6).force_encoding(Encoding::US_ASCII)
    members[hex.to_i(16)] = true while io.read(6, hex)
  end
  members
end
42 |
# Reads a file made of pairs of 6 hexdigit code points and returns a
# Hash that maps the first code point of each pair to the second.
def self.read_code_point_map(filename)
  mapping = {}
  open_cdata_file(filename) do |io|
    hex = ("x" * 6).force_encoding(Encoding::US_ASCII)
    while io.read(6, hex)
      # take the key before the buffer is reused for the value
      source = hex.to_i(16)
      mapping[source] = io.read(6, hex).to_i(16)
    end
  end
  mapping
end
54 |
# Reads a file of records, each made of a 6 hexdigit key code point,
# any number of 6 hexdigit value code points and a terminating 6 byte
# field that starts with "x". Returns a Hash from key code point to
# the Array of its values.
def self.read_multivalued_map(filename)
  mapping = {}
  open_cdata_file(filename) do |io|
    hex = ("x" * 6).force_encoding(Encoding::US_ASCII)
    while io.read(6, hex)
      key = hex.to_i(16)
      values = []
      # byte 120 is "x", the end of record marker
      values << hex.to_i(16) until io.read(6, hex).getbyte(0) == 120
      mapping[key] = values
    end
  end
  mapping
end
71 |
# Reads a file in which every line consists of a 6 hexdigit code
# point immediately followed by a name. Returns a Hash from code point
# to name (without the line terminator).
def self.read_names(filename)
  names = {}
  open_cdata_file(filename) do |io|
    hex = ("x" * 6).force_encoding(Encoding::US_ASCII)
    while io.read(6, hex)
      code_point = hex.to_i(16)
      name = io.gets
      name.chomp!
      names[code_point] = name
    end
  end
  names
end
83 |
# Reads conditional casing records, one per line, with the fields
# code point; mapping (comma separated code points); language id;
# context, all separated by ";". Returns a nested Hash:
# code point => { language id (Symbol or nil) => casing object }.
#
# The casing object is an instance of the Impl class named
# "<context without underscores>ConditionalCasing", constructed with
# the mapping.
def self.read_conditional_casings(filename)
  casings = {}
  open_cdata_file(filename) do |io|
    io.each_line do |line|
      line.chomp!
      fields = line.split(";")
      code_point = fields[0].to_i(16)
      mapping = fields[1].split(",").map { |hex| hex.to_i(16) }
      # an empty language field means "no particular language"
      language_id = fields[2].empty? ? nil : fields[2].to_sym
      context = fields[3] && fields[3].gsub('_', '')
      casing_class = Impl.const_get("#{context}ConditionalCasing")
      by_language = (casings[code_point] ||= {})
      by_language[language_id] = casing_class.new(mapping)
    end
  end
  casings
end
100 |
# Reads the file "combining_class_map", made of 6 hexdigit code
# points each followed by a 2 hexdigit combining class. Returns a
# Hash from code point to combining class (both Integers).
def self.read_combining_class_map
  classes = {}
  open_cdata_file("combining_class_map") do |io|
    cp_hex = ("x" * 6).force_encoding(Encoding::US_ASCII)
    cc_hex = ("x" * 2).force_encoding(Encoding::US_ASCII)
    while io.read(6, cp_hex)
      code_point = cp_hex.to_i(16)
      classes[code_point] = io.read(2, cc_hex).to_i(16)
    end
  end
  classes
end
114 |
# Read a map whose keys are code points (6 hexdigits, converted to
# integer) and whose values are single hexdigits (converted to
# integer).
def self.read_hexdigit_map(filename)
  digits = {}
  open_cdata_file(filename) do |io|
    cp_hex = ("x" * 6).force_encoding(Encoding::US_ASCII)
    digit_hex = "x".dup.force_encoding(Encoding::US_ASCII)
    while io.read(6, cp_hex)
      code_point = cp_hex.to_i(16)
      digits[code_point] = io.read(1, digit_hex).to_i(16)
    end
  end
  digits
end
131 |
# Returns a list (array) of pairs (two element Arrays) of Range
# (code points) and associated integer value. In the file, each
# record is the first code point of the range, the last code point of
# the range (6 hexdigits each) and one hexdigit for the value.
def self.read_range_to_hexdigit_list(filename)
  ranges = []
  open_cdata_file(filename) do |io|
    cp_hex = ("x" * 6).force_encoding(Encoding::US_ASCII)
    digit_hex = "x".dup.force_encoding(Encoding::US_ASCII)
    while io.read(6, cp_hex)
      first = cp_hex.to_i(16)
      last = io.read(6, cp_hex).to_i(16)
      value = io.read(1, digit_hex).to_i(16)
      ranges << [first..last, value]
    end
  end
  ranges
end
150 |
# Reads 6 hexdigit code points, each followed by one hexdigit width
# code, and returns a Hash from code point to the width symbol found
# in EAST_ASIAN_WIDTH_SYMBOL_MAP. Code points that are not in the file
# default to :Neutral.
def self.read_east_asian_width_per_cp(filename)
  # like read_hexdigit_map, but with translation to symbol values
  widths = Hash.new(:Neutral)
  open_cdata_file(filename) do |io|
    cp_hex = ("x" * 6).force_encoding(Encoding::US_ASCII)
    code_hex = "x".dup.force_encoding(Encoding::US_ASCII)
    while io.read(6, cp_hex)
      code_point = cp_hex.to_i(16)
      width_code = io.read(1, code_hex).to_i(16)
      widths[code_point] = EAST_ASIAN_WIDTH_SYMBOL_MAP[width_code]
    end
  end
  widths
end
166 |
# Reads a range list with read_range_to_hexdigit_list and replaces
# the integer value of every pair with the corresponding symbol from
# EAST_ASIAN_WIDTH_SYMBOL_MAP. Returns the list.
def self.read_east_asian_width_ranges(filename)
  ranges = read_range_to_hexdigit_list(filename)
  ranges.each { |pair| pair[1] = EAST_ASIAN_WIDTH_SYMBOL_MAP[pair[1]] }
end
174 |
# Reads 6 hexdigit code points, each followed by a 2 character
# general category, and returns a Hash from code point to category
# symbol.
def self.read_general_category_per_cp(filename)
  categories = {}
  open_cdata_file(filename) do |io|
    cp_hex = ("x" * 6).force_encoding(Encoding::US_ASCII)
    category = ("x" * 2).force_encoding(Encoding::US_ASCII)
    while io.read(6, cp_hex)
      code_point = cp_hex.to_i(16)
      categories[code_point] = io.read(2, category).to_sym
    end
  end
  categories
end
188 |
# Reads records made of the first and the last code point of a range
# (6 hexdigits each) followed by a 2 character general category.
# Returns an Array of [Range, category symbol] pairs.
def self.read_general_category_ranges(filename)
  ranges = []
  open_cdata_file(filename) do |io|
    cp_hex = ("x" * 6).force_encoding(Encoding::US_ASCII)
    category = ("x" * 2).force_encoding(Encoding::US_ASCII)
    while io.read(6, cp_hex)
      first = cp_hex.to_i(16)
      last = io.read(6, cp_hex).to_i(16)
      ranges << [first..last, io.read(2, category).to_sym]
    end
  end
  ranges
end
205 |
# Reads lines of the form "key;value" and returns a Hash from key to
# value, both converted to symbols after surrounding whitespace has
# been removed.
def self.read_symbol_map(filename)
  symbols = {}
  open_cdata_file(filename) do |io|
    io.each_line do |line|
      fields = line.split(";")
      key = fields[0]
      value = fields[1]
      key.strip!
      value.strip!
      symbols[key.to_sym] = value.to_sym
    end
  end
  symbols
end
218 |
# Reads name alias records. Each record is a 6 hexdigit code point,
# one hexdigit with the number of aliases and then, per alias, one
# hexdigit for the alias type, two hexdigits for the length of the
# name and the name itself. Returns a Hash from code point to a
# frozen Array of NameAlias objects (with frozen names).
def self.read_name_aliases(filename)
  table = {}
  open_cdata_file(filename) do |io|
    cp_hex = ("x" * 6).force_encoding(Encoding::US_ASCII)
    count_hex = ("x" * 1).force_encoding(Encoding::US_ASCII)
    type_hex = ("x" * 1).force_encoding(Encoding::US_ASCII)
    length_hex = ("x" * 2).force_encoding(Encoding::US_ASCII)
    while io.read(6, cp_hex)
      alias_count = io.read(1, count_hex).to_i(16)
      aliases = Array.new(alias_count) do
        type = NAME_ALIAS_TYPE_TO_SYMBOL_MAP[io.read(1, type_hex).to_i(16)]
        name_length = io.read(2, length_hex).to_i(16)
        NameAlias.new(io.read(name_length).freeze, type)
      end
      table[cp_hex.to_i(16)] = aliases.freeze
    end
  end
  table
end
242 |
243 | end
244 |
245 | end
246 |
--------------------------------------------------------------------------------
/lib/unicode_utils/sid.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/name_aliases"
4 | require "unicode_utils/code_point_type"
5 |
6 | module UnicodeUtils
7 |
# Maps a code point to the name of its preferred alias. An alias of
# type :correction wins over :control, then :figment, then
# :alternate; code points with none of these have no entry.
CP_PREFERRED_ALIAS_STRING_MAP = {}.tap do |preferred|
  NAME_ALIASES_MAP.each { |cp, aliases|
    chosen = nil
    [:correction, :control, :figment, :alternate].each { |wanted|
      chosen = aliases.find { |candidate| candidate.type == wanted }
      break if chosen
    }
    preferred[cp] = chosen.name if chosen
  }
end #:nodoc:
18 |
# Returns a unique string identifier for every code point. Returns
# nil if +code_point+ is not in the Unicode codespace. +code_point+
# must be an Integer.
#
# The identifier is either the non-empty Name property value of
# +code_point+, a non-empty Name_Alias string property value of
# +code_point+, or the code point label as described by section
# "Code Point Labels" in chapter 4.8 "Name" of the Unicode standard.
#
# An identifier that starts with "<" is a code point label and ends
# with ">". Any other identifier is the normative name or a formal
# alias string.
#
# The exact name/alias/label selection algorithm may change even in
# minor UnicodeUtils releases, but overall behaviour will stay the
# same in spirit.
#
# The selection process in this version of UnicodeUtils is:
# 1. Use an alias of type :correction, :control, :figment or
#    :alternate (with listed precedence) if available
# 2. Use the Unicode Name property value if it is not empty
# 3. Construct a code point label in angle brackets.
#
# Examples:
#
#   require "unicode_utils/sid"
#
#   U.sid 0xa    # => "LINE FEED"
#   U.sid 0x0    # => "NULL"
#   U.sid 0xfeff # => "BYTE ORDER MARK"
#   U.sid 0xe000 # => "<private-use-E000>"
#   U.sid 0x61   # => "LATIN SMALL LETTER A"
#   U.sid -1     # => nil
def sid(code_point)
  preferred = CP_PREFERRED_ALIAS_STRING_MAP[code_point]
  return preferred if preferred
  name = UnicodeUtils.char_name(code_point)
  # an empty name or one starting with "<" is not usable as identifier
  return name if name && name !~ /\A(\<|\z)/
  type = UnicodeUtils.code_point_type(code_point)
  return nil unless type
  label = type.to_s.downcase.gsub('_', '-')
  "<#{label}-#{code_point.to_s(16).upcase.rjust(4, '0')}>"
end
61 | module_function :sid
62 |
63 | end
64 |
--------------------------------------------------------------------------------
/lib/unicode_utils/simple_casefold.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | CASEFOLD_C_MAP = Impl.read_code_point_map("casefold_c_map") # :nodoc:
8 |
9 | CASEFOLD_S_MAP = Impl.read_code_point_map("casefold_s_map") # :nodoc:
10 |
# Perform simple case folding. Unlike full case folding, only one to
# one mappings are applied, so the returned string is exactly as long
# as +str+.
#
# Case folding exists for case insensitive string comparison.
#
# Examples:
#
#   require "unicode_utils/simple_casefold"
#   UnicodeUtils.simple_casefold("Ümit") == UnicodeUtils.simple_casefold("ümit") => true
#   UnicodeUtils.simple_casefold("WEISS") == UnicodeUtils.simple_casefold("weiß") => false
#
# See also: UnicodeUtils.casefold
def simple_casefold(str)
  folded = String.new.force_encoding(str.encoding)
  str.each_codepoint do |cp|
    # the C map takes precedence over the S map; unmapped code points stay
    target = CASEFOLD_C_MAP[cp] || CASEFOLD_S_MAP[cp] || cp
    folded << target
  end
  folded
end
31 | module_function :simple_casefold
32 |
33 | end
34 |
--------------------------------------------------------------------------------
/lib/unicode_utils/simple_downcase.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | SIMPLE_DOWNCASE_MAP = Impl.read_code_point_map("simple_lc_map") # :nodoc:
8 |
9 | # Map each code point in +str+ that has a single code point
10 | # lowercase-mapping to that lowercase mapping. The returned string
11 | # has the same length as the original string.
12 | #
13 | # This function is locale independent.
14 | #
15 | # Examples:
16 | #
17 | # require "unicode_utils/simple_downcase"
18 | # UnicodeUtils.simple_downcase("ÜMIT: 123") => "ümit: 123"
19 | # UnicodeUtils.simple_downcase("STRASSE") => "strasse"
20 | def simple_downcase(str)
21 | String.new.force_encoding(str.encoding).tap { |res|
22 | str.each_codepoint { |cp|
23 | res << (SIMPLE_DOWNCASE_MAP[cp] || cp)
24 | }
25 | }
26 | end
27 | module_function :simple_downcase
28 |
29 | end
30 |
--------------------------------------------------------------------------------
/lib/unicode_utils/simple_upcase.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | SIMPLE_UPCASE_MAP = Impl.read_code_point_map("simple_uc_map") # :nodoc:
8 |
9 | # Map each code point in +str+ that has a single code point
10 | # uppercase-mapping to that uppercase mapping. The returned string
11 | # has the same length as the original string.
12 | #
13 | # This function is locale independent.
14 | #
15 | # Examples:
16 | #
17 | # require "unicode_utils/simple_upcase"
18 | # UnicodeUtils.simple_upcase("ümit: 123") => "ÜMIT: 123"
19 | # UnicodeUtils.simple_upcase("weiß") => "WEIß"
20 | def simple_upcase(str)
21 | String.new.force_encoding(str.encoding).tap { |res|
22 | str.each_codepoint { |cp|
23 | res << (SIMPLE_UPCASE_MAP[cp] || cp)
24 | }
25 | }
26 | end
27 | module_function :simple_upcase
28 |
29 | end
30 |
--------------------------------------------------------------------------------
/lib/unicode_utils/soft_dotted_char_q.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | SOFT_DOTTED_SET = Impl.read_code_point_set("soft_dotted_set") # :nodoc:
8 |
9 | # Returns true if the given character has the Unicode property
10 | # Soft_Dotted.
11 | def soft_dotted_char?(char)
12 | SOFT_DOTTED_SET.include?(char.ord)
13 | end
14 | module_function :soft_dotted_char?
15 |
16 | end
17 |
--------------------------------------------------------------------------------
/lib/unicode_utils/titlecase.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 | require "unicode_utils/conditional_casing"
5 | require "unicode_utils/each_word"
6 | require "unicode_utils/cased_char_q"
7 | require "unicode_utils/downcase"
8 |
9 | module UnicodeUtils
10 |
11 | SIMPLE_TITLECASE_MAP = Impl.read_code_point_map("simple_tc_map") # :nodoc:
12 | SPECIAL_TITLECASE_MAP = Impl.read_multivalued_map("special_tc_map") # :nodoc:
13 |
14 | # Convert the first cased character after each word boundary to
15 | # titlecase and all other cased characters to lowercase. For many,
16 | # but not all characters, the titlecase mapping is the same as the
17 | # uppercase mapping.
18 | #
19 | # Some conversion rules are language dependent, these are in effect
20 | # when a non-nil +language_id+ is given. If non-nil, the
21 | # +language_id+ must be a two letter language code as defined in BCP
22 | # 47 (http://tools.ietf.org/rfc/bcp/bcp47.txt) as a symbol. If a
23 | # language doesn't have a two letter code, the three letter code is
24 | # to be used. If locale independent behaviour is required, +nil+
25 | # should be passed explicitely, because a later version of
26 | # UnicodeUtils may default to something else.
27 | #
28 | # Example:
29 | #
30 | # require "unicode_utils/titlecase"
31 | # UnicodeUtils.titlecase("hello, world!") => "Hello, World!"
32 | def titlecase(str, language_id = nil)
33 | String.new.force_encoding(str.encoding).tap do |res|
34 | # ensure O(1) lookup by index
35 | str = str.encode(Encoding::UTF_32LE)
36 | i = 0
37 | each_word(str) { |word|
38 | cased_char_found = false
39 | word.each_codepoint { |cp|
40 | cased = cased_char?(cp)
41 | if !cased_char_found && cased
42 | cased_char_found = true
43 | special_mapping =
44 | Impl.conditional_titlecase_mapping(cp, str, i, language_id) ||
45 | SPECIAL_TITLECASE_MAP[cp]
46 | if special_mapping
47 | special_mapping.each { |m| res << m }
48 | else
49 | res << (SIMPLE_TITLECASE_MAP[cp] || cp)
50 | end
51 | elsif cased
52 | special_mapping =
53 | Impl.conditional_downcase_mapping(cp, str, i, language_id) ||
54 | SPECIAL_DOWNCASE_MAP[cp]
55 | if special_mapping
56 | special_mapping.each { |m| res << m }
57 | else
58 | res << (SIMPLE_DOWNCASE_MAP[cp] || cp)
59 | end
60 | else
61 | res << cp
62 | end
63 | i += 1
64 | }
65 | }
66 | end
67 | end
68 | module_function :titlecase
69 |
70 | end
71 |
--------------------------------------------------------------------------------
/lib/unicode_utils/titlecase_char_q.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | TITLECASE_LETTER_SET = Impl.read_code_point_set("cat_set_titlecase") # :nodoc:
8 |
9 | # True if the given character has the General_Category
10 | # Titlecase_Letter (Lt).
11 | def titlecase_char?(char)
12 | TITLECASE_LETTER_SET.include?(char.ord)
13 | end
14 | module_function :titlecase_char?
15 |
16 | end
17 |
--------------------------------------------------------------------------------
/lib/unicode_utils/u.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils"
4 |
5 | # Shortcut for usage in irb. This shortcut is only defined when
6 | # unicode_utils/u is explicitly required. It is intended for
7 | # interactive use only!
8 | #
9 | # $ irb -r unicode_utils/u
10 | # irb(main):001:0> U.grep(/angstrom/)
11 | # => [#<U+212B "Å" ANGSTROM SIGN utf8:e2,84,ab>]
12 | U = UnicodeUtils
13 |
--------------------------------------------------------------------------------
/lib/unicode_utils/upcase.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 | require "unicode_utils/simple_upcase"
5 | require "unicode_utils/conditional_casing"
6 |
7 | module UnicodeUtils
8 |
9 | SPECIAL_UPCASE_MAP = Impl.read_multivalued_map("special_uc_map") # :nodoc:
10 |
11 | # Perform a full case-conversion of +str+ to uppercase according to
12 | # the Unicode standard.
13 | #
14 | # Some conversion rules are language dependent, these are in effect
15 | # when a non-nil +language_id+ is given. If non-nil, the
16 | # +language_id+ must be a two letter language code as defined in BCP
17 | # 47 (http://tools.ietf.org/rfc/bcp/bcp47.txt) as a symbol. If a
18 | # language doesn't have a two letter code, the three letter code is
19 | # to be used. If locale independent behaviour is required, +nil+
20 | # should be passed explicitely, because a later version of
21 | # UnicodeUtils may default to something else.
22 | #
23 | # Examples:
24 | #
25 | # require "unicode_utils/upcase"
26 | # UnicodeUtils.upcase("weiß") => "WEISS"
27 | # UnicodeUtils.upcase("i", :en) => "I"
28 | # UnicodeUtils.upcase("i", :tr) => "İ"
29 | def upcase(str, language_id = nil)
30 | String.new.force_encoding(str.encoding).tap { |res|
31 | if Impl::LANGS_WITH_RULES.include?(language_id)
32 | # ensure O(1) lookup by index
33 | str = str.encode(Encoding::UTF_32LE)
34 | end
35 | pos = 0
36 | str.each_codepoint { |cp|
37 | special_mapping =
38 | Impl.conditional_upcase_mapping(cp, str, pos, language_id) ||
39 | SPECIAL_UPCASE_MAP[cp]
40 | if special_mapping
41 | special_mapping.each { |m| res << m }
42 | else
43 | res << (SIMPLE_UPCASE_MAP[cp] || cp)
44 | end
45 | pos += 1
46 | }
47 | }
48 | end
49 | module_function :upcase
50 |
51 | end
52 |
--------------------------------------------------------------------------------
/lib/unicode_utils/uppercase_char_q.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | PROP_UPPERCASE_SET = Impl.read_code_point_set("prop_set_uppercase") # :nodoc:
8 |
9 | # True if the given character has the Unicode property Uppercase.
10 | def uppercase_char?(char)
11 | PROP_UPPERCASE_SET.include?(char.ord)
12 | end
13 | module_function :uppercase_char?
14 |
15 | end
16 |
--------------------------------------------------------------------------------
/lib/unicode_utils/version.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | module UnicodeUtils
4 |
5 | # Corresponds to the unicode_utils gem version.
6 | #
7 | # Conforms to Semantic Versioning as documented at semver.org.
8 | #
9 | # Summary:
10 | # MAJOR.MINOR.PATCHLEVEL
11 | # - A backwards incompatible change causes a change in MAJOR
12 | # - New features or non-bugfix improvements cause a change in MINOR
13 | # - Bugfixes increase only PATCHLEVEL.
14 | # - Pre-release versions append more info after a dash.
15 | VERSION = "1.4.0"
16 |
17 | # The version of Unicode implemented by this version of UnicodeUtils.
18 | #
19 | # require "unicode_utils/version"
20 | # puts "Unicode #{UnicodeUtils::UNICODE_VERSION}"
21 | UNICODE_VERSION = "6.2.0"
22 |
23 | end
24 |
--------------------------------------------------------------------------------
/lib/unicode_utils/white_space_char_q.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "unicode_utils/read_cdata"
4 |
5 | module UnicodeUtils
6 |
7 | WHITE_SPACE_SET = Impl.read_code_point_set("white_space_set") # :nodoc:
8 |
9 | # True if the given character has the Unicode property White_Space.
10 | #
11 | # Example:
12 | #
13 | # require "unicode_utils/general_category"
14 | # require "unicode_utils/white_space_char_q"
15 | #
16 | # UnicodeUtils.general_category("\n") => :Control
17 | # UnicodeUtils.white_space_char?("\n") => true
18 | def white_space_char?(char)
19 | WHITE_SPACE_SET.include?(char.ord)
20 | end
21 | module_function :white_space_char?
22 |
23 | end
24 |
--------------------------------------------------------------------------------
/test/coverage.rb:
--------------------------------------------------------------------------------
1 | require "simplecov"
2 | SimpleCov.start # started before the suite (and whatever it requires) is loaded
3 |
4 | require_relative "suite.rb" # suite.rb in turn requires every test_*.rb file
5 |
--------------------------------------------------------------------------------
/test/suite.rb:
--------------------------------------------------------------------------------
1 | Dir["#{File.dirname __FILE__}/test_*.rb"].each { |fn|
2 | require_relative File.basename(fn)
3 | }
4 |
--------------------------------------------------------------------------------
/test/test_case_mappings.rb:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | require "test/unit"
4 |
5 | require "unicode_utils/upcase"
6 | require "unicode_utils/downcase"
7 | require "unicode_utils/casefold"
8 |
9 | class TestCaseMappings < Test::Unit::TestCase
10 |
11 | TXT_DIR = File.dirname(__FILE__)
12 |
13 | def read_txt(filename)
14 | File.read(File.join(TXT_DIR, filename), mode: "r:UTF-8:-")
15 | end
16 |
17 | def test_upcase_german_text
18 | assert_equal read_txt("dreilaendereck_uc.txt"),
19 | UnicodeUtils.upcase(read_txt("dreilaendereck.txt"))
20 | end
21 |
22 | def test_upcase_german_text_language_de
23 | assert_equal read_txt("dreilaendereck_uc.txt"),
24 | UnicodeUtils.upcase(read_txt("dreilaendereck.txt"), :de)
25 | end
26 |
27 | def test_upcase_german_text_language_tr
28 | assert_not_equal read_txt("dreilaendereck_uc.txt"),
29 | UnicodeUtils.upcase(read_txt("dreilaendereck.txt"), :tr)
30 | end
31 |
32 | def test_downcase_german_text
33 | assert_equal read_txt("dreilaendereck_lc.txt"),
34 | UnicodeUtils.downcase(read_txt("dreilaendereck.txt"))
35 | end
36 |
37 | def test_downcase_german_text_language_de
38 | assert_equal read_txt("dreilaendereck_lc.txt"),
39 | UnicodeUtils.downcase(read_txt("dreilaendereck.txt"), :de)
40 | end
41 |
42 | def test_downcase_german_text_language_tr
43 | assert_not_equal read_txt("dreilaendereck_lc.txt"),
44 | UnicodeUtils.downcase(read_txt("dreilaendereck.txt"), :tr)
45 | end
46 |
47 | def test_casefold_german_text
48 | assert_equal read_txt("dreilaendereck_cf.txt"),
49 | UnicodeUtils.casefold(read_txt("dreilaendereck.txt"))
50 | end
51 |
52 | end
53 |
--------------------------------------------------------------------------------
/test/test_codepoint.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "test/unit"
4 |
5 | require "unicode_utils/codepoint"
6 |
7 | class TestCodepoint < Test::Unit::TestCase
8 |
9 | def test_ord
10 | assert_equal 0x20ac, UnicodeUtils::Codepoint.new(0x20ac).ord
11 | end
12 |
13 | def test_uplus
14 | assert_equal "U+20AC", UnicodeUtils::Codepoint.new(0x20ac).uplus
15 | end
16 |
17 | def test_uplus_more_than_four_digits
18 | assert_equal "U+10FFFF", UnicodeUtils::Codepoint.new(0x10FFFF).uplus
19 | end
20 |
21 | def test_uplus_less_than_four_digits
22 | assert_equal "U+0061", UnicodeUtils::Codepoint.new(0x61).uplus
23 | end
24 |
25 | def test_name
26 | assert_equal "EURO SIGN", UnicodeUtils::Codepoint.new(0x20ac).name
27 | end
28 |
29 | def test_to_s
30 | assert_equal 0x20ac.chr(Encoding::UTF_8), UnicodeUtils::Codepoint.new(0x20ac).to_s
31 | end
32 |
33 | def test_hexbytes
34 | assert_equal "e2,82,ac", UnicodeUtils::Codepoint.new(0x20ac).hexbytes
35 | end
36 |
37 | def test_hexbytes_one_byte
38 | assert_equal "61", UnicodeUtils::Codepoint.new(0x61).hexbytes
39 | end
40 |
41 | def test_inspect
42 | str = UnicodeUtils::Codepoint.new(0x20ac).inspect
43 | assert str.include?("U+")
44 | assert str.include?("€")
45 | assert str.include?("EURO SIGN")
46 | assert str.include?("utf8:e2,82,ac")
47 | end
48 |
49 | end
50 |
--------------------------------------------------------------------------------
/test/test_each_grapheme.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "test/unit"
4 |
5 | require "unicode_utils/each_grapheme"
6 |
7 | class TestEachGrapheme < Test::Unit::TestCase
8 | # Code point list of a grapheme that consists only of U+D800; such test cases are skipped.
9 | UNPAIRED_D800 = [0xd800]
10 | # Yields, per test case in data/GraphemeBreakTest.txt, the expected graphemes as strings in +encoding+.
11 | def each_grapheme_list(encoding = 'utf-8')
12 | count = 0
13 | skip_count = 0
14 | fn = File.join(File.dirname(__FILE__),
15 | "..", "data", "GraphemeBreakTest.txt")
16 | File.open(fn, "r:utf-8:-") do |input|
17 | input.each_line { |line|
18 | has_unpaired_surrogate = false
19 | if line =~ /^([^#]*)#/ # keep only the part in front of a comment
20 | line = $1
21 | end
22 | line.strip!
23 | next if line.empty?
24 | count += 1
25 | graphemes = line.split("÷").map(&:strip).delete_if(&:empty?) # "÷" separates graphemes
26 | graphemes.map! { |g|
27 | cps = g.split("×").map(&:strip).delete_if(&:empty?).map { |c| c.to_i(16) } # "×" separates code points
28 | if cps == UNPAIRED_D800
29 | has_unpaired_surrogate = true
30 | skip_count += 1
31 | break # leaves map! early; the flag set above suppresses the yield below
32 | end
33 | cps.inject(String.new.force_encoding(encoding), &:<<)
34 | }
35 | # Unpaired surrogates are not allowed in UTF-8
36 | # GraphemeBreakTest has test cases with unpaired surrogates
37 | yield graphemes unless has_unpaired_surrogate
38 | }
39 | end
40 | #print "\nSkipped #{skip_count} out of #{count} grapheme tests due to surrogates\n"
41 | end
42 |
43 | def test_each_grapheme_utf8
44 | c = 0
45 | each_grapheme_list { |grapheme_list|
46 | c += 1
47 | graphemes = []
48 | UnicodeUtils.each_grapheme(grapheme_list.join) { |g| graphemes << g }
49 | assert_equal grapheme_list, graphemes
50 | }
51 | assert_equal 348, c # c counts the test cases that were yielded, i.e. not skipped
52 | end
53 |
54 | def test_each_grapheme_utf16
55 | c = 0
56 | each_grapheme_list('utf-16le') { |grapheme_list|
57 | c += 1
58 | graphemes = []
59 | UnicodeUtils.each_grapheme(grapheme_list.join) { |g| graphemes << g }
60 | assert_equal grapheme_list, graphemes
61 | }
62 | # TODO: currently we skip the unpaired surrogates for UTF-16 also,
63 | # because current Ruby implementations raise an exception in
64 | # each_codepoint. Review this point with future implementations.
65 | assert_equal 348, c
66 | end
67 |
68 | end
69 |
--------------------------------------------------------------------------------
/test/test_each_word.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "test/unit"
4 |
5 | require "unicode_utils/each_word"
6 |
7 | class TestEachWord < Test::Unit::TestCase
8 |
9 | def each_word_list
10 | fn = File.join(File.dirname(__FILE__),
11 | "..", "data", "WordBreakTest.txt")
12 | File.open(fn, "r:utf-8:-") do |input|
13 | input.each_line { |line|
14 | if line =~ /^([^#]*)#/
15 | line = $1
16 | end
17 | line.strip!
18 | next if line.empty?
19 | words = line.split("÷").map(&:strip).delete_if(&:empty?)
20 | words.map! { |w|
21 | cps = w.split("×").map(&:strip).delete_if(&:empty?).map { |c| c.to_i(16) }
22 | cps.inject(String.new.force_encoding('utf-8'), &:<<)
23 | }
24 | yield words
25 | }
26 | end
27 | end
28 |
29 | def test_each_word
30 | c = 0
31 | each_word_list { |word_list|
32 | words = UnicodeUtils.each_word(word_list.join).to_a
33 | assert_equal word_list, words
34 | }
35 | end
36 |
37 | end
38 |
--------------------------------------------------------------------------------
/test/test_grep.rb:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | require "test/unit"
4 |
5 | require "unicode_utils/grep"
6 |
7 | class TestGrep < Test::Unit::TestCase
8 |
9 | def test_angstrom
10 | assert_equal [0x212b], UnicodeUtils.grep(/angstrom/).map(&:ord)
11 | end
12 |
13 | end
14 |
--------------------------------------------------------------------------------
/test/test_normalization.rb:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | require "test/unit"
4 | require "unicode_utils/nfd"
5 | require "unicode_utils/nfc"
6 | require "unicode_utils/nfkd"
7 | require "unicode_utils/nfkc"
8 | # See data/NormalizationTest.txt
9 | class TestNormalization < Test::Unit::TestCase
10 |
11 | class Record
12 | def initialize(ary)
13 | @ary = ary
14 | end
15 | def c1
16 | @ary[0]
17 | end
18 | def c2
19 | @ary[1]
20 | end
21 | def c3
22 | @ary[2]
23 | end
24 | def c4
25 | @ary[3]
26 | end
27 | def c5
28 | @ary[4]
29 | end
30 | end
31 |
32 | def each_testdata_record
33 | fn = File.join(File.dirname(__FILE__),
34 | "..", "data", "NormalizationTest.txt")
35 | File.open(fn, "r:utf-8:-") do |input|
36 | input.each_line { |line|
37 | if line =~ /^([^#]*)#/
38 | line = $1
39 | end
40 | line.strip!
41 | next if line.empty? || line =~ /^@Part/
42 | columns = line.split(";")
43 | ary = columns.map { |column|
44 | String.new.force_encoding(Encoding::UTF_8).tap do |str|
45 | column.split(" ").each { |c|
46 | str << c.strip.to_i(16)
47 | }
48 | end
49 | }
50 | yield Record.new(ary)
51 | }
52 | end
53 | end
54 |
55 | def test_nfd
56 | each_testdata_record { |r|
57 | assert_equal r.c3, UnicodeUtils.nfd(r.c1)
58 | assert_equal r.c3, UnicodeUtils.nfd(r.c2)
59 | assert_equal r.c3, UnicodeUtils.nfd(r.c3)
60 | assert_equal r.c5, UnicodeUtils.nfd(r.c4)
61 | assert_equal r.c5, UnicodeUtils.nfd(r.c5)
62 | }
63 | end
64 |
65 | def test_nfc
66 | each_testdata_record { |r|
67 | assert_equal r.c2, UnicodeUtils.nfc(r.c1)
68 | assert_equal r.c2, UnicodeUtils.nfc(r.c2)
69 | assert_equal r.c2, UnicodeUtils.nfc(r.c3)
70 | assert_equal r.c4, UnicodeUtils.nfc(r.c4)
71 | assert_equal r.c4, UnicodeUtils.nfc(r.c5)
72 | }
73 | end
74 |
75 | def test_nfkd
76 | each_testdata_record { |r|
77 | assert_equal r.c5, UnicodeUtils.nfkd(r.c1)
78 | assert_equal r.c5, UnicodeUtils.nfkd(r.c2)
79 | assert_equal r.c5, UnicodeUtils.nfkd(r.c3)
80 | assert_equal r.c5, UnicodeUtils.nfkd(r.c4)
81 | assert_equal r.c5, UnicodeUtils.nfkd(r.c5)
82 | }
83 | end
84 |
85 | def test_nfkc
86 | each_testdata_record { |r|
87 | assert_equal r.c4, UnicodeUtils.nfkc(r.c1)
88 | assert_equal r.c4, UnicodeUtils.nfkc(r.c2)
89 | assert_equal r.c4, UnicodeUtils.nfkc(r.c3)
90 | assert_equal r.c4, UnicodeUtils.nfkc(r.c4)
91 | assert_equal r.c4, UnicodeUtils.nfkc(r.c5)
92 | }
93 | end
94 |
95 | end
96 |
--------------------------------------------------------------------------------
/test/test_unicode_6_0_0.rb:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | require "test/unit"
4 |
5 | require "unicode_utils"
6 |
7 | # Tests behaviour in Unicode 6.0.0 that wasn't in the previously
8 | # supported standard. That means each one of these assertions fails
9 | # with UnicodeUtils 1.0.0.
10 | class TestUnicode_6_0_0 < Test::Unit::TestCase
11 |
12 | def test_char_name
13 | assert_equal "CYRILLIC CAPITAL LETTER PE WITH DESCENDER",
14 | UnicodeUtils.char_name("\u{524}")
15 | assert_equal "SAMARITAN LETTER QUF",
16 | UnicodeUtils.char_name("\u{812}")
17 | assert_equal "TIBETAN SUBJOINED SIGN INVERTED MCHU CAN",
18 | UnicodeUtils.char_name("\u{F8F}")
19 | assert_equal "CANADIAN SYLLABICS TLHWE",
20 | UnicodeUtils.char_name("\u{18E8}")
21 | assert_equal "EGYPTIAN HIEROGLYPH F040",
22 | UnicodeUtils.char_name("\u{1312B}")
23 | assert_equal "STEAMING BOWL",
24 | UnicodeUtils.char_name("\u{1F35C}")
25 | assert_equal "HANGUL JUNGSEONG ARAEA-A",
26 | UnicodeUtils.char_name("\u{d7c5}")
27 | assert_equal "CJK UNIFIED IDEOGRAPH-2A700",
28 | UnicodeUtils.char_name("\u{2a700}")
29 | assert_equal "CJK UNIFIED IDEOGRAPH-2B81D",
30 | UnicodeUtils.char_name("\u{2b81d}")
31 | end
32 |
33 | def test_grep
34 | assert_equal [0x1F35C], UnicodeUtils.grep(/Steaming Bowl/).map(&:ord)
35 | end
36 |
37 | def test_simple_upcase
38 | assert_equal "\u{2c7e}", UnicodeUtils.simple_upcase("\u{23f}")
39 | end
40 |
41 | def test_simple_downcase
42 | assert_equal "\u{23f}", UnicodeUtils.simple_downcase("\u{2c7e}")
43 | end
44 |
45 | end
46 |
--------------------------------------------------------------------------------
/test/test_unicode_6_1_0.rb:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | require "test/unit"
4 |
5 | require "unicode_utils"
6 |
7 | # Tests behaviour in Unicode 6.1.0 that wasn't in the previously
8 | # supported standard. That means each one of these assertions fails
9 | # with UnicodeUtils 1.0.2.
10 | class TestUnicode_6_0_1 < Test::Unit::TestCase
11 |
12 | def test_gc
13 | assert_equal :Po, UnicodeUtils.gc(0xa7)
14 | assert_equal :Po, UnicodeUtils.gc(0xb6)
15 | assert_equal :Po, UnicodeUtils.gc(0xf14)
16 | assert_equal :Po, UnicodeUtils.gc(0x1360)
17 | assert_equal :Po, UnicodeUtils.gc(0x10102)
18 | 0x3248.upto(0x324F) { |cp|
19 | assert_equal :No, UnicodeUtils.gc(cp)
20 | }
21 | end
22 |
23 | def test_char_name
24 | assert_equal "CJK UNIFIED IDEOGRAPH-9FCC", UnicodeUtils.char_name(0x9fcc)
25 | assert_equal "ARABIC LETTER BEH WITH SMALL V BELOW", UnicodeUtils.char_name(0x8a0)
26 | assert_equal "SLEEPING FACE", UnicodeUtils.char_name(0x1f634)
27 | end
28 |
29 | def test_canonical_decomposition
30 | assert_equal "\u{11131}\u{11127}", UnicodeUtils.canonical_decomposition("\u{1112e}")
31 | assert_equal "\u{11132}\u{11127}", UnicodeUtils.canonical_decomposition("\u{1112f}")
32 | end
33 |
34 | def test_nfd
35 | assert_equal "\u{11131}\u{11127}", UnicodeUtils.nfd("\u{1112e}")
36 | assert_equal "\u{11132}\u{11127}", UnicodeUtils.nfd("\u{1112f}")
37 | end
38 |
39 | def test_nfc
40 | assert_equal "\u{1112e}", UnicodeUtils.nfc("\u{11131}\u{11127}")
41 | assert_equal "\u{1112f}", UnicodeUtils.nfc("\u{11132}\u{11127}")
42 | end
43 |
44 | def test_casefold
45 | assert_equal "\u{2d2d}", UnicodeUtils.casefold("\u{10cd}")
46 | assert_equal "\u{a793}", UnicodeUtils.casefold("\u{a792}")
47 | end
48 |
49 | def test_combining_class
50 | assert_equal 7, UnicodeUtils.combining_class(0x116b7)
51 | end
52 |
53 | def test_lowercase_char?
54 | assert_equal true, UnicodeUtils.lowercase_char?(0x2071)
55 | end
56 |
57 | end
58 |
--------------------------------------------------------------------------------
/test/test_unicode_6_2_0.rb:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | require "test/unit"
4 |
5 | require "unicode_utils"
6 |
7 | # Tests behaviour in Unicode 6.2.0 that wasn't in the previously
8 | # supported standard. That means each one of these tests fails
9 | # with UnicodeUtils 1.3.0.
10 | class TestUnicode_6_2_0 < Test::Unit::TestCase
11 |
12 | def test_east_asian_width
13 | assert_equal :Neutral, UnicodeUtils.east_asian_width(0x11a3)
14 | end
15 |
16 | def test_display_width
17 | assert_equal 1, UnicodeUtils.display_width("\u{11a3}")
18 | end
19 |
20 | def test_char_display_width
21 | assert_equal 1, UnicodeUtils.char_display_width(0x11a3)
22 | end
23 |
24 | def test_each_grapheme
25 | # don't break between regional indicator symbols
26 | assert_equal ["\u{1F1E6}\u{1F1E7}"],
27 | UnicodeUtils.each_grapheme("\u{1F1E6}\u{1F1E7}").to_a
28 | end
29 |
30 | def test_sid
31 | # name alias of type correction introduced
32 | assert_equal "SYRIAC SUBLINEAR COLON SKEWED LEFT", UnicodeUtils.sid(0x709)
33 | end
34 |
35 | def test_char_name
36 | assert_equal "TURKISH LIRA SIGN", UnicodeUtils.char_name(0x20ba)
37 | end
38 |
39 | def test_general_category
40 | assert_equal :Currency_Symbol, UnicodeUtils.general_category(0x20ba)
41 | end
42 |
43 | def test_each_word
44 | # don't break between regional indicator symbols
45 | assert_equal ["foo", "\u{1F1E6}\u{1F1E7}", "bar"],
46 | UnicodeUtils.each_word("foo\u{1F1E6}\u{1F1E7}bar").to_a
47 | end
48 |
49 | end
50 |
--------------------------------------------------------------------------------
/unicode_utils.gemspec:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | require "#{File.dirname(__FILE__)}/lib/unicode_utils/version" # defines UnicodeUtils::VERSION used below
4 | # Packaged files: everything matching lib/**/*.rb and cdata/*, the listed test file and four top-level text files.
5 | test_files = ["test/test_unicode_utils.rb"]
6 | files =
7 | Dir["lib/**/*.rb"] + Dir["cdata/*"] + test_files +
8 | ["README.rdoc", "INSTALL.txt", "LICENSE.txt", "CHANGES.txt"]
9 | files.reject! { |fn| fn.end_with?("~") } # drop names ending in "~" (presumably editor backup files)
10 |
11 | Gem::Specification.new do |g|
12 | g.name = "unicode_utils"
13 | g.version = UnicodeUtils::VERSION # gem version comes from lib/unicode_utils/version.rb
14 | g.platform = Gem::Platform::RUBY
15 | g.summary = "additional Unicode aware functions for Ruby 1.9"
16 | g.require_paths = ["lib"]
17 | g.files = files
18 | g.test_files = test_files
19 | g.required_ruby_version = ">= 1.9.1"
20 | g.author = "Stefan Lang"
21 | g.email = "langstefan@gmx.at"
22 | g.has_rdoc = true # NOTE(review): believed to be deprecated in newer RubyGems - confirm before touching
23 | g.extra_rdoc_files = ["README.rdoc", "INSTALL.txt", "CHANGES.txt"]
24 | g.rdoc_options = ["--main=README.rdoc", "--charset=UTF-8"]
25 | g.homepage = "http://github.com/lang/unicode_utils"
26 | g.rubyforge_project = "unicode-utils" # NOTE(review): believed to be deprecated in newer RubyGems - confirm
27 | end
28 |
--------------------------------------------------------------------------------